Resource icon

Deutsche Umlaute in cpp und Linux mit tolower-Eigenbau ersetzen

Programmiersprache(n)
c++
Betriebssystem(e)
Linux
Hallo Leute!

Wer von euch ist in C++ schon bei dem Versuch, Großbuchstaben in Kleinbuchstaben
umzuwandeln, über die deutschen Umlaute gestolpert? Nach einiger Bastelei hier ein
kleines Programmbeispiel, das ich in Zukunft zu erweitern versuchen werde.

Eigentlich könnte man so vorgehen:
Code:
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

/**
 * Returns a copy of `s` in which every byte has been passed through
 * std::tolower.
 *
 * Each char is converted to unsigned char before the call: handing a plain
 * (possibly negative) char or an int derived from it to std::tolower is the
 * mistake the usual "shorter" variants make.
 */
std::string str_tolower(std::string s) {
    for (char& ch : s) {
        const unsigned char byte = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(byte));
    }
    return s;
}


/**
 * Demo: runs std::toupper over every byte of a string that contains German
 * umlauts and prints the result.
 *
 * NOTE(review): the umlauts in the literal are already upper-case, so this
 * call cannot show whether they are converted; the surrounding article talks
 * about tolower — confirm which direction the demo is meant to show.
 */
int main()
{
    std::string s("Teststring ÖÄÜ test");

    // Hand every byte to std::toupper as unsigned char. Casting the function
    // to int(*)(int) would pass plain char values, which are negative for
    // non-ASCII bytes where char is signed — undefined behaviour for toupper.
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    std::cout << s << '\n';

    return 0;
}
Bei dem Beispiel, welches ich im Web gefunden habe, wird man aber merken, dass es die deutschen Umlaute kalt lässt: std::tolower und std::toupper arbeiten byteweise, ein Umlaut besteht in UTF-8 aber aus zwei Bytes.

Code:
/** Testet eigenen tolower-Ersatz: er schluckt die deutschen Umlaute und die Sonderzeichen */
#include <iostream>
#include <string>


/**
 * Lower-cases the letters A-Z and the German umlauts Ä, Ö, Ü in a string.
 *
 * The umlauts are matched as string literals rather than single chars, so the
 * function works with whatever byte sequence the compiler stores for them
 * (presumably two bytes each in a UTF-8 source file — TODO confirm for your
 * build). Everything that is not in the table (digits, punctuation, ß, ...)
 * is left unchanged.
 *
 * @param upsatz  text to convert (taken by value and modified in place)
 * @return        the converted text
 */
std::string de_tolower(std::string upsatz)
{
    // {upper-case, lower-case} pairs. Every source entry must differ from its
    // replacement, otherwise the search below would keep finding its own output.
    static const char* const kPairs[29][2] = {
        {"Ö", "ö"}, {"Ä", "ä"}, {"Ü", "ü"},
        {"A", "a"}, {"B", "b"}, {"C", "c"}, {"D", "d"}, {"E", "e"}, {"F", "f"},
        {"G", "g"}, {"H", "h"}, {"I", "i"}, {"J", "j"}, {"K", "k"}, {"L", "l"},
        {"M", "m"}, {"N", "n"}, {"O", "o"}, {"P", "p"}, {"Q", "q"}, {"R", "r"},
        {"S", "s"}, {"T", "t"}, {"U", "u"}, {"V", "v"}, {"W", "w"}, {"X", "x"},
        {"Y", "y"}, {"Z", "z"}
    };

    for (const auto& pair : kPairs)
    {
        const std::string from = pair[0];
        const std::string to = pair[1];

        std::string::size_type pos = upsatz.find(from);
        while (pos != std::string::npos)
        {
            upsatz.replace(pos, from.length(), to);
            // Continue behind the text just inserted instead of rescanning
            // the whole string from the start.
            pos = upsatz.find(from, pos + to.length());
        }
    }

    return upsatz;
}

/**
 * Upper-cases the letters a-z and the German umlauts ä, ö, ü in a string.
 *
 * The umlauts are matched as string literals rather than single chars, so the
 * function works with whatever byte sequence the compiler stores for them
 * (presumably two bytes each in a UTF-8 source file — TODO confirm for your
 * build). Everything that is not in the table (digits, punctuation, ß, ...)
 * is left unchanged.
 *
 * @param upsatz  text to convert (taken by value and modified in place)
 * @return        the converted text
 */
std::string de_toupper (std::string upsatz)
{
    // {lower-case, upper-case} pairs. Every source entry must differ from its
    // replacement, otherwise the search below would keep finding its own output.
    static const char* const kPairs[29][2] = {
        {"ö", "Ö"}, {"ä", "Ä"}, {"ü", "Ü"},
        {"a", "A"}, {"b", "B"}, {"c", "C"}, {"d", "D"}, {"e", "E"}, {"f", "F"},
        {"g", "G"}, {"h", "H"}, {"i", "I"}, {"j", "J"}, {"k", "K"}, {"l", "L"},
        {"m", "M"}, {"n", "N"}, {"o", "O"}, {"p", "P"}, {"q", "Q"}, {"r", "R"},
        {"s", "S"}, {"t", "T"}, {"u", "U"}, {"v", "V"}, {"w", "W"}, {"x", "X"},
        {"y", "Y"}, {"z", "Z"}
    };

    for (const auto& pair : kPairs)
    {
        const std::string from = pair[0];
        const std::string to = pair[1];

        std::string::size_type pos = upsatz.find(from);
        while (pos != std::string::npos)
        {
            upsatz.replace(pos, from.length(), to);
            // Continue behind the text just inserted instead of rescanning
            // the whole string from the start.
            pos = upsatz.find(from, pos + to.length());
        }
    }

    return upsatz;
}

/**
 * Round-trip demo: prints a sample text, then the result of de_tolower() on
 * it, then the result of de_toupper() on that lowered text.
 */
int main()
{
    const std::string stria = "Teststring ÖÄÜß²³µ€@~ test";
    std::cout << "vorher........: " << stria << '\n';

    const std::string strib = de_tolower(stria);
    std::cout << "nachher.......: " << strib << '\n';

    const std::string stric = de_toupper(strib);
    std::cout << "wieder nachher: " << stric << '\n';

    // Plain 0 is the success status of main; EXIT_SUCCESS would require
    // <cstdlib>, which this example does not include.
    return 0;
}

Weitere Infos auf:
https://de.wikipedia.org/wiki/UTF-8
https://mothereff.in/utf-8
https://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
https://www.w3schools.com/charsets/ref_html_utf8.asp
https://de.wikipedia.org/wiki/Extended_Binary_Coded_Decimal_Interchange_Code

Eine Anmerkung wegen der zuletzt genannten Webseite:
Folgenden Code habe ich auf einer CD gefunden (C-Box für DOS). Zwar uralt, aber
vielleicht erspart er jemandem einiges an Tipperei:
Code:
/**
 * Lookup table from ASCII to EBCDIC.
 *
 * The index is the 8-bit input code (0-255), the entry is the EBCDIC code it
 * maps to; e.g. index 65 ('A') holds 193 (0xC1), index 48 ('0') holds
 * 240 (0xF0) and index 32 (space) holds 64 (0x40).
 *
 * NOTE(review): the source does not say which EBCDIC code page this is —
 * confirm before relying on entries outside the basic letters and digits.
 */
int ascii2ebcdic[256] = {
          0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15,
         16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
         64, 79,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
        240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
        124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
        215,216,217,226,227,228,229,230,231,232,233, 74,224, 90, 95,109,
        121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
        151,152,153,162,163,164,165,166,167,168,169,192,106,208,161,  7,
         32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27,
         48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,225,
         65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 82, 83, 84, 85, 86, 87,
         88, 89, 98, 99,100,101,102,103,104,105,112,113,114,115,116,117,
        118,119,120,128,138,139,140,141,142,143,144,154,155,156,157,158,
        159,160,170,171,172,173,174,175,176,177,178,179,180,181,182,183,
        184,185,186,187,188,189,190,191,202,203,204,205,206,207,218,219,
        220,221,222,223,234,235,236,237,238,239,250,251,252,253,254,255
};

/**
 * Lookup table from EBCDIC to ASCII.
 *
 * The index is the 8-bit EBCDIC code (0-255), the entry is the code it maps
 * to; e.g. index 193 (0xC1) holds 65 ('A'), index 240 (0xF0) holds 48 ('0')
 * and index 64 (0x40) holds 32 (space).
 *
 * NOTE(review): the source does not say which EBCDIC code page this is —
 * confirm before relying on entries outside the basic letters and digits.
 */
int ebcdic2ascii[256] = {
          0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15,
         16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31,
        128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7,
        144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26,
         32,160,161,162,163,164,165,166,167,168, 91, 46, 60, 40, 43, 33,
         38,169,170,171,172,173,174,175,176,177, 93, 36, 42, 41, 59, 94,
         45, 47,178,179,180,181,182,183,184,185,124, 44, 37, 95, 62, 63,
        186,187,188,189,190,191,192,193,194, 96, 58, 35, 64, 39, 61, 34,
        195, 97, 98, 99,100,101,102,103,104,105,196,197,198,199,200,201,
        202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208,
        209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215,
        216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,
        123, 65, 66, 67, 68, 69, 70, 71, 72, 73,232,233,234,235,236,237,
        125, 74, 75, 76, 77, 78, 79, 80, 81, 82,238,239,240,241,242,243,
         92,159, 83, 84, 85, 86, 87, 88, 89, 90,244,245,246,247,248,249,
         48, 49, 50, 51, 52, 53, 54, 55, 56, 57,250,251,252,253,254,255
};


Gecodet wurde auf SUSE Linux tumbleweed mit Kernel 4.16.12.2
und codeblocks build 2017-07-27 mit gcc 7-2.9

Viel Spaß
rustyoldguy
Autor
rustyoldguy
First release
Last update
Bewertung
0,00 Stern(e) 0 Bewertungen
Oben