diff options
author | Alexis211 <alexis211@gmail.com> | 2009-10-20 19:23:33 +0200 |
---|---|---|
committer | Alexis211 <alexis211@gmail.com> | 2009-10-20 19:23:33 +0200 |
commit | 768ada13917aeda373e6ff5fee21faf90c963746 (patch) | |
tree | 9e26d7d65e1693d1a7f9fd93c9fd33b41d175464 /Source/Library/Common/WChar.class.cpp | |
parent | 6ec4b3d31080f90393e72989d559cfb76eff6f9d (diff) | |
parent | 9836acd720988af30250c2c1ec18d618664dea4e (diff) | |
download | Melon-768ada13917aeda373e6ff5fee21faf90c963746.tar.gz Melon-768ada13917aeda373e6ff5fee21faf90c963746.zip |
Merge branch 'usermode_syscalls'
Conflicts:
Source/Kernel/Makefile
Diffstat (limited to 'Source/Library/Common/WChar.class.cpp')
-rw-r--r-- | Source/Library/Common/WChar.class.cpp | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/Source/Library/Common/WChar.class.cpp b/Source/Library/Common/WChar.class.cpp new file mode 100644 index 0000000..5485bb8 --- /dev/null +++ b/Source/Library/Common/WChar.class.cpp @@ -0,0 +1,155 @@ +#include "WChar.class.h" + +#ifdef THIS_IS_MELON_KERNEL +using namespace CMem; +#endif + +#ifdef THIS_IS_MELON_USERLAND +using namespace CMem; +#endif + +WChar WChar::CP437[] = { //These are the UTF8 equivalents for the 128 extra characters of code page 437 + "Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å", + "É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ", + "á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»", + "░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐", + "└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "¤", + "╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀", + "α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩", + "≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∙", "·", "√", "ⁿ", "²", "■", "⍽" +}; + +WChar::WChar() { + value = 0; +} + +WChar::WChar(char c) { + affectAscii(c); +} + +WChar::WChar(const char* c, u8int encoding) { + if (encoding == UE_UTF8) affectUtf8(c); + if (encoding == UE_UTF16) affectUtf16(c); + if (encoding == UE_UTF32) affectUtf32(c); +} + +u32int WChar::ucharLen(const char* c, u8int encoding) { + if (encoding == UE_UTF8) { + if ((c[0] & 0x80) == 0) return 1; + else if ((c[0] & 0xE0) == 0xC0) return 2; + else if ((c[0] & 0xF0) == 0xE0) return 3; + else if ((c[0] & 0xF8) == 0xF0) return 4; + else return 1; + } else if (encoding == UE_UTF16) { + if ((c[0] & 0xFC) == 0xD8 and (c[2] & 0xFC) == 0xDC) return 4; + else return 2; + } else if (encoding == UE_UTF32) { + return 4; + } + return 1; +} + +u32int WChar::utfLen(const char* c, u8int encoding) { + int i = 0, l = strlen(c), co = 0; + while (i < l) { + i += ucharLen(c + i, encoding); + co++; + } + return co; +} + +void WChar::affectAscii(char c) { + if (c >= 0) value = c; + else value = CP437[c + 128]; +} + +u32int WChar::affectUtf8(const char* c) { //Returns the number of bytes for the character + if ((c[0] & 0x80) == 0) { + value = c[0]; //0x80 = 10000000b + return 1; + } + if ((c[0] & 0xE0) == 0xC0) { // 11100000b, 11000000b + value = ((c[0] & 0x1F) << 6) | (c[1] & 0x3F); + if (value < 128) value = 0; //Bad value + return 2; + } + if ((c[0] & 0xF0) == 0xE0) { // 11110000b, 11100000b + value = ((c[0] & 0x0F) << 12) | ((c[1] & 0x3F) << 6) | (c[2] & 0x3F); + if (value < 2048) value = 0; //Bad value + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 3; + } + if ((c[0] & 0xF8) == 0xF0) { // 11111000b, 11110000b + value = ((c[0] & 0x0E) << 18) | ((c[1] & 0x3F) << 12) | ((c[2] & 0x3F) << 6) | (c[3] & 0x3F); + if (value < 65536) value = 0; //Bad value + return 4; + } + value = 0; //Something wrong happenned + return 1; +} + +u32int WChar::affectUtf16(const char* c) { + if ((c[0] & 0xFC) == 0xD8 and // 11111100b, 11011000b + (c[2] & 0xFC) == 0xDC) { // 11111100b, 11011100b + u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6); + u32int x = (c[1] & 0x3F); + u32int y = ((c[2] & 0x03) << 8) | (c[2]); + value = ((w + 1) << 16) | (x << 10) | y; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; + } else { + value = (c[0] << 8) | (c[1]); + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 2; + } +} + +u32int WChar::affectUtf32(const char* c) { + value = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3]; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; +} + +u8int WChar::toAscii() { + if (value < 128) return (char)value; + for (int i = 0; i < 128; i++) { + if (CP437[i] == value) return (i + 128); + } + return '?'; +} + +uchar_repr_t WChar::toUtf8() { + uchar_repr_t r; + r.i = 0; + if (value < 128) { + r.c[0] = value; + } else if (value < 4096) { + r.c[0] = 0xC0 | ((value & 0x07C0) >> 6); + r.c[1] = 0x80 | (value & 0x3F); + } else if (value < 65536) { + r.c[0] = 0xE0 | ((value & 0xF000) >> 12); + r.c[1] = 0x80 | ((value & 0x0FC0) >> 6); + r.c[2] = 0x80 | (value & 0x003F); + } else { + r.c[0] = 0xF0 | ((value & 0x1C0000) >> 18); + r.c[1] = 0x80 | ((value & 0x3F000) >> 12); + r.c[2] = 0x80 | ((value & 0x0FC0) >> 6); + r.c[3] = 0x80 | (value & 0x003F); + } + return r; +} + +//TODO : code WChar::toUtf16 + +uchar_repr_t WChar::toUtf32() { + uchar_repr_t r; + r.c[0] = (value >> 24) & 0xFF; + r.c[1] = (value >> 16) & 0xFF; + r.c[2] = (value >> 8) & 0xFF; + r.c[3] = value & 0xFF; + return r; +} |