diff options
author | Alexis211 <alexis211@gmail.com> | 2009-08-29 19:03:00 +0200 |
---|---|---|
committer | Alexis211 <alexis211@gmail.com> | 2009-08-29 19:03:00 +0200 |
commit | 9ad8e2fe0242da26dae7fca2b180640637c8c062 (patch) | |
tree | 92fdd3f2195221a110d2c4f1f2e98bd6505578ed /Source/Kernel/Library | |
parent | 0139012d683036fb661fed62babb71f59ec9ab45 (diff) | |
download | Melon-9ad8e2fe0242da26dae7fca2b180640637c8c062.tar.gz Melon-9ad8e2fe0242da26dae7fca2b180640637c8c062.zip |
We now have (partial) support for UTF-8.
Diffstat (limited to 'Source/Kernel/Library')
-rw-r--r-- | Source/Kernel/Library/.String.class.cpp.swp | bin | 20480 -> 0 bytes | |||
-rw-r--r-- | Source/Kernel/Library/String.class.cpp | 89 | ||||
-rw-r--r-- | Source/Kernel/Library/String.class.h | 12 | ||||
-rw-r--r-- | Source/Kernel/Library/wchar.class.cpp | 74 | ||||
-rw-r--r-- | Source/Kernel/Library/wchar.class.h | 51 |
5 files changed, 178 insertions, 48 deletions
diff --git a/Source/Kernel/Library/.String.class.cpp.swp b/Source/Kernel/Library/.String.class.cpp.swp
Binary files differ
deleted file mode 100644
index 6fd369f..0000000
--- a/Source/Kernel/Library/.String.class.cpp.swp
+++ /dev/null
diff --git a/Source/Kernel/Library/String.class.cpp b/Source/Kernel/Library/String.class.cpp
index c118800..518d8c9 100644
--- a/Source/Kernel/Library/String.class.cpp
+++ b/Source/Kernel/Library/String.class.cpp
@@ -6,7 +6,7 @@ using namespace CMem; //strlen and memcpy
 String String::hex(u32int number) {
 	String ret;
 	ret.m_length = 10;
-	ret.m_string = (char*)Mem::kalloc(11);
+	ret.m_string = new wchar[11];
 	ret.m_string[0] = '0';
 	ret.m_string[1] = 'x';
 	ret.m_string[10] = 0;
@@ -35,7 +35,7 @@ String String::number(s32int number) {
 
 	String ret;
 	ret.m_length = order;
-	ret.m_string = (char*)Mem::kalloc(order + 1);
+	ret.m_string = new wchar[order + 1];
 
 	for (u32int i = order; i > 0; i--) {
 		ret.m_string[i - 1] = numbers[number % 10];
@@ -55,14 +55,16 @@ String::String() {
 }
 
 String::String(char* string) {
-	m_length = strlen(string);
+	m_length = wchar::utf8len(string);
 	if (m_length == 0) {
 		m_string = 0;
 		return;
 	}
-	m_string = (char*)Mem::kalloc(m_length + 1);
-	for (u32int i = 0; i < m_length; i++) {
-		m_string[i] = string[i];
+	m_string = new wchar[m_length + 1];
+	int i = 0, l = strlen(string), c = 0;
+	while (i < l) {
+		i += m_string[c].affectUtf8(string + i);
+		c++;
 	}
 	m_string[m_length] = 0;
 }
@@ -73,7 +75,7 @@ String::String(const String &other) {
 		m_string = 0;
 		return;
 	}
-	m_string = (char*)Mem::kalloc(m_length + 1);
+	m_string = new wchar[m_length + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		m_string[i] = other.m_string[i];
 	}
@@ -81,17 +83,17 @@ String::String(const String &other) {
 }
 
 String::~String() {
-	if (m_string != 0) Mem::kfree(m_string);
+	if (m_string != 0) delete [] m_string;
 }
 
 void String::operator= (const String &other) {
 	m_length = other.m_length;
-	if (m_string != 0) Mem::kfree(m_string);
+	if (m_string != 0) delete [] m_string;
 	if (m_length == 0) {
 		m_string = 0;
 		return;
 	}
-	m_string = (char*)Mem::kalloc(m_length + 1);
+	m_string = new wchar[m_length + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		m_string[i] = other.m_string[i];
 	}
@@ -99,15 +101,17 @@ void String::operator= (const String &other) {
 }
 
 void String::operator= (char* string) {
-	m_length = strlen(string);
-	if (m_string != 0) Mem::kfree(m_string);
+	m_length = wchar::utf8len(string);
+	if (m_string != 0) delete [] m_string;
 	if (m_length == 0) {
 		m_string = 0;
 		return;
 	}
-	m_string = (char*)Mem::kalloc(m_length + 1);
-	for (u32int i = 0; i < m_length; i++) {
-		m_string[i] = string[i];
+	m_string = new wchar[m_length + 1];
+	int i = 0, l = strlen(string), c = 0;
+	while (i < l) {
+		i += m_string[c].affectUtf8(string + i);
+		c++;
 	}
 	m_string[m_length] = 0;
 }
@@ -121,22 +125,26 @@ bool String::operator== (String &other) {
 }
 
 bool String::operator== (char* string) {
-	if (m_length != strlen(string)) return false;
-	for (u32int i = 0; i < m_length; i++) {
-		if (m_string[i] != string[i]) return false;
+	if (m_length != wchar::utf8len(string)) return false;
+	int i = 0, l = strlen(string), c = 0;
+	wchar tmp;
+	while (i < l) {
+		i += tmp.affectUtf8(string + i);
+		if (m_string[c] != tmp) return false;
+		c++;
 	}
 	return true;
 }
 
 String& String::operator+= (String &other) {
-	char* newdata = (char*)Mem::kalloc(m_length + other.m_length + 1);
+	wchar* newdata = new wchar[m_length + other.m_length + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
 	}
 	for (u32int i = 0; i < other.m_length; i++) {
 		newdata[i + m_length] = other.m_string[i];
 	}
-	if (m_string != 0) Mem::kfree(m_string);
+	if (m_string != 0) delete [] m_string;
 	m_string = newdata;
 	m_length += other.m_length;
 	m_string[m_length] = 0;
@@ -144,30 +152,32 @@ String& String::operator+= (String &other) {
 }
 
 String& String::operator+= (char* other) {
-	char* newdata = (char*)Mem::kalloc(m_length + strlen(other) + 1);
+	wchar* newdata = new wchar[m_length + wchar::utf8len(other) + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
 	}
-	for (u32int i = 0; i < strlen(other); i++) {
-		newdata[i + m_length] = other[i];
+	int i = 0, l = strlen(other), c = 0;
+	while (i < l) {
+		i += newdata[c + m_length].affectUtf8(other + i);
+		c++;
 	}
-	if (m_string != 0) Mem::kfree(m_string);
+	if (m_string != 0) delete [] m_string;
 	m_string = newdata;
 	m_length += strlen(other);
 	m_string[m_length] = 0;
 	return *this;
 }
 
-String& String::operator+= (char other) {
-	char* newdata = (char*)Mem::kalloc(m_length + 2);
+String& String::operator+= (wchar other) {
+	wchar* newdata = new wchar[m_length + 2];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
 	}
-	if (m_string != 0) Mem::kfree(m_string);
+	if (m_string != 0) delete[] m_string;
 	m_string = newdata;
 	m_string[m_length] = other;
 	m_length++;
-	m_string[m_length] = 0;
+	m_string[m_length].value = 0;
 	return *this;
 }
 
@@ -181,27 +191,22 @@ String& String::operator+ (char* other) { //Can be optimized
 	return (ret += other);
 }
 
-String& String::operator+ (char other) { //Can be optimized
+String& String::operator+ (wchar other) {
 	String ret(*this);
 	return (ret += other);
 }
 
-String::operator char* () {
-	if (m_string == 0) return "";
-	return m_string;
-}
-
 s32int String::toInt() {
 	if (m_string == 0) return 0;
 	s32int pos = 0, number = 0;
 	bool negative = false;
-	if (m_string[0] == '-') {
+	if (m_string[0].value == '-') {
 		negative = true;
 		pos = 1;
 	}
 	while (m_string[pos] >= '0' && m_string[pos] <= '9') {
 		number *= 10;
-		number += (m_string[pos] - '0');
+		number += (m_string[pos].value - '0');
 		pos++;
 	}
 	if (negative) return 0 - number;
@@ -211,7 +216,7 @@ s32int String::toInt() {
 u32int String::toInt16() {
 	if (m_string == 0) return 0;
 	u32int pos = 0, number = 0;
-	if (m_string[0] == '0' && m_string[1] == 'x') pos = 2;
+	if (m_string[0].value == '0' && m_string[1].value == 'x') pos = 2;
 	while (1) {
 		char c = m_string[pos];
 		pos++;
@@ -231,7 +236,7 @@ u32int String::toInt16() {
 	return number;
 }
 
-char& String::operator[] (int index) {
+wchar& String::operator[] (int index) {
 	return m_string[index];
 }
 
@@ -240,7 +245,7 @@ u32int String::size() {
 }
 
 void String::clear() {
-	Mem::kfree(m_string);
+	delete [] m_string;
 	m_length = 0;
 	m_string = 0;
 }
@@ -249,7 +254,7 @@ bool String::empty() {
 	return (m_length == 0);
 }
 
-Vector<String> String::split(char c) {
+Vector<String> String::split(wchar c) {
 	Vector<String> ret;
 	ret.push(String(""));
 	for (u32int i = 0; i < m_length; i++) {
@@ -269,9 +274,9 @@ String String::substr(s32int start, s32int size) {
 		size = 0 - size;
 	}
 	String ret;
-	ret.m_string = (char*)Mem::kalloc(size + 1);
+	ret.m_string = new wchar[size + 1];
 	ret.m_length = size;
-	memcpy((u8int*)ret.m_string, (const u8int*)(m_string + start), size);
+	memcpy((u8int*)ret.m_string, (const u8int*)(m_string + start), size * sizeof(wchar));
 	ret.m_string[size] = 0;
 	return ret;
 }
diff --git a/Source/Kernel/Library/String.class.h b/Source/Kernel/Library/String.class.h
index 58237f0..d086b31 100644
--- a/Source/Kernel/Library/String.class.h
+++ b/Source/Kernel/Library/String.class.h
@@ -2,12 +2,13 @@
 #define DEF_STRING_CLASS
 
 #include <Core/common.wtf.h>
+#include <Library/wchar.class.h>
 
 template <typename T> class Vector;
 
 class String {
 	private:
-	char *m_string;
+	wchar *m_string;
 	u32int m_length;
 
 	public:
@@ -26,20 +27,19 @@ class String {
 	bool operator== (char* string);
 	String &operator+= (String &other);
 	String &operator+= (char* other);
-	String &operator+= (char other);
+	String &operator+= (wchar other);
 	String &operator+ (String &other);
 	String &operator+ (char* other);
-	String &operator+ (char other);
-	operator char* ();
+	String &operator+ (wchar other);
 	s32int toInt();
 	u32int toInt16(); //From HEX
-	char& operator[] (int index);
+	wchar& operator[] (int index);
 
 	u32int size();
 	void clear();
 	bool empty();
 
-	Vector<String> split(char c);
+	Vector<String> split(wchar c);
 	String substr(s32int start, s32int size);
 };
 
diff --git a/Source/Kernel/Library/wchar.class.cpp b/Source/Kernel/Library/wchar.class.cpp
new file mode 100644
index 0000000..acf5392
--- /dev/null
+++ b/Source/Kernel/Library/wchar.class.cpp
@@ -0,0 +1,74 @@
+#include "wchar.class.h"
+
+wchar wchar::CP437[] = { //These are the UTF8 equivalents for the 128 extra characters of code page 850
+	"Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
+	"É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ",
+	"á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»",
+	"░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐",
+	"└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "¤",
+	"╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀",
+	"α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩",
+	"≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∙", "·", "√", "ⁿ", "²", "■", "⍽"
+};
+
+wchar::wchar() {
+	value = 0;
+}
+
+wchar::wchar(char c) {
+	affectAscii(c);
+}
+
+wchar::wchar(char* c) {
+	affectUtf8(c);
+}
+
+u32int wchar::utf8len(char* c) {
+	int i = 0, l = CMem::strlen(c), co = 0;
+	while (i < l) {
+		if ((c[i] & 0x80) == 0) i += 1;
+		else if ((c[i] & 0xE0) == 0xC0) i += 2;
+		else if ((c[i] & 0xF0) == 0xE0) i += 3;
+		else if ((c[i] & 0xF8) == 0xF0) i += 4;
+		else i += 1;
+		co++;
+	}
+	return co;
+}
+
+void wchar::affectAscii(char c) {
+	if (c >= 0) value = c;
+	else value = CP437[c + 128];
+}
+
+u32int wchar::affectUtf8(char* c) { //Returns the number of bytes for the character
+	if ((c[0] & 0x80) == 0) {
+		value = c[0]; //0x80 = 10000000b
+		return 1;
+	}
+	if ((c[0] & 0xE0) == 0xC0) { // 11100000b, 11000000b
+		value = ((c[0] & 0x1F) << 6) | (c[1] & 0x3F);
+		if (value < 128) value = 0; //Bad value
+		return 2;
+	}
+	if ((c[0] & 0xF0) == 0xE0) { // 11110000b, 11100000b
+		value = ((c[0] & 0x0F) << 12) | ((c[1] & 0x3F) << 6) | (c[2] & 0x3F);
+		if (value < 2048) value = 0; //Bad value
+		return 3;
+	}
+	if ((c[0] & 0xF8) == 0xF0) { // 11111000b, 11110000b
+		value = ((c[0] & 0x0E) << 18) | ((c[1] & 0x3F) << 12) | ((c[2] & 0x3F) << 6) | (c[3] & 0x3F);
+		if (value < 65536) value = 0; //Bad value
+		return 4;
+	}
+	value = 0; //Something wrong happenned
+	return 1;
+}
+
+u8int wchar::toAscii() {
+	if (value < 128) return (char)value;
+	for (int i = 0; i < 128; i++) {
+		if (CP437[i] == value) return (i + 128);
+	}
+	return '?';
+}
diff --git a/Source/Kernel/Library/wchar.class.h b/Source/Kernel/Library/wchar.class.h
new file mode 100644
index 0000000..cadabd0
--- /dev/null
+++ b/Source/Kernel/Library/wchar.class.h
@@ -0,0 +1,51 @@
+#ifndef DEF_UCHAR_CLASS_H
+#define DEF_UCHAR_CLASS_H
+
+#include <Core/common.wtf.h>
+
+struct wchar {
+	u32int value;
+	static wchar CP437[];
+
+	wchar(); //Creates a null character
+	wchar(char c); //From ascii character
+	wchar(char* c); //From utf8 string
+
+	static u32int utf8len(char* c); //Returns count of utf8 characters in string
+
+	void affectAscii(char c);
+	u32int affectUtf8(char* c);
+	void affectUtf16(char* c);
+	void affectUtf32(char* c);
+	u8int toAscii();
+
+	inline wchar operator+ (u32int other) {
+		wchar r;
+		r.value = value + other;
+		return r;
+	}
+	inline wchar operator- (u32int other) {
+		wchar r;
+		r.value = value - other;
+		return r;
+	}
+	inline wchar& operator+= (u32int other) {
+		value += other;
+		return *this;
+	}
+	inline wchar& operator-= (u32int other) {
+		value -= other;
+		return *this;
+	}
+	inline bool operator== (u32int other) {
+		return value == other;
+	}
+	inline u32int operator= (u32int v) {
+		value = v;
+		return v;
+	}
+
+	inline operator u32int () { return value; }
+};
+
+#endif