diff options
Diffstat (limited to 'Source/Kernel/Library')
-rw-r--r-- | Source/Kernel/Library/String.class.cpp | 40 | ||||
-rw-r--r-- | Source/Kernel/Library/String.class.h | 46 | ||||
-rw-r--r-- | Source/Kernel/Library/WChar.class.cpp | 82 | ||||
-rw-r--r-- | Source/Kernel/Library/WChar.class.h | 16 |
4 files changed, 138 insertions, 46 deletions
diff --git a/Source/Kernel/Library/String.class.cpp b/Source/Kernel/Library/String.class.cpp index 6380b25..9d4083b 100644 --- a/Source/Kernel/Library/String.class.cpp +++ b/Source/Kernel/Library/String.class.cpp @@ -54,8 +54,8 @@ String::String() { m_length = 0; } -String::String(const char* string) { - m_length = WChar::utfLen(string); +String::String(const char* string, u8int encoding) { + m_length = WChar::utfLen(string, encoding); if (m_length == 0) { m_string = 0; return; @@ -63,7 +63,7 @@ String::String(const char* string) { m_string = new WChar[m_length + 1]; int i = 0, l = strlen(string), c = 0; while (i < l) { - i += m_string[c].affectUtf8(string + i); + i += m_string[c].affect(string + i, encoding); c++; } m_string[m_length] = 0; @@ -86,7 +86,7 @@ String::~String() { if (m_string != 0) delete [] m_string; } -void String::operator= (const String &other) { +void String::affect (const String &other) { m_length = other.m_length; if (m_string != 0) delete [] m_string; if (m_length == 0) { @@ -100,8 +100,8 @@ void String::operator= (const String &other) { m_string[m_length] = 0; } -void String::operator= (const char* string) { - m_length = WChar::utfLen(string); +void String::affect (const char* string, u8int encoding) { + m_length = WChar::utfLen(string, encoding); if (m_string != 0) delete [] m_string; if (m_length == 0) { m_string = 0; @@ -110,13 +110,13 @@ void String::operator= (const char* string) { m_string = new WChar[m_length + 1]; int i = 0, l = strlen(string), c = 0; while (i < l) { - i += m_string[c].affectUtf8(string + i); + i += m_string[c].affect(string + i, encoding); c++; } m_string[m_length] = 0; } -bool String::operator== (const String &other) const { +bool String::compare (const String &other) const { if (m_length != other.m_length) return false; for (u32int i = 0; i < m_length; i++) { if (m_string[i] != other.m_string[i]) return false; @@ -124,19 +124,19 @@ bool String::operator== (const String &other) const { return true; } -bool String::operator== (const char* string) const { - if (m_length != WChar::utfLen(string)) return false; +bool String::compare (const char* string, u8int encoding) const { + if (m_length != WChar::utfLen(string, encoding)) return false; int i = 0, l = strlen(string), c = 0; WChar tmp; while (i < l) { - i += tmp.affectUtf8(string + i); + i += tmp.affect(string + i, encoding); if (m_string[c] != tmp) return false; c++; } return true; } -String& String::operator+= (const String &other) { +String& String::append (const String &other) { WChar* newdata = new WChar[m_length + other.m_length + 1]; for (u32int i = 0; i < m_length; i++) { newdata[i] = m_string[i]; @@ -151,14 +151,14 @@ String& String::operator+= (const String &other) { return *this; } -String& String::operator+= (const char* other) { - WChar* newdata = new WChar[m_length + WChar::utfLen(other) + 1]; +String& String::append (const char* other, u8int encoding) { + WChar* newdata = new WChar[m_length + WChar::utfLen(other, encoding) + 1]; for (u32int i = 0; i < m_length; i++) { newdata[i] = m_string[i]; } int i = 0, l = strlen(other), c = 0; while (i < l) { - i += newdata[c + m_length].affectUtf8(other + i); + i += newdata[c + m_length].affect(other + i, encoding); c++; } if (m_string != 0) delete [] m_string; @@ -168,7 +168,7 @@ String& String::operator+= (const char* other) { return *this; } -String& String::operator+= (WChar other) { +String& String::append (WChar other) { WChar* newdata = new WChar[m_length + 2]; for (u32int i = 0; i < m_length; i++) { newdata[i] = m_string[i]; @@ -181,17 +181,17 @@ String& String::operator+= (WChar other) { return *this; } -String& String::operator+ (const String &other) const { //Can be optimized +String String::concat (const String &other) const { //Can be optimized String ret(*this); return (ret += other); } -String& String::operator+ (const char* other) const { //Can be optimized +String String::concat (const char* other, u8int encoding) const { //Can be optimized String ret(*this); - return (ret += other); + return (ret.append(other, encoding)); } -String& String::operator+ (WChar other) const { +String String::concat (WChar other) const { String ret(*this); return (ret += other); } diff --git a/Source/Kernel/Library/String.class.h b/Source/Kernel/Library/String.class.h index 01cc6a8..6a9de64 100644 --- a/Source/Kernel/Library/String.class.h +++ b/Source/Kernel/Library/String.class.h @@ -15,26 +15,40 @@ class String { static String hex(u32int number); static String number(s32int number); - String(const char* string); + String(const char* string, u8int encoding = UE_UTF8); String(); String(const String &other); ~String(); - void operator= (const String &other); - void operator= (const char* string); - - bool operator== (const String &other) const; - bool operator== (const char* string) const; - bool operator!= (const String &other) { return !(operator== (other)); } - bool operator!= (const char* other) { return !(operator== (other)); } - String &operator+= (const String &other); - String &operator+= (const char* other); - String &operator+= (WChar other); - String &operator+ (const String &other) const; - String &operator+ (const char* other) const; - String &operator+ (WChar other) const; - s32int toInt() const; - u32int toInt16() const; //From HEX + void affect(const String &other); + void affect(const char* string, u8int encoding = UE_UTF8); + void operator= (const String &other) { affect(other); } + void operator= (const char* other) { affect(other); } + + bool compare(const String &other) const; + bool compare(const char* string, u8int encoding = UE_UTF8) const; + bool operator== (const String &other) const { return compare(other); } + bool operator== (const char* other) const { return compare(other); } + bool operator!= (const String &other) { return !compare(other); } + bool operator!= (const char* other) { return !compare(other); } + + String& append(const String &other); + String& append(const char* other, u8int encoding = UE_UTF8); + String& append(WChar other); + String &operator+= (const String &other) { return append(other); } + String &operator+= (const char* other) { return append(other); } + String &operator+= (WChar other) { return append(other); } + + String concat(const String &other) const; + String concat(const char* other, u8int encoding = UE_UTF8) const; + String concat(WChar other) const; + String operator+ (const String &other) const { return concat(other); } + String operator+ (const char* other) const { return concat(other); } + String operator+ (WChar other) const { return concat(other); } + + s32int toInt() const; //Convert from DEC + u32int toInt16() const; //Convert from HEX + WChar& operator[] (int index) const; u32int size() const; diff --git a/Source/Kernel/Library/WChar.class.cpp b/Source/Kernel/Library/WChar.class.cpp index d7f01de..aad46c3 100644 --- a/Source/Kernel/Library/WChar.class.cpp +++ b/Source/Kernel/Library/WChar.class.cpp @@ -19,19 +19,28 @@ WChar::WChar(char c) { affectAscii(c); } -WChar::WChar(const char* c, u8int encoding) { //TODO : take encoding into account - affectUtf8(c); +WChar::WChar(const char* c, u8int encoding) { + if (encoding == UE_UTF8) affectUtf8(c); + if (encoding == UE_UTF16) affectUtf16(c); + if (encoding == UE_UTF32) affectUtf32(c); } u32int WChar::utfLen(const char* c, u8int encoding) { int i = 0, l = CMem::strlen(c), co = 0; while (i < l) { - if ((c[i] & 0x80) == 0) i += 1; - else if ((c[i] & 0xE0) == 0xC0) i += 2; - else if ((c[i] & 0xF0) == 0xE0) i += 3; - else if ((c[i] & 0xF8) == 0xF0) i += 4; - else i += 1; - co++; + if (encoding == UE_UTF8) { + if ((c[i] & 0x80) == 0) i += 1; + else if ((c[i] & 0xE0) == 0xC0) i += 2; + else if ((c[i] & 0xF0) == 0xE0) i += 3; + else if ((c[i] & 0xF8) == 0xF0) i += 4; + else i += 1; + co++; + } else if (encoding == UE_UTF16) { + if ((c[i] & 0xFC) == 0xD8 and (c[i + 2] & 0xFC) == 0xDC) i += 4; + else i += 2; + } else if (encoding == UE_UTF32) { + i += 4; + } } return co; } @@ -67,6 +76,31 @@ u32int WChar::affectUtf8(const char* c) { //Returns the number of bytes for the return 1; } +u32int WChar::affectUtf16(const char* c) { + if ((c[0] & 0xFC) == 0xD8 and // 11111100b, 11011000b + (c[2] & 0xFC) == 0xDC) { // 11111100b, 11011100b + u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6); + u32int x = (c[1] & 0x3F); + u32int y = ((c[2] & 0x03) << 8) | (c[2]); + value = ((w + 1) << 16) | (x << 10) | y; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; + } else { + value = (c[0] << 8) | (c[1]); + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 2; + } +} + +u32int WChar::affectUtf32(const char* c) { + value = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3]; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; +} + u8int WChar::toAscii() { if (value < 128) return (char)value; for (int i = 0; i < 128; i++) { @@ -74,3 +108,35 @@ u8int WChar::toAscii() { } return '?'; } + +uchar_repr_t WChar::toUtf8() { + uchar_repr_t r; + r.i = 0; + if (value < 128) { + r.c[0] = value; + } else if (value < 4096) { + r.c[0] = 0xC0 | ((value & 0x07C0) >> 6); + r.c[1] = 0x80 | (value & 0x3F); + } else if (value < 65536) { + r.c[0] = 0xE0 | ((value & 0xF000) >> 12); + r.c[1] = 0x80 | ((value & 0x0FC0) >> 6); + r.c[2] = 0x80 | (value & 0x003F); + } else { + r.c[0] = 0xF0 | ((value & 0x1C0000) >> 18); + r.c[1] = 0x80 | ((value & 0x3F000) >> 12); + r.c[2] = 0x80 | ((value & 0x0FC0) >> 6); + r.c[3] = 0x80 | (value & 0x003F); + } + return r; +} + +//TODO : code WChar::toUtf16 + +uchar_repr_t WChar::toUtf32() { + uchar_repr_t r; + r.c[0] = (value >> 24) & 0xFF; + r.c[1] = (value >> 16) & 0xFF; + r.c[2] = (value >> 8) & 0xFF; + r.c[3] = value & 0xFF; + return r; +} diff --git a/Source/Kernel/Library/WChar.class.h b/Source/Kernel/Library/WChar.class.h index fc00577..63f1ea3 100644 --- a/Source/Kernel/Library/WChar.class.h +++ b/Source/Kernel/Library/WChar.class.h @@ -26,11 +26,23 @@ struct WChar { void affectAscii(char c); u32int affectUtf8(const char* c); - void affectUtf16(const char* c); - void affectUtf32(const char* c); + u32int affectUtf16(const char* c); + u32int affectUtf32(const char* c); + + u32int affect(const char* c, u8int encoding = UE_UTF8) { + if (encoding == UE_UTF8) return affectUtf8(c); + if (encoding == UE_UTF16) return affectUtf16(c); + if (encoding == UE_UTF32) return affectUtf32(c); + affectAscii(c[0]); //Default case :/ + return 1; + } u8int toAscii(); + uchar_repr_t toUtf8(); + uchar_repr_t toUtf16(); + uchar_repr_t toUtf32(); + inline WChar operator+ (u32int other) { WChar r; r.value = value + other; |