summaryrefslogtreecommitdiff
path: root/Source/Kernel/Library
diff options
context:
space:
mode:
authorAlexis211 <alexis211@gmail.com>2009-08-29 19:03:00 +0200
committerAlexis211 <alexis211@gmail.com>2009-08-29 19:03:00 +0200
commit9ad8e2fe0242da26dae7fca2b180640637c8c062 (patch)
tree92fdd3f2195221a110d2c4f1f2e98bd6505578ed /Source/Kernel/Library
parent0139012d683036fb661fed62babb71f59ec9ab45 (diff)
downloadMelon-9ad8e2fe0242da26dae7fca2b180640637c8c062.tar.gz
Melon-9ad8e2fe0242da26dae7fca2b180640637c8c062.zip
We now have (partial) support for UTF-8.
Diffstat (limited to 'Source/Kernel/Library')
-rw-r--r--Source/Kernel/Library/.String.class.cpp.swpbin20480 -> 0 bytes
-rw-r--r--Source/Kernel/Library/String.class.cpp89
-rw-r--r--Source/Kernel/Library/String.class.h12
-rw-r--r--Source/Kernel/Library/wchar.class.cpp74
-rw-r--r--Source/Kernel/Library/wchar.class.h51
5 files changed, 178 insertions, 48 deletions
diff --git a/Source/Kernel/Library/.String.class.cpp.swp b/Source/Kernel/Library/.String.class.cpp.swp
deleted file mode 100644
index 6fd369f..0000000
--- a/Source/Kernel/Library/.String.class.cpp.swp
+++ /dev/null
Binary files differ
diff --git a/Source/Kernel/Library/String.class.cpp b/Source/Kernel/Library/String.class.cpp
index c118800..518d8c9 100644
--- a/Source/Kernel/Library/String.class.cpp
+++ b/Source/Kernel/Library/String.class.cpp
@@ -6,7 +6,7 @@ using namespace CMem; //strlen and memcpy
String String::hex(u32int number) {
String ret;
ret.m_length = 10;
- ret.m_string = (char*)Mem::kalloc(11);
+ ret.m_string = new wchar[11];
ret.m_string[0] = '0';
ret.m_string[1] = 'x';
ret.m_string[10] = 0;
@@ -35,7 +35,7 @@ String String::number(s32int number) {
String ret;
ret.m_length = order;
- ret.m_string = (char*)Mem::kalloc(order + 1);
+ ret.m_string = new wchar[order + 1];
for (u32int i = order; i > 0; i--) {
ret.m_string[i - 1] = numbers[number % 10];
@@ -55,14 +55,16 @@ String::String() {
}
+// Builds a String from a UTF-8 encoded C string. m_length is the number of decoded
+// characters (wchar::utf8len), not the number of bytes; an empty input leaves m_string at 0.
String::String(char* string) {
- m_length = strlen(string);
+ m_length = wchar::utf8len(string);
if (m_length == 0) {
m_string = 0;
return;
}
- m_string = (char*)Mem::kalloc(m_length + 1);
- for (u32int i = 0; i < m_length; i++) {
- m_string[i] = string[i];
+ m_string = new wchar[m_length + 1];
+ // i = byte offset into string, l = byte length, c = index of the wchar being filled.
+ int i = 0, l = strlen(string), c = 0;
+ while (i < l) {
+ // affectUtf8 decodes one character and returns the number of bytes it occupies.
+ i += m_string[c].affectUtf8(string + i);
+ c++;
}
m_string[m_length] = 0;
}
@@ -73,7 +75,7 @@ String::String(const String &other) {
m_string = 0;
return;
}
- m_string = (char*)Mem::kalloc(m_length + 1);
+ m_string = new wchar[m_length + 1];
for (u32int i = 0; i < m_length; i++) {
m_string[i] = other.m_string[i];
}
@@ -81,17 +83,17 @@ String::String(const String &other) {
}
+// Frees the character buffer with delete [] unless m_string is 0.
String::~String() {
- if (m_string != 0) Mem::kfree(m_string);
+ if (m_string != 0) delete [] m_string;
}
void String::operator= (const String &other) {
m_length = other.m_length;
- if (m_string != 0) Mem::kfree(m_string);
+ if (m_string != 0) delete [] m_string;
if (m_length == 0) {
m_string = 0;
return;
}
- m_string = (char*)Mem::kalloc(m_length + 1);
+ m_string = new wchar[m_length + 1];
for (u32int i = 0; i < m_length; i++) {
m_string[i] = other.m_string[i];
}
@@ -99,15 +101,17 @@ void String::operator= (const String &other) {
}
+// Replaces the contents with the characters decoded from a UTF-8 encoded C string.
+// The previous buffer is freed first; an empty input leaves m_string at 0.
void String::operator= (char* string) {
- m_length = strlen(string);
- if (m_string != 0) Mem::kfree(m_string);
+ m_length = wchar::utf8len(string);
+ if (m_string != 0) delete [] m_string;
if (m_length == 0) {
m_string = 0;
return;
}
- m_string = (char*)Mem::kalloc(m_length + 1);
- for (u32int i = 0; i < m_length; i++) {
- m_string[i] = string[i];
+ m_string = new wchar[m_length + 1];
+ // i = byte offset into string, l = byte length, c = index of the wchar being filled.
+ int i = 0, l = strlen(string), c = 0;
+ while (i < l) {
+ i += m_string[c].affectUtf8(string + i);
+ c++;
}
m_string[m_length] = 0;
}
@@ -121,22 +125,26 @@ bool String::operator== (String &other) {
}
+// Compares this string with a UTF-8 encoded C string, character by character.
+// Returns false as soon as the character counts or any decoded character differ.
bool String::operator== (char* string) {
- if (m_length != strlen(string)) return false;
- for (u32int i = 0; i < m_length; i++) {
- if (m_string[i] != string[i]) return false;
+ if (m_length != wchar::utf8len(string)) return false;
+ // i = byte offset into string, l = byte length, c = index into m_string.
+ int i = 0, l = strlen(string), c = 0;
+ wchar tmp;
+ while (i < l) {
+ // Decode the next character of string into tmp and advance by its byte length.
+ i += tmp.affectUtf8(string + i);
+ if (m_string[c] != tmp) return false;
+ c++;
}
return true;
}
String& String::operator+= (String &other) {
- char* newdata = (char*)Mem::kalloc(m_length + other.m_length + 1);
+ wchar* newdata = new wchar[m_length + other.m_length + 1];
for (u32int i = 0; i < m_length; i++) {
newdata[i] = m_string[i];
}
for (u32int i = 0; i < other.m_length; i++) {
newdata[i + m_length] = other.m_string[i];
}
- if (m_string != 0) Mem::kfree(m_string);
+ if (m_string != 0) delete [] m_string;
m_string = newdata;
m_length += other.m_length;
m_string[m_length] = 0;
@@ -144,30 +152,32 @@ String& String::operator+= (String &other) {
}
+// Appends the characters decoded from the UTF-8 encoded C string other.
+// A new buffer is allocated for the combined length, the old one is freed, and *this is returned.
String& String::operator+= (char* other) {
- char* newdata = (char*)Mem::kalloc(m_length + strlen(other) + 1);
+ wchar* newdata = new wchar[m_length + wchar::utf8len(other) + 1];
for (u32int i = 0; i < m_length; i++) {
newdata[i] = m_string[i];
}
- for (u32int i = 0; i < strlen(other); i++) {
- newdata[i + m_length] = other[i];
+ // i = byte offset into other, l = byte length, c = number of characters decoded so far.
+ int i = 0, l = strlen(other), c = 0;
+ while (i < l) {
+ i += newdata[c + m_length].affectUtf8(other + i);
+ c++;
}
- if (m_string != 0) Mem::kfree(m_string);
+ if (m_string != 0) delete [] m_string;
m_string = newdata;
- m_length += strlen(other);
+ // Grow by the number of decoded characters, not by the byte count: the buffer holds one
+ // wchar per character, so adding strlen(other) would put the terminator past the allocation.
+ m_length += c;
m_string[m_length] = 0;
return *this;
}
-String& String::operator+= (char other) {
- char* newdata = (char*)Mem::kalloc(m_length + 2);
+// Appends a single character: allocates a buffer one element larger, copies the existing
+// characters, frees the old buffer, then stores other and a terminating null character.
+String& String::operator+= (wchar other) {
+ wchar* newdata = new wchar[m_length + 2];
for (u32int i = 0; i < m_length; i++) {
newdata[i] = m_string[i];
}
- if (m_string != 0) Mem::kfree(m_string);
+ if (m_string != 0) delete[] m_string;
m_string = newdata;
m_string[m_length] = other;
m_length++;
- m_string[m_length] = 0;
+ m_string[m_length].value = 0;
return *this;
}
@@ -181,27 +191,22 @@ String& String::operator+ (char* other) { //Can be optimized
return (ret += other);
}
-String& String::operator+ (char other) { //Can be optimized
+// Returns a copy of this string with other appended.
+// NOTE(review): the return type is String& but the result lives in the local ret, so the
+// returned reference refers to an object that is destroyed when this function returns.
+String& String::operator+ (wchar other) {
String ret(*this);
return (ret += other);
}
-String::operator char* () {
- if (m_string == 0) return "";
- return m_string;
-}
-
s32int String::toInt() {
if (m_string == 0) return 0;
s32int pos = 0, number = 0;
bool negative = false;
- if (m_string[0] == '-') {
+ if (m_string[0].value == '-') {
negative = true;
pos = 1;
}
while (m_string[pos] >= '0' && m_string[pos] <= '9') {
number *= 10;
- number += (m_string[pos] - '0');
+ number += (m_string[pos].value - '0');
pos++;
}
if (negative) return 0 - number;
@@ -211,7 +216,7 @@ s32int String::toInt() {
u32int String::toInt16() {
if (m_string == 0) return 0;
u32int pos = 0, number = 0;
- if (m_string[0] == '0' && m_string[1] == 'x') pos = 2;
+ if (m_string[0].value == '0' && m_string[1].value == 'x') pos = 2;
while (1) {
char c = m_string[pos];
pos++;
@@ -231,7 +236,7 @@ u32int String::toInt16() {
return number;
}
-char& String::operator[] (int index) {
+// Returns a reference to the character at index; no bounds check is performed here.
+wchar& String::operator[] (int index) {
return m_string[index];
}
@@ -240,7 +245,7 @@ u32int String::size() {
}
+// Frees the character buffer and resets the string to the empty state (length 0, m_string 0).
void String::clear() {
- Mem::kfree(m_string);
+ delete [] m_string;
m_length = 0;
m_string = 0;
}
@@ -249,7 +254,7 @@ bool String::empty() {
return (m_length == 0);
}
-Vector<String> String::split(char c) {
+Vector<String> String::split(wchar c) {
Vector<String> ret;
ret.push(String(""));
for (u32int i = 0; i < m_length; i++) {
@@ -269,9 +274,9 @@ String String::substr(s32int start, s32int size) {
size = 0 - size;
}
String ret;
- ret.m_string = (char*)Mem::kalloc(size + 1);
+ ret.m_string = new wchar[size + 1];
ret.m_length = size;
- memcpy((u8int*)ret.m_string, (const u8int*)(m_string + start), size);
+ memcpy((u8int*)ret.m_string, (const u8int*)(m_string + start), size * sizeof(wchar));
ret.m_string[size] = 0;
return ret;
}
diff --git a/Source/Kernel/Library/String.class.h b/Source/Kernel/Library/String.class.h
index 58237f0..d086b31 100644
--- a/Source/Kernel/Library/String.class.h
+++ b/Source/Kernel/Library/String.class.h
@@ -2,12 +2,13 @@
#define DEF_STRING_CLASS
#include <Core/common.wtf.h>
+#include <Library/wchar.class.h>
template <typename T> class Vector;
class String {
private:
- char *m_string;
+ wchar *m_string;
u32int m_length;
public:
@@ -26,20 +27,19 @@ class String {
bool operator== (char* string);
String &operator+= (String &other);
String &operator+= (char* other);
- String &operator+= (char other);
+ String &operator+= (wchar other);
String &operator+ (String &other);
String &operator+ (char* other);
- String &operator+ (char other);
- operator char* ();
+ String &operator+ (wchar other);
s32int toInt();
u32int toInt16(); //From HEX
- char& operator[] (int index);
+ wchar& operator[] (int index);
u32int size();
void clear();
bool empty();
- Vector<String> split(char c);
+ Vector<String> split(wchar c);
String substr(s32int start, s32int size);
};
diff --git a/Source/Kernel/Library/wchar.class.cpp b/Source/Kernel/Library/wchar.class.cpp
new file mode 100644
index 0000000..acf5392
--- /dev/null
+++ b/Source/Kernel/Library/wchar.class.cpp
@@ -0,0 +1,74 @@
+#include "wchar.class.h"
+
+//Lookup table used by affectAscii and toAscii: entry i is the character for byte value 128 + i.
+//Each entry is written as a UTF-8 string literal, so it goes through the wchar(char*) constructor.
+wchar wchar::CP437[] = { //These are the UTF8 equivalents for the 128 extra characters of code page 437
+ "Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
+ "É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ",
+ "á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»",
+ "░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐",
+ "└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "¤",
+ "╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀",
+ "α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩",
+ "≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∙", "·", "√", "ⁿ", "²", "■", "⍽"
+};
+
+//Default constructor: produces the null character (value 0).
+wchar::wchar() : value(0) {
+}
+
+//Builds a character from a single byte by delegating to affectAscii.
+wchar::wchar(char c) {
+ affectAscii(c);
+}
+
+//Builds a character from the first UTF-8 sequence found at c.
+//The byte count returned by affectUtf8 is discarded.
+wchar::wchar(char* c) {
+ affectUtf8(c);
+}
+
+//Counts the characters in a UTF-8 encoded C string.
+//Only the lead byte of each sequence is inspected: it alone decides how many bytes are skipped.
+u32int wchar::utf8len(char* c) {
+    int total = CMem::strlen(c), count = 0;
+    for (int pos = 0; pos < total; count++) {
+        char lead = c[pos];
+        int step = 1; //ASCII byte, or a byte that is not a recognised lead byte
+        if ((lead & 0xE0) == 0xC0) step = 2;
+        else if ((lead & 0xF0) == 0xE0) step = 3;
+        else if ((lead & 0xF8) == 0xF0) step = 4;
+        pos += step;
+    }
+    return count;
+}
+
+//Assigns this character from a single byte.
+//Non-negative bytes are stored as they are; negative ones are looked up in the CP437 table.
+void wchar::affectAscii(char c) {
+    if (c < 0) {
+        value = CP437[c + 128];
+        return;
+    }
+    value = c;
+}
+
+//Decodes the UTF-8 sequence starting at c into value.
+//Returns the number of bytes for the character. The count depends only on the lead byte
+//(the same rule utf8len applies), so callers can advance through a string with it.
+//value is set to 0 for a bad sequence: an overlong encoding, a missing or malformed
+//continuation byte, or a byte that cannot start a sequence.
+u32int wchar::affectUtf8(char* c) {
+    if ((c[0] & 0x80) == 0) { //0xxxxxxx
+        value = c[0];
+        return 1;
+    }
+    //The continuation checks below are chained with &&, so decoding stops at the first
+    //byte that is not 10xxxxxx and never reads past a terminating null byte.
+    if ((c[0] & 0xE0) == 0xC0) { //110xxxxx 10xxxxxx
+        value = 0;
+        if ((c[1] & 0xC0) == 0x80) {
+            value = ((c[0] & 0x1F) << 6) | (c[1] & 0x3F);
+            if (value < 128) value = 0; //Bad value (overlong)
+        }
+        return 2;
+    }
+    if ((c[0] & 0xF0) == 0xE0) { //1110xxxx 10xxxxxx 10xxxxxx
+        value = 0;
+        if ((c[1] & 0xC0) == 0x80 && (c[2] & 0xC0) == 0x80) {
+            value = ((c[0] & 0x0F) << 12) | ((c[1] & 0x3F) << 6) | (c[2] & 0x3F);
+            if (value < 2048) value = 0; //Bad value (overlong)
+        }
+        return 3;
+    }
+    if ((c[0] & 0xF8) == 0xF0) { //11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        value = 0;
+        if ((c[1] & 0xC0) == 0x80 && (c[2] & 0xC0) == 0x80 && (c[3] & 0xC0) == 0x80) {
+            //The lead byte carries its payload in the low three bits, hence the 0x07 mask.
+            value = ((c[0] & 0x07) << 18) | ((c[1] & 0x3F) << 12) | ((c[2] & 0x3F) << 6) | (c[3] & 0x3F);
+            if (value < 65536) value = 0; //Bad value (overlong)
+        }
+        return 4;
+    }
+    value = 0; //Something wrong happened
+    return 1;
+}
+
+//Converts this character to a single byte.
+//Values below 128 are returned unchanged; a value found in the CP437 table yields
+//128 + its index; anything else yields '?'.
+u8int wchar::toAscii() {
+    if (value >= 128) {
+        int idx = 0;
+        while (idx < 128) {
+            if (CP437[idx] == value) return (idx + 128);
+            idx++;
+        }
+        return '?';
+    }
+    return (char)value;
+}
diff --git a/Source/Kernel/Library/wchar.class.h b/Source/Kernel/Library/wchar.class.h
new file mode 100644
index 0000000..cadabd0
--- /dev/null
+++ b/Source/Kernel/Library/wchar.class.h
@@ -0,0 +1,51 @@
+#ifndef DEF_UCHAR_CLASS_H
+#define DEF_UCHAR_CLASS_H
+
+#include <Core/common.wtf.h>
+
+//A single character stored as a 32-bit value.
+//Converts implicitly to and from u32int, so it can be compared and assigned like an integer.
+struct wchar {
+    u32int value;
+    static wchar CP437[];
+
+    wchar(); //Creates a null character
+    wchar(char c); //From ascii character
+    wchar(char* c); //From utf8 string
+
+    static u32int utf8len(char* c); //Returns count of utf8 characters in string
+
+    void affectAscii(char c);
+    u32int affectUtf8(char* c);
+    void affectUtf16(char* c);
+    void affectUtf32(char* c);
+    u8int toAscii();
+
+    //The operators below that do not modify the character are const, so they can also
+    //be used on a const wchar.
+    inline wchar operator+ (u32int other) const {
+        wchar r;
+        r.value = value + other;
+        return r;
+    }
+    inline wchar operator- (u32int other) const {
+        wchar r;
+        r.value = value - other;
+        return r;
+    }
+    inline wchar& operator+= (u32int other) {
+        value += other;
+        return *this;
+    }
+    inline wchar& operator-= (u32int other) {
+        value -= other;
+        return *this;
+    }
+    inline bool operator== (u32int other) const {
+        return value == other;
+    }
+    //Assigns a raw value; returns the assigned value rather than a reference to *this.
+    inline u32int operator= (u32int v) {
+        value = v;
+        return v;
+    }
+
+    inline operator u32int () const { return value; }
+};
+
+#endif