From 5f88058644587aa255d453eee74c212e53cf9ade Mon Sep 17 00:00:00 2001
From: Alexis211 <alexis211@gmail.com>
Date: Wed, 16 Sep 2009 15:41:10 +0200
Subject: Added stuff to WChar and String classes. WChar can now decode utf16
 and utf32, and encode utf8 and utf32. String now has functions append(),
 concat(), compare() and affect() with different prototypes for char[] arrays,
 so that we can use it as well with utf8, utf16 and utf32.

---
 Source/Kernel/Core/kmain.wtf.cpp           |   3 ++
 Source/Kernel/Core/loader.wtf.asm          |   6 +--
 Source/Kernel/Library/String.class.cpp     |  40 +++++++-------
 Source/Kernel/Library/String.class.h       |  46 ++++++++++------
 Source/Kernel/Library/WChar.class.cpp      |  82 ++++++++++++++++++++++++++---
 Source/Kernel/Library/WChar.class.h        |  16 +++++-
 Source/Kernel/Melon.ke                     | Bin 153566 -> 158798 bytes
 Source/Kernel/TaskManager/Thread.class.cpp |   2 +-
 8 files changed, 145 insertions(+), 50 deletions(-)

(limited to 'Source')

diff --git a/Source/Kernel/Core/kmain.wtf.cpp b/Source/Kernel/Core/kmain.wtf.cpp
index 89932e3..b8ebe49 100644
--- a/Source/Kernel/Core/kmain.wtf.cpp
+++ b/Source/Kernel/Core/kmain.wtf.cpp
@@ -130,6 +130,7 @@ void kmain(multiboot_info_t* mbd, u32int magic) {
 			*kvt << " - Command list for integrated kernel shell:\n";
 			*kvt << "  - help          shows this help screen\n";
 			*kvt << "  - reboot        reboots your computer\n";
+			*kvt << "  - panic         causes a kernel panic\n";
 			*kvt << "  - devices       shows all detected devices on your computer\n";
 			*kvt << "  - free          shows memory usage (physical frames and kernel heap)\n";
 			*kvt << "  - uptime        shows seconds since boot\n";
@@ -137,6 +138,8 @@ void kmain(multiboot_info_t* mbd, u32int magic) {
 			*kvt << " - Commands you should know how to use : ls, cd, cat, pwd, rm, mkdir, wf\n";
 		} else if (tokens[0] == "reboot") {
 			Sys::reboot();
+		} else if (tokens[0] == "panic") {
+			PANIC("This is what happens when you say 'panic'.");
 		} else if (tokens[0] == "ls") {
 			DirectoryNode* d = cwd;
 			if (tokens.size() == 2) {
diff --git a/Source/Kernel/Core/loader.wtf.asm b/Source/Kernel/Core/loader.wtf.asm
index 8d7b076..5a925d5 100644
--- a/Source/Kernel/Core/loader.wtf.asm
+++ b/Source/Kernel/Core/loader.wtf.asm
@@ -51,7 +51,7 @@ static_ctors_loop:			; construct global objects
  
    call  kmain                       ; call kernel proper
 
-   cli		; disable interuptions
+   cli		; disable interrupts
 
 static_dtors_loop:					 ; useless, kernel should never return
    mov ebx, start_dtors				; destruct global objects
@@ -69,11 +69,11 @@ hang:
 
 [section .setup]	; this is included in the .setup section, so that it thinks it is at 0x00100000
 
-trickgdt:		; our false GDT
+trickgdt:		; our false GDT (this is equivalent to the gdt_ptr_t structure defined in GDT.ns.h)
    dw gdt_end - gdt - 1		; gdt limit
    dd gdt					; gdt base
 
-gdt:
+gdt:			; each of these is equivalent to one gdt_entry_t structure, defined in GDT.ns.h
    dd 0, 0					; null GDT entry
    db 0xFF, 0xFF, 0, 0, 0, 10011010b, 11001111b, 0x40	; kernel code segment
    db 0xFF, 0xFF, 0, 0, 0, 10010010b, 11001111b, 0x40	; kernel data segment
diff --git a/Source/Kernel/Library/String.class.cpp b/Source/Kernel/Library/String.class.cpp
index 6380b25..9d4083b 100644
--- a/Source/Kernel/Library/String.class.cpp
+++ b/Source/Kernel/Library/String.class.cpp
@@ -54,8 +54,8 @@ String::String() {
 	m_length = 0;
 }
 
-String::String(const char* string) {
-	m_length = WChar::utfLen(string);
+String::String(const char* string, u8int encoding) {
+	m_length = WChar::utfLen(string, encoding);
 	if (m_length == 0) {
 		m_string = 0;
 		return;
@@ -63,7 +63,7 @@ String::String(const char* string) {
 	m_string = new WChar[m_length + 1];
 	int i = 0, l = strlen(string), c = 0;
 	while (i < l) {
-		i += m_string[c].affectUtf8(string + i);
+		i += m_string[c].affect(string + i, encoding);
 		c++;
 	}
 	m_string[m_length] = 0;
@@ -86,7 +86,7 @@ String::~String() {
 	if (m_string != 0) delete [] m_string;
 }
 
-void String::operator= (const String &other) {
+void String::affect (const String &other) {
 	m_length = other.m_length;
 	if (m_string != 0) delete [] m_string;
 	if (m_length == 0) {
@@ -100,8 +100,8 @@ void String::operator= (const String &other) {
 	m_string[m_length] = 0;
 }
 
-void String::operator= (const char* string) {
-	m_length = WChar::utfLen(string);
+void String::affect (const char* string, u8int encoding) {
+	m_length = WChar::utfLen(string, encoding);
 	if (m_string != 0) delete [] m_string;
 	if (m_length == 0) {
 		m_string = 0;
@@ -110,13 +110,13 @@ void String::operator= (const char* string) {
 	m_string = new WChar[m_length + 1];
 	int i = 0, l = strlen(string), c = 0;
 	while (i < l) {
-		i += m_string[c].affectUtf8(string + i);
+		i += m_string[c].affect(string + i, encoding);
 		c++;
 	}
 	m_string[m_length] = 0;
 }
 
-bool String::operator== (const String &other) const {
+bool String::compare (const String &other) const {
 	if (m_length != other.m_length) return false;
 	for (u32int i = 0; i < m_length; i++) {
 		if (m_string[i] != other.m_string[i]) return false;
@@ -124,19 +124,19 @@ bool String::operator== (const String &other) const {
 	return true;
 }
 
-bool String::operator== (const char* string) const {
-	if (m_length != WChar::utfLen(string)) return false;
+bool String::compare (const char* string, u8int encoding) const {
+	if (m_length != WChar::utfLen(string, encoding)) return false;
 	int i = 0, l = strlen(string), c = 0;
 	WChar tmp;
 	while (i < l) {
-		i += tmp.affectUtf8(string + i);
+		i += tmp.affect(string + i, encoding);
 		if (m_string[c] != tmp) return false;
 		c++;
 	}
 	return true;
 }
 
-String& String::operator+= (const String &other) {
+String& String::append (const String &other) {
 	WChar* newdata = new WChar[m_length + other.m_length + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
@@ -151,14 +151,14 @@ String& String::operator+= (const String &other) {
 	return *this;
 }
 
-String& String::operator+= (const char* other) {
-	WChar* newdata = new WChar[m_length + WChar::utfLen(other) + 1];
+String& String::append (const char* other, u8int encoding) {
+	WChar* newdata = new WChar[m_length + WChar::utfLen(other, encoding) + 1];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
 	}
 	int i = 0, l = strlen(other), c = 0;
 	while (i < l) {
-		i += newdata[c + m_length].affectUtf8(other + i);
+		i += newdata[c + m_length].affect(other + i, encoding);
 		c++;
 	}
 	if (m_string != 0) delete [] m_string;
@@ -168,7 +168,7 @@ String& String::operator+= (const char* other) {
 	return *this;
 }
 
-String& String::operator+= (WChar other) {
+String& String::append (WChar other) {
 	WChar* newdata = new WChar[m_length + 2];
 	for (u32int i = 0; i < m_length; i++) {
 		newdata[i] = m_string[i];
@@ -181,17 +181,17 @@ String& String::operator+= (WChar other) {
 	return *this;
 }
 
-String& String::operator+ (const String &other) const {	//Can be optimized
+String String::concat (const String &other) const {	//Can be optimized
 	String ret(*this);
 	return (ret += other);
 }
 
-String& String::operator+ (const char* other) const { //Can be optimized
+String String::concat (const char* other, u8int encoding) const { //Can be optimized
 	String ret(*this);
-	return (ret += other);
+	return (ret.append(other, encoding));
 }
 
-String& String::operator+ (WChar other) const {
+String String::concat (WChar other) const {
 	String ret(*this);
 	return (ret += other);
 }
diff --git a/Source/Kernel/Library/String.class.h b/Source/Kernel/Library/String.class.h
index 01cc6a8..6a9de64 100644
--- a/Source/Kernel/Library/String.class.h
+++ b/Source/Kernel/Library/String.class.h
@@ -15,26 +15,40 @@ class String {
 	static String hex(u32int number);
 	static String number(s32int number);
 
-	String(const char* string);
+	String(const char* string, u8int encoding = UE_UTF8);
 	String();
 	String(const String &other);
 	~String();
 
-	void operator= (const String &other);
-	void operator= (const char* string);
-
-	bool operator== (const String &other) const;
-	bool operator== (const char* string) const;
-	bool operator!= (const String &other) { return !(operator== (other)); }
-	bool operator!= (const char* other) { return !(operator== (other)); }
-	String &operator+= (const String &other);
-	String &operator+= (const char* other);
-	String &operator+= (WChar other);
-	String &operator+ (const String &other) const;
-	String &operator+ (const char* other) const;
-	String &operator+ (WChar other) const;
-	s32int toInt() const;
-	u32int toInt16() const;	//From HEX
+	void affect(const String &other);
+	void affect(const char* string, u8int encoding = UE_UTF8);
+	void operator= (const String &other) { affect(other); }
+	void operator= (const char* other) { affect(other); }
+
+	bool compare(const String &other) const;
+	bool compare(const char* string, u8int encoding = UE_UTF8) const;
+	bool operator== (const String &other) const { return compare(other); }
+	bool operator== (const char* other) const { return compare(other); }
+	bool operator!= (const String &other) const { return !compare(other); }	//const, like operator== and compare()
+	bool operator!= (const char* other) const { return !compare(other); }	//const, like operator== and compare()
+
+	String& append(const String &other);
+	String& append(const char* other, u8int encoding = UE_UTF8);
+	String& append(WChar other);
+	String &operator+= (const String &other) { return append(other); }
+	String &operator+= (const char* other) { return append(other); }
+	String &operator+= (WChar other) { return append(other); }
+
+	String concat(const String &other) const;
+	String concat(const char* other, u8int encoding = UE_UTF8) const;
+	String concat(WChar other) const;
+	String operator+ (const String &other) const { return concat(other); }
+	String operator+ (const char* other) const { return concat(other); }
+	String operator+ (WChar other) const { return concat(other); }
+
+	s32int toInt() const; 	//Convert from DEC
+	u32int toInt16() const;	//Convert from HEX
+
 	WChar& operator[] (int index) const;
 
 	u32int size() const;
diff --git a/Source/Kernel/Library/WChar.class.cpp b/Source/Kernel/Library/WChar.class.cpp
index d7f01de..aad46c3 100644
--- a/Source/Kernel/Library/WChar.class.cpp
+++ b/Source/Kernel/Library/WChar.class.cpp
@@ -19,19 +19,28 @@ WChar::WChar(char c) {
 	affectAscii(c);
 }
 
-WChar::WChar(const char* c, u8int encoding) {	//TODO : take encoding into account
-	affectUtf8(c);
+WChar::WChar(const char* c, u8int encoding) {
+	if (encoding == UE_UTF8) 	affectUtf8(c);
+	if (encoding == UE_UTF16)	affectUtf16(c);
+	if (encoding == UE_UTF32)	affectUtf32(c);
 }
 
 u32int WChar::utfLen(const char* c, u8int encoding) {
 	int i = 0, l = CMem::strlen(c), co = 0;
 	while (i < l) {
-		if ((c[i] & 0x80) == 0) i += 1;
-		else if ((c[i] & 0xE0) == 0xC0) i += 2;
-		else if ((c[i] & 0xF0) == 0xE0) i += 3;
-		else if ((c[i] & 0xF8) == 0xF0) i += 4;
-		else i += 1;
-		co++;
+		if (encoding == UE_UTF8) {	//Lead byte tells how many bytes this character spans
+			if ((c[i] & 0x80) == 0) i += 1;
+			else if ((c[i] & 0xE0) == 0xC0) i += 2;
+			else if ((c[i] & 0xF0) == 0xE0) i += 3;
+			else if ((c[i] & 0xF8) == 0xF0) i += 4;
+			else i += 1;
+		} else if (encoding == UE_UTF16) {	//NOTE(review): l comes from strlen, presumably stopping at the first zero byte - confirm for UTF-16/32 input
+			if ((c[i] & 0xFC) == 0xD8 and (c[i + 2] & 0xFC) == 0xDC) i += 4;
+			else i += 2;
+		} else if (encoding == UE_UTF32) {
+			i += 4;
+		} else i += 1;	//Unknown encoding : one byte per character, like the default case of WChar::affect
+		co++;	//One character counted per iteration, whatever the encoding
 	}
 	return co;
 }
@@ -67,6 +76,31 @@ u32int WChar::affectUtf8(const char* c) {	//Returns the number of bytes for the
 	return 1;
 }
 
+//Decodes one big-endian UTF-16 character starting at c and stores it in value.
+//Returns the number of bytes read : 4 for a surrogate pair, 2 otherwise.
+u32int WChar::affectUtf16(const char* c) {
+	if ((c[0] & 0xFC) == 0xD8 and		// 11111100b, 11011000b
+		(c[2] & 0xFC) == 0xDC) {		// 11111100b, 11011100b
+		u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6);
+		u32int x = (c[1] & 0x3F);
+		u32int y = ((c[2] & 0x03) << 8) | (c[3] & 0xFF);	//Low ten bits : two from c[2], eight from c[3]
+		value = ((w + 1) << 16) | (x << 10) | y;	//Always >= 0x10000, so the range checks below are not needed here
+		return 4;
+	} else {
+		value = ((c[0] & 0xFF) << 8) | (c[1] & 0xFF);	//Bytes are masked so that a signed char is not sign-extended
+		if (value >= 0xD800 and value <= 0xDFFF) value = 0;	//These values are unallowed
+		if (value >= 0xFFFE and value <= 0xFFFF) value = 0;	
+		return 2;
+	}
+}
+
+u32int WChar::affectUtf32(const char* c) {	//Decodes the 4 big-endian bytes at c into value; always returns 4
+	value = ((u32int)(c[0] & 0xFF) << 24) | ((c[1] & 0xFF) << 16) | ((c[2] & 0xFF) << 8) | (c[3] & 0xFF);	//Bytes are masked so that a signed char is not sign-extended
+	if (value >= 0xD800 and value <= 0xDFFF) value = 0;	//These values are unallowed
+	if (value >= 0xFFFE and value <= 0xFFFF) value = 0;	
+	return 4;
+}
+
 u8int WChar::toAscii() {
 	if (value < 128) return (char)value;
 	for (int i = 0; i < 128; i++) {
@@ -74,3 +108,35 @@ u8int WChar::toAscii() {
 	}
 	return '?';
 }
+
+uchar_repr_t WChar::toUtf8() {	//Encodes value as a 1 to 4 byte UTF-8 sequence stored in r.c
+	uchar_repr_t r;
+	r.i = 0;	//Presumably clears all of r.c (if i overlays c) - confirm against uchar_repr_t
+	if (value < 128) {
+		r.c[0] = value;
+	} else if (value < 2048) {	//Two bytes carry 11 bits, so only values up to 0x7FF fit here
+		r.c[0] = 0xC0 | ((value & 0x07C0) >> 6);
+		r.c[1] = 0x80 | (value & 0x3F);
+	} else if (value < 65536) {
+		r.c[0] = 0xE0 | ((value & 0xF000) >> 12);
+		r.c[1] = 0x80 | ((value & 0x0FC0) >> 6);
+		r.c[2] = 0x80 | (value & 0x003F);
+	} else {
+		r.c[0] = 0xF0 | ((value & 0x1C0000) >> 18);
+		r.c[1] = 0x80 | ((value & 0x3F000) >> 12);
+		r.c[2] = 0x80 | ((value & 0x0FC0) >> 6);
+		r.c[3] = 0x80 | (value & 0x003F);
+	}
+	return r;
+}
+
+//TODO : code WChar::toUtf16
+
+uchar_repr_t WChar::toUtf32() {
+	uchar_repr_t r;
+	r.c[0] = (value >> 24) & 0xFF;
+	r.c[1] = (value >> 16) & 0xFF;
+	r.c[2] = (value >> 8) & 0xFF;
+	r.c[3] = value & 0xFF;
+	return r;
+}
diff --git a/Source/Kernel/Library/WChar.class.h b/Source/Kernel/Library/WChar.class.h
index fc00577..63f1ea3 100644
--- a/Source/Kernel/Library/WChar.class.h
+++ b/Source/Kernel/Library/WChar.class.h
@@ -26,11 +26,23 @@ struct WChar {
 
 	void affectAscii(char c);
 	u32int affectUtf8(const char* c);
-	void affectUtf16(const char* c);
-	void affectUtf32(const char* c);
+	u32int affectUtf16(const char* c);
+	u32int affectUtf32(const char* c);
+
+	u32int affect(const char* c, u8int encoding = UE_UTF8) {
+		if (encoding == UE_UTF8) return affectUtf8(c);
+		if (encoding == UE_UTF16) return affectUtf16(c);
+		if (encoding == UE_UTF32) return affectUtf32(c);
+		affectAscii(c[0]);	//Default case :/
+		return 1;
+	}
 
 	u8int toAscii();
 
+	uchar_repr_t toUtf8();
+	uchar_repr_t toUtf16();
+	uchar_repr_t toUtf32();
+
 	inline WChar operator+ (u32int other) {
 		WChar r;
 		r.value = value + other;
diff --git a/Source/Kernel/Melon.ke b/Source/Kernel/Melon.ke
index ca3160e..44d5f3c 100755
Binary files a/Source/Kernel/Melon.ke and b/Source/Kernel/Melon.ke differ
diff --git a/Source/Kernel/TaskManager/Thread.class.cpp b/Source/Kernel/TaskManager/Thread.class.cpp
index 2d167df..de92f0c 100644
--- a/Source/Kernel/TaskManager/Thread.class.cpp
+++ b/Source/Kernel/TaskManager/Thread.class.cpp
@@ -18,7 +18,7 @@ Thread::Thread(u32int (*entry_point)(), bool iskernel) {
 		u32int tmp;
 		m_kernelStackFrame = (u32int)PageAlloc::alloc(&tmp);
 		m_process = Task::getKernelProcess();
-		setup(entry_point, m_kernelStackFrame + STACKSIZE);
+		setup(entry_point, m_kernelStackFrame + 0x1000);	//A kernel stack always is 1 frame, meaning 0x1000 bytes
 	} else {
 		m_isKernel = false;
 		m_process = Task::currentProcess;
-- 
cgit v1.2.3