From 0f536540de15202d44cd9b1d708ee04edccb8c66 Mon Sep 17 00:00:00 2001
From: Alexis211 <alexis211@gmail.com>
Date: Tue, 24 Nov 2009 13:00:34 +0100
Subject: Multiple things :

- Partially fixed FATFS::read
- Corrected some unicode encoding/decoding related stuff
- Implemented LFN entries parsing
---
 Source/Kernel/FileSystems/FAT/FATFS.class.cpp | 36 +++++++++++-----
 Source/Library/Common/ByteArray.class.cpp     |  4 +-
 Source/Library/Common/String.class.cpp        |  4 +-
 Source/Library/Common/WChar.class.cpp         | 59 ++++++++++++++++++++++-----
 Source/Library/Common/WChar.class.h           | 30 +++++++++-----
 5 files changed, 98 insertions(+), 35 deletions(-)

(limited to 'Source')

diff --git a/Source/Kernel/FileSystems/FAT/FATFS.class.cpp b/Source/Kernel/FileSystems/FAT/FATFS.class.cpp
index 00107e5..67b4013 100644
--- a/Source/Kernel/FileSystems/FAT/FATFS.class.cpp
+++ b/Source/Kernel/FileSystems/FAT/FATFS.class.cpp
@@ -3,6 +3,7 @@
 #include "FATFileNode.class.h"
 #include "FATDirectoryNode.class.h"
 #include <VFS/VFS.ns.h>
+#include <ByteArray.class.h>
 
 #define FIRSTCLUS(node) ((u32int&)(node->type() == NT_DIRECTORY ? \
 			((FATDirectoryNode*)(node))->m_firstCluster : \
@@ -125,7 +126,7 @@ bool FATFS::setParent(FSNode* node, FSNode* parent) {
 u32int FATFS::read(FileNode* file, u64int position, u32int max_length, u8int *data) {
 	u32int len = max_length;
 	if (position >= file->getLength()) return 0;
-	if (position + len > file->getLength()) len = len - position;
+	if (position + len > file->getLength()) len = file->getLength() - position;
 	u32int firstCluster = position / m_clusterSize, clusterOffset = position % m_clusterSize;
 	u32int clusters = (len + clusterOffset) / m_clusterSize + 1, lastClusBytesToRead = (len + clusterOffset) % m_clusterSize;
 	u32int clust = FIRSTCLUS(file);
@@ -174,21 +175,34 @@ bool FATFS::loadContents(DirectoryNode* dir) {
 		e.c = (u8int*)Mem::alloc(m_clusterSize);
 	}
 
+	ByteArray lfnBuffer;
 	while (cluster != 0) {
 		if (cluster != 2 or m_fatType == 32) readCluster(cluster, e.c);
 		for (u32int i = 0; i < entries; i++) {
-			if (e.e[i].name[0] == 0 or e.e[i].name[0] == 0xE5) continue;	//Nothing intresting here.
-			if (e.e[i].attributes == FA_LFN) continue;	//Long file name entry, nothing intresting
+			if (e.e[i].attributes == FA_LFN && e.c[i*32] != 0xE5) {	//Long file name entry that is not marked deleted
+				u8int num = e.c[i*32] & 0x1F;	//1-based sequence number ; the upper bits are flags (0x40 marks the last entry) and must not select the slot
+				if (num == 0) continue;	//Invalid sequence number : (num - 1) would point before the start of the buffer
+				if (lfnBuffer.size() < num * 26) lfnBuffer.resize(num * 26);	//Every LFN entry contributes 26 bytes of name data
+				memcpy(lfnBuffer + ((num - 1) * 26), e.c + (i*32 + 1), 10);	//Entry bytes 1-10 : first part of the name
+				memcpy(lfnBuffer + ((num - 1) * 26 + 10), e.c + (i*32 + 14), 12);	//Entry bytes 14-25 : second part of the name
+				memcpy(lfnBuffer + ((num - 1) * 26 + 22), e.c + (i*32 + 28), 4);	//Entry bytes 28-31 : third part of the name
+			}
 			if (e.e[i].attributes & FA_VOLUMEID) continue;
+			if (e.e[i].name[0] == 0  or e.e[i].name[0] == 0xE5) continue;	//Nothing intresting here.
 			String name;
-			for (int j = 0; j < 8; j++) {
-				if (e.e[i].name[j] == ' ') break;
-				name += WChar(e.e[i].name[j]);
-			}
-			for (int j = 0; j < 3; j++) {
-				if (e.e[i].extension[j] == ' ') break;
-				if (j == 0) name += ".";
-				name += WChar(e.e[i].extension[j]);
+			if (lfnBuffer.empty()) {
+				for (int j = 0; j < 8; j++) {
+					if (e.e[i].name[j] == ' ') break;
+					name += WChar(e.e[i].name[j]);
+				}
+				for (int j = 0; j < 3; j++) {
+					if (e.e[i].extension[j] == ' ') break;
+					if (j == 0) name += ".";
+					name += WChar(e.e[i].extension[j]);
+				}
+			} else {
+				name = lfnBuffer.toString(UE_UTF16_LE);
+				lfnBuffer.clear();
 			}
 			u32int first_clus = (e.e[i].first_clust_high << 16) + e.e[i].first_clust_low;
 			FSNode* n;
diff --git a/Source/Library/Common/ByteArray.class.cpp b/Source/Library/Common/ByteArray.class.cpp
index 2a42702..95326f7 100644
--- a/Source/Library/Common/ByteArray.class.cpp
+++ b/Source/Library/Common/ByteArray.class.cpp
@@ -50,9 +50,9 @@ void ByteArray::resize(u32int size) {
 }
 
 String ByteArray::toString (u8int encoding) {
-	char* c = new char[m_length + 1];
+	char* c = new char[m_length + 4];
 	memcpy((u8int*)c, m_string, m_length);
-	c[m_length] = 0;	//Add NULL terminator
+	for (int i = 0; i < 4; i++) c[m_length + i] = 0;	//Add NULL terminator
 	String r(c, encoding);
 	delete c;
 	return r;
diff --git a/Source/Library/Common/String.class.cpp b/Source/Library/Common/String.class.cpp
index 63ff837..a824eac 100644
--- a/Source/Library/Common/String.class.cpp
+++ b/Source/Library/Common/String.class.cpp
@@ -84,8 +84,8 @@ void String::affect (const char* string, u8int encoding) {
 		return;
 	}
 	m_string = new WChar[m_length + 1];
-	int i = 0, l = strlen(string), c = 0;
-	while (i < l) {
+	u32int i = 0, c = 0;
+	while (c < m_length) {
 		i += m_string[c].affect(string + i, encoding);
 		c++;
 	}
diff --git a/Source/Library/Common/WChar.class.cpp b/Source/Library/Common/WChar.class.cpp
index f5bd5bc..312a5db 100644
--- a/Source/Library/Common/WChar.class.cpp
+++ b/Source/Library/Common/WChar.class.cpp
@@ -29,8 +29,10 @@ WChar::WChar(char c) {
 
 WChar::WChar(const char* c, u8int encoding) {
 	if (encoding == UE_UTF8) 	affectUtf8(c);
-	if (encoding == UE_UTF16)	affectUtf16(c);
-	if (encoding == UE_UTF32)	affectUtf32(c);
+	if (encoding == UE_UTF16_LE)	affectUtf16le(c);
+	if (encoding == UE_UTF16_BE)	affectUtf16be(c);
+	if (encoding == UE_UTF32_LE)	affectUtf32le(c);
+	if (encoding == UE_UTF32_BE)	affectUtf32be(c);
 }
 
 u32int WChar::ucharLen(const char* c, u8int encoding) {
@@ -40,18 +42,21 @@ u32int WChar::ucharLen(const char* c, u8int encoding) {
 		else if ((c[0] & 0xF0) == 0xE0) return 3;
 		else if ((c[0] & 0xF8) == 0xF0) return 4;
 		else return 1;
-	} else if (encoding == UE_UTF16) {
+	} else if (encoding == UE_UTF16_BE) {
 		if ((c[0] & 0xFC) == 0xD8 and (c[2] & 0xFC) == 0xDC) return 4;
 		else return 2;
-	} else if (encoding == UE_UTF32) {
+	} else if (encoding == UE_UTF16_LE) {
+		if ((c[1] & 0xFC) == 0xD8 and (c[3] & 0xFC) == 0xDC) return 4;	//Surrogate pair : in little-endian the high byte of each 16-bit unit comes second
+		else return 2;
+	} else if (encoding == UE_UTF32_LE or encoding == UE_UTF32_BE) {	//Both UTF-32 byte orders take 4 bytes per character
 		return 4;
 	}
 	return 1;
 }
 
 u32int WChar::utfLen(const char* c, u8int encoding) {
-	int i = 0, l = strlen(c), co = 0;
-	while (i < l) {
+	int i = 0, co = 0;
+	while (WChar(c + i, encoding) != 0) {
 		i += ucharLen(c + i, encoding);
 		co++;
 	}
@@ -90,7 +95,7 @@ u32int WChar::affectUtf8(const char* c) {	//Returns the number of bytes for the
 	return 1;
 }
 
-u32int WChar::affectUtf16(const char* c) {
+u32int WChar::affectUtf16be(const char* c) {
 	if ((c[0] & 0xFC) == 0xD8 and		// 11111100b, 11011000b
 		(c[2] & 0xFC) == 0xDC) {		// 11111100b, 11011100b
 		u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6);
@@ -108,13 +113,38 @@ u32int WChar::affectUtf16(const char* c) {
 	}
 }
 
-u32int WChar::affectUtf32(const char* c) {
+//Decodes one UTF-16 little-endian character starting at c into value ; returns the number of bytes read (2, or 4 for a surrogate pair).
+u32int WChar::affectUtf16le(const char* c) {
+	const u8int* b = (const u8int*)c;	//Read unsigned bytes : a plain char may be signed and would sign-extend when promoted to int
+	if ((b[1] & 0xFC) == 0xD8 and		// 11111100b, 11011000b : high surrogate, its high byte is b[1]
+		(b[3] & 0xFC) == 0xDC) {		// 11111100b, 11011100b : low surrogate, its high byte is b[3]
+		u32int w = ((b[1] & 0x03) << 2) | ((b[0] & 0xC0) >> 6);
+		u32int x = (b[0] & 0x3F);
+		u32int y = ((b[3] & 0x03) << 8) | b[2];	//The low 8 bits of y are the low byte of the second unit, which is b[2]
+		value = ((w + 1) << 16) | (x << 10) | y;	//Always >= 0x10000, so the range checks below cannot apply here
+		return 4;
+	} else {
+		value = (b[1] << 8) | b[0];
+		if (value >= 0xD800 and value <= 0xDFFF) value = 0;	//These values are unallowed
+		if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
+		return 2;
+	}
+}
+
+u32int WChar::affectUtf32be(const char* c) {
 	value = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
 	if (value >= 0xD800 and value <= 0xDFFF) value = 0;	//These values are unallowed
 	if (value >= 0xFFFE and value <= 0xFFFF) value = 0;	
 	return 4;
 }
 
+u32int WChar::affectUtf32le(const char* c) {	//Decodes 4 little-endian bytes at c into value ; always returns 4 (bytes read)
+	value = ((u32int)(u8int)c[3] << 24) | ((u32int)(u8int)c[2] << 16) | ((u32int)(u8int)c[1] << 8) | (u32int)(u8int)c[0];	//Cast to unsigned first : a signed char >= 0x80 would sign-extend and set the upper bits
+	if (value >= 0xD800 and value <= 0xDFFF) value = 0;	//These values are unallowed
+	if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
+	return 4;
+}
+
 u8int WChar::toAscii() {
 	if (value < 128) return (char)value;
 	for (int i = 0; i < 128; i++) {
@@ -144,9 +174,9 @@ uchar_repr_t WChar::toUtf8() {
 	return r;
 }
 
-//TODO : code WChar::toUtf16
+//TODO : code WChar::toUtf16(be|le)
 
-uchar_repr_t WChar::toUtf32() {
+uchar_repr_t WChar::toUtf32be() {
 	uchar_repr_t r;
 	r.c[0] = (value >> 24) & 0xFF;
 	r.c[1] = (value >> 16) & 0xFF;
@@ -154,3 +184,12 @@ uchar_repr_t WChar::toUtf32() {
 	r.c[3] = value & 0xFF;
 	return r;
 }
+
+//Encodes value as four bytes, least significant byte first.
+uchar_repr_t WChar::toUtf32le() {
+	uchar_repr_t encoded;
+	for (int byteIdx = 0; byteIdx < 4; byteIdx++) {
+		encoded.c[byteIdx] = (value >> (8 * byteIdx)) & 0xFF;	//Byte n receives bits 8n..8n+7 of value
+	}
+	return encoded;
+}
diff --git a/Source/Library/Common/WChar.class.h b/Source/Library/Common/WChar.class.h
index 5d6d26b..afaeb44 100644
--- a/Source/Library/Common/WChar.class.h
+++ b/Source/Library/Common/WChar.class.h
@@ -9,8 +9,10 @@
 
 enum {
 	UE_UTF8,
-	UE_UTF16,
-	UE_UTF32,
+	UE_UTF16_LE,
+	UE_UTF16_BE,
+	UE_UTF32_LE,
+	UE_UTF32_BE,
 };
 
 union uchar_repr_t {
@@ -31,13 +33,17 @@ struct WChar {
 
 	void affectAscii(char c);
 	u32int affectUtf8(const char* c);
-	u32int affectUtf16(const char* c);
-	u32int affectUtf32(const char* c);
+	u32int affectUtf16le(const char* c);
+	u32int affectUtf16be(const char* c);
+	u32int affectUtf32le(const char* c);
+	u32int affectUtf32be(const char* c);
 
 	u32int affect(const char* c, u8int encoding = UE_UTF8) {
 		if (encoding == UE_UTF8) return affectUtf8(c);
-		if (encoding == UE_UTF16) return affectUtf16(c);
-		if (encoding == UE_UTF32) return affectUtf32(c);
+		if (encoding == UE_UTF16_LE) return affectUtf16le(c);
+		if (encoding == UE_UTF16_BE) return affectUtf16be(c);
+		if (encoding == UE_UTF32_LE) return affectUtf32le(c);
+		if (encoding == UE_UTF32_BE) return affectUtf32be(c);
 		affectAscii(c[0]);	//Default case :/
 		return 1;
 	}
@@ -45,13 +51,17 @@ struct WChar {
 	u8int toAscii();
 
 	uchar_repr_t toUtf8();
-	uchar_repr_t toUtf16();
-	uchar_repr_t toUtf32();
+	uchar_repr_t toUtf16le();
+	uchar_repr_t toUtf16be();
+	uchar_repr_t toUtf32le();
+	uchar_repr_t toUtf32be();
 
 	uchar_repr_t encode(u8int encoding = UE_UTF8) {
 		if (encoding == UE_UTF8) return toUtf8();
-		//if (encoding == UE_UTF16) return toUtf16();
-		if (encoding == UE_UTF32) return toUtf32();
+		//if (encoding == UE_UTF16_LE) return toUtf16le();
+		//if (encoding == UE_UTF16_BE) return toUtf16be();
+		if (encoding == UE_UTF32_LE) return toUtf32le();
+		if (encoding == UE_UTF32_BE) return toUtf32be();
 		uchar_repr_t x;
 		x.c[0] = toAscii();
 		return x;
-- 
cgit v1.2.3