From 0f536540de15202d44cd9b1d708ee04edccb8c66 Mon Sep 17 00:00:00 2001 From: Alexis211 Date: Tue, 24 Nov 2009 13:00:34 +0100 Subject: Multiple things : - Partially fixed FATFS::read - Corrected some unicode encoding/decoding related stuff - Implemented LFN entries parsing --- Source/Kernel/FileSystems/FAT/FATFS.class.cpp | 36 +++++++++++----- Source/Library/Common/ByteArray.class.cpp | 4 +- Source/Library/Common/String.class.cpp | 4 +- Source/Library/Common/WChar.class.cpp | 59 ++++++++++++++++++++++----- Source/Library/Common/WChar.class.h | 30 +++++++++----- 5 files changed, 98 insertions(+), 35 deletions(-) (limited to 'Source') diff --git a/Source/Kernel/FileSystems/FAT/FATFS.class.cpp b/Source/Kernel/FileSystems/FAT/FATFS.class.cpp index 00107e5..67b4013 100644 --- a/Source/Kernel/FileSystems/FAT/FATFS.class.cpp +++ b/Source/Kernel/FileSystems/FAT/FATFS.class.cpp @@ -3,6 +3,7 @@ #include "FATFileNode.class.h" #include "FATDirectoryNode.class.h" #include +#include #define FIRSTCLUS(node) ((u32int&)(node->type() == NT_DIRECTORY ? \ ((FATDirectoryNode*)(node))->m_firstCluster : \ @@ -125,7 +126,7 @@ bool FATFS::setParent(FSNode* node, FSNode* parent) { u32int FATFS::read(FileNode* file, u64int position, u32int max_length, u8int *data) { u32int len = max_length; if (position >= file->getLength()) return 0; - if (position + len > file->getLength()) len = len - position; + if (position + len > file->getLength()) len = file->getLength() - position; u32int firstCluster = position / m_clusterSize, clusterOffset = position % m_clusterSize; u32int clusters = (len + clusterOffset) / m_clusterSize + 1, lastClusBytesToRead = (len + clusterOffset) % m_clusterSize; u32int clust = FIRSTCLUS(file); @@ -174,21 +175,34 @@ bool FATFS::loadContents(DirectoryNode* dir) { e.c = (u8int*)Mem::alloc(m_clusterSize); } + ByteArray lfnBuffer; while (cluster != 0) { if (cluster != 2 or m_fatType == 32) readCluster(cluster, e.c); for (u32int i = 0; i < entries; i++) { - if (e.e[i].name[0] == 0 or e.e[i].name[0] == 0xE5) continue; //Nothing intresting here. - if (e.e[i].attributes == FA_LFN) continue; //Long file name entry, nothing intresting + if (e.e[i].attributes == FA_LFN && e.c[i*32] != 0xE5) { //Long file name entry + u8int num = e.c[i*32] & 0x3; + if (lfnBuffer.size() < num * 26) lfnBuffer.resize(num * 26); + num--; + memcpy(lfnBuffer + (num * 26), e.c + (i*32 + 1), 10); + memcpy(lfnBuffer + (num * 26 + 10), e.c + (i*32 + 14), 12); + memcpy(lfnBuffer + (num * 26 + 22), e.c + (i*32 + 28), 4); + } if (e.e[i].attributes & FA_VOLUMEID) continue; + if (e.e[i].name[0] == 0 or e.e[i].name[0] == 0xE5) continue; //Nothing intresting here. String name; - for (int j = 0; j < 8; j++) { - if (e.e[i].name[j] == ' ') break; - name += WChar(e.e[i].name[j]); - } - for (int j = 0; j < 3; j++) { - if (e.e[i].extension[j] == ' ') break; - if (j == 0) name += "."; - name += WChar(e.e[i].extension[j]); + if (lfnBuffer.empty()) { + for (int j = 0; j < 8; j++) { + if (e.e[i].name[j] == ' ') break; + name += WChar(e.e[i].name[j]); + } + for (int j = 0; j < 3; j++) { + if (e.e[i].extension[j] == ' ') break; + if (j == 0) name += "."; + name += WChar(e.e[i].extension[j]); + } + } else { + name = lfnBuffer.toString(UE_UTF16_LE); + lfnBuffer.clear(); } u32int first_clus = (e.e[i].first_clust_high << 16) + e.e[i].first_clust_low; FSNode* n; diff --git a/Source/Library/Common/ByteArray.class.cpp b/Source/Library/Common/ByteArray.class.cpp index 2a42702..95326f7 100644 --- a/Source/Library/Common/ByteArray.class.cpp +++ b/Source/Library/Common/ByteArray.class.cpp @@ -50,9 +50,9 @@ void ByteArray::resize(u32int size) { } String ByteArray::toString (u8int encoding) { - char* c = new char[m_length + 1]; + char* c = new char[m_length + 4]; memcpy((u8int*)c, m_string, m_length); - c[m_length] = 0; //Add NULL terminator + for (int i = 0; i < 4; i++) c[m_length + i] = 0; //Add NULL terminator String r(c, encoding); delete c; return r; diff --git a/Source/Library/Common/String.class.cpp b/Source/Library/Common/String.class.cpp index 63ff837..a824eac 100644 --- a/Source/Library/Common/String.class.cpp +++ b/Source/Library/Common/String.class.cpp @@ -84,8 +84,8 @@ void String::affect (const char* string, u8int encoding) { return; } m_string = new WChar[m_length + 1]; - int i = 0, l = strlen(string), c = 0; - while (i < l) { + u32int i = 0, c = 0; + while (c < m_length) { i += m_string[c].affect(string + i, encoding); c++; } diff --git a/Source/Library/Common/WChar.class.cpp b/Source/Library/Common/WChar.class.cpp index f5bd5bc..312a5db 100644 --- a/Source/Library/Common/WChar.class.cpp +++ b/Source/Library/Common/WChar.class.cpp @@ -29,8 +29,10 @@ WChar::WChar(char c) { WChar::WChar(const char* c, u8int encoding) { if (encoding == UE_UTF8) affectUtf8(c); - if (encoding == UE_UTF16) affectUtf16(c); - if (encoding == UE_UTF32) affectUtf32(c); + if (encoding == UE_UTF16_LE) affectUtf16le(c); + if (encoding == UE_UTF16_BE) affectUtf16be(c); + if (encoding == UE_UTF32_LE) affectUtf32le(c); + if (encoding == UE_UTF32_BE) affectUtf32be(c); } u32int WChar::ucharLen(const char* c, u8int encoding) { @@ -40,18 +42,21 @@ u32int WChar::ucharLen(const char* c, u8int encoding) { else if ((c[0] & 0xF0) == 0xE0) return 3; else if ((c[0] & 0xF8) == 0xF0) return 4; else return 1; - } else if (encoding == UE_UTF16) { + } else if (encoding == UE_UTF16_BE) { if ((c[0] & 0xFC) == 0xD8 and (c[2] & 0xFC) == 0xDC) return 4; else return 2; - } else if (encoding == UE_UTF32) { + } else if (encoding == UE_UTF16_LE) { + if ((c[1] & 0xFC) == 0xD8 and (c[3] & 0xFC) == 0xDC) return 4; + else return 2; + } else if (encoding == UE_UTF32_LE or encoding == UE_UTF16_BE) { return 4; } return 1; } u32int WChar::utfLen(const char* c, u8int encoding) { - int i = 0, l = strlen(c), co = 0; - while (i < l) { + int i = 0, co = 0; + while (WChar(c + i, encoding) != 0) { i += ucharLen(c + i, encoding); co++; } @@ -90,7 +95,7 @@ u32int WChar::affectUtf8(const char* c) { //Returns the number of bytes for the return 1; } -u32int WChar::affectUtf16(const char* c) { +u32int WChar::affectUtf16be(const char* c) { if ((c[0] & 0xFC) == 0xD8 and // 11111100b, 11011000b (c[2] & 0xFC) == 0xDC) { // 11111100b, 11011100b u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6); @@ -108,13 +113,38 @@ u32int WChar::affectUtf16(const char* c) { } } -u32int WChar::affectUtf32(const char* c) { +u32int WChar::affectUtf16le(const char* c) { + if ((c[1] & 0xFC) == 0xD8 and // 11111100b, 11011000b + (c[3] & 0xFC) == 0xDC) { // 11111100b, 11011100b + u32int w = ((c[1] & 0x03) << 2) | ((c[0] & 0xC0) >> 6); + u32int x = (c[0] & 0x3F); + u32int y = ((c[3] & 0x03) << 8) | (c[3]); + value = ((w + 1) << 16) | (x << 10) | y; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; + } else { + value = (c[1] << 8) | (c[0]); + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 2; + } +} + +u32int WChar::affectUtf32be(const char* c) { value = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3]; if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed if (value >= 0xFFFE and value <= 0xFFFF) value = 0; return 4; } +u32int WChar::affectUtf32le(const char* c) { + value = (c[3] << 24) | (c[2] << 16) | (c[1] << 8) | c[0]; + if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed + if (value >= 0xFFFE and value <= 0xFFFF) value = 0; + return 4; +} + u8int WChar::toAscii() { if (value < 128) return (char)value; for (int i = 0; i < 128; i++) { @@ -144,9 +174,9 @@ uchar_repr_t WChar::toUtf8() { return r; } -//TODO : code WChar::toUtf16 +//TODO : code WChar::toUtf16(be|le) -uchar_repr_t WChar::toUtf32() { +uchar_repr_t WChar::toUtf32be() { uchar_repr_t r; r.c[0] = (value >> 24) & 0xFF; r.c[1] = (value >> 16) & 0xFF; @@ -154,3 +184,12 @@ uchar_repr_t WChar::toUtf32() { r.c[3] = value & 0xFF; return r; } + +uchar_repr_t WChar::toUtf32le() { + uchar_repr_t r; + r.c[3] = (value >> 24) & 0xFF; + r.c[2] = (value >> 16) & 0xFF; + r.c[1] = (value >> 8) & 0xFF; + r.c[0] = value & 0xFF; + return r; +} diff --git a/Source/Library/Common/WChar.class.h b/Source/Library/Common/WChar.class.h index 5d6d26b..afaeb44 100644 --- a/Source/Library/Common/WChar.class.h +++ b/Source/Library/Common/WChar.class.h @@ -9,8 +9,10 @@ enum { UE_UTF8, - UE_UTF16, - UE_UTF32, + UE_UTF16_LE, + UE_UTF16_BE, + UE_UTF32_LE, + UE_UTF32_BE, }; union uchar_repr_t { @@ -31,13 +33,17 @@ struct WChar { void affectAscii(char c); u32int affectUtf8(const char* c); - u32int affectUtf16(const char* c); - u32int affectUtf32(const char* c); + u32int affectUtf16le(const char* c); + u32int affectUtf16be(const char* c); + u32int affectUtf32le(const char* c); + u32int affectUtf32be(const char* c); u32int affect(const char* c, u8int encoding = UE_UTF8) { if (encoding == UE_UTF8) return affectUtf8(c); - if (encoding == UE_UTF16) return affectUtf16(c); - if (encoding == UE_UTF32) return affectUtf32(c); + if (encoding == UE_UTF16_LE) return affectUtf16le(c); + if (encoding == UE_UTF16_BE) return affectUtf16be(c); + if (encoding == UE_UTF32_LE) return affectUtf32le(c); + if (encoding == UE_UTF32_BE) return affectUtf32be(c); affectAscii(c[0]); //Default case :/ return 1; } @@ -45,13 +51,17 @@ struct WChar { u8int toAscii(); uchar_repr_t toUtf8(); - uchar_repr_t toUtf16(); - uchar_repr_t toUtf32(); + uchar_repr_t toUtf16le(); + uchar_repr_t toUtf16be(); + uchar_repr_t toUtf32le(); + uchar_repr_t toUtf32be(); uchar_repr_t encode(u8int encoding = UE_UTF8) { if (encoding == UE_UTF8) return toUtf8(); - //if (encoding == UE_UTF16) return toUtf16(); - if (encoding == UE_UTF32) return toUtf32(); + //if (encoding == UE_UTF16_LE) return toUtf16le(); + //if (encoding == UE_UTF16_BE) return toUtf16be(); + if (encoding == UE_UTF32_LE) return toUtf32le(); + if (encoding == UE_UTF32_BE) return toUtf32be(); uchar_repr_t x; x.c[0] = toAscii(); return x; -- cgit v1.2.3