summary refs log tree commit diff
path: root/Source/Library/Common
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Library/Common')
-rw-r--r-- Source/Library/Common/BasicString.class.cpp 4
-rw-r--r-- Source/Library/Common/BasicString.class.h 2
-rw-r--r-- Source/Library/Common/ByteArray.class.cpp 4
-rw-r--r-- Source/Library/Common/String.class.cpp 8
-rw-r--r-- Source/Library/Common/String.class.h 2
-rw-r--r-- Source/Library/Common/WChar.class.cpp 59
-rw-r--r-- Source/Library/Common/WChar.class.h 30
7 files changed, 81 insertions, 28 deletions
diff --git a/Source/Library/Common/BasicString.class.cpp b/Source/Library/Common/BasicString.class.cpp
index f3a6164..ddb4e2c 100644
--- a/Source/Library/Common/BasicString.class.cpp
+++ b/Source/Library/Common/BasicString.class.cpp
@@ -173,8 +173,10 @@ Vector< BasicString<T> > BasicString<T>::split(T sep) const {
}
template <typename T>
-BasicString<T> BasicString<T>::substr(s32int start, u32int size) {
+BasicString<T> BasicString<T>::substr(s32int start, s32int size) {
+ if (size == 0) return BasicString<T>();
if (start < 0) start = m_length - start;
+ if (size == -1) size = m_length - start;
BasicString<T> ret;
ret.m_string = new T[size + 1];
ret.m_length = size;
diff --git a/Source/Library/Common/BasicString.class.h b/Source/Library/Common/BasicString.class.h
index 21041e8..03d82c1 100644
--- a/Source/Library/Common/BasicString.class.h
+++ b/Source/Library/Common/BasicString.class.h
@@ -46,7 +46,7 @@ class BasicString {
bool contains(const T& chr) const;
Vector< BasicString<T> > split(T sep) const;
- BasicString<T> substr(s32int start, u32int size);
+ BasicString<T> substr(s32int start, s32int size = -1);
};
#include "BasicString.class.cpp"
diff --git a/Source/Library/Common/ByteArray.class.cpp b/Source/Library/Common/ByteArray.class.cpp
index 2a42702..95326f7 100644
--- a/Source/Library/Common/ByteArray.class.cpp
+++ b/Source/Library/Common/ByteArray.class.cpp
@@ -50,9 +50,9 @@ void ByteArray::resize(u32int size) {
}
String ByteArray::toString (u8int encoding) {
- char* c = new char[m_length + 1];
+ char* c = new char[m_length + 4];
memcpy((u8int*)c, m_string, m_length);
- c[m_length] = 0; //Add NULL terminator
+ for (int i = 0; i < 4; i++) c[m_length + i] = 0; //Add NULL terminator
String r(c, encoding);
delete c;
return r;
diff --git a/Source/Library/Common/String.class.cpp b/Source/Library/Common/String.class.cpp
index 63ff837..fe851bd 100644
--- a/Source/Library/Common/String.class.cpp
+++ b/Source/Library/Common/String.class.cpp
@@ -84,8 +84,8 @@ void String::affect (const char* string, u8int encoding) {
return;
}
m_string = new WChar[m_length + 1];
- int i = 0, l = strlen(string), c = 0;
- while (i < l) {
+ u32int i = 0, c = 0;
+ while (c < m_length) {
i += m_string[c].affect(string + i, encoding);
c++;
}
@@ -191,8 +191,10 @@ Vector<String> String::split(WChar c) const {
return ret;
}
-String String::substr(s32int start, u32int size) {
+String String::substr(s32int start, s32int size) {
+ if (size == 0) return String();
if (start < 0) start = m_length - start;
+ if (size == -1) size = m_length - start;
String ret;
ret.m_string = new WChar[size + 1];
ret.m_length = size;
diff --git a/Source/Library/Common/String.class.h b/Source/Library/Common/String.class.h
index 0d48ce6..b623fb2 100644
--- a/Source/Library/Common/String.class.h
+++ b/Source/Library/Common/String.class.h
@@ -43,7 +43,7 @@ class String : public BasicString<WChar> {
Vector<String> split(WChar c) const;
- String substr(s32int start, u32int size);
+ String substr(s32int start, s32int size = -1);
};
#endif
diff --git a/Source/Library/Common/WChar.class.cpp b/Source/Library/Common/WChar.class.cpp
index f5bd5bc..312a5db 100644
--- a/Source/Library/Common/WChar.class.cpp
+++ b/Source/Library/Common/WChar.class.cpp
@@ -29,8 +29,10 @@ WChar::WChar(char c) {
WChar::WChar(const char* c, u8int encoding) {
if (encoding == UE_UTF8) affectUtf8(c);
- if (encoding == UE_UTF16) affectUtf16(c);
- if (encoding == UE_UTF32) affectUtf32(c);
+ if (encoding == UE_UTF16_LE) affectUtf16le(c);
+ if (encoding == UE_UTF16_BE) affectUtf16be(c);
+ if (encoding == UE_UTF32_LE) affectUtf32le(c);
+ if (encoding == UE_UTF32_BE) affectUtf32be(c);
}
u32int WChar::ucharLen(const char* c, u8int encoding) {
@@ -40,18 +42,21 @@ u32int WChar::ucharLen(const char* c, u8int encoding) {
else if ((c[0] & 0xF0) == 0xE0) return 3;
else if ((c[0] & 0xF8) == 0xF0) return 4;
else return 1;
- } else if (encoding == UE_UTF16) {
+ } else if (encoding == UE_UTF16_BE) {
if ((c[0] & 0xFC) == 0xD8 and (c[2] & 0xFC) == 0xDC) return 4;
else return 2;
- } else if (encoding == UE_UTF32) {
+ } else if (encoding == UE_UTF16_LE) { //Little endian: the high byte of each 16-bit unit is the second byte
+ if ((c[1] & 0xFC) == 0xD8 and (c[3] & 0xFC) == 0xDC) return 4; //Surrogate pair
+ else return 2;
+ } else if (encoding == UE_UTF32_LE or encoding == UE_UTF32_BE) { //Both UTF-32 byte orders use 4 bytes per character
return 4;
}
return 1;
}
u32int WChar::utfLen(const char* c, u8int encoding) {
- int i = 0, l = strlen(c), co = 0;
- while (i < l) {
+ int i = 0, co = 0;
+ while (WChar(c + i, encoding) != 0) {
i += ucharLen(c + i, encoding);
co++;
}
@@ -90,7 +95,7 @@ u32int WChar::affectUtf8(const char* c) { //Returns the number of bytes for the
return 1;
}
-u32int WChar::affectUtf16(const char* c) {
+u32int WChar::affectUtf16be(const char* c) {
if ((c[0] & 0xFC) == 0xD8 and // 11111100b, 11011000b
(c[2] & 0xFC) == 0xDC) { // 11111100b, 11011100b
u32int w = ((c[0] & 0x03) << 2) | ((c[1] & 0xC0) >> 6);
@@ -108,13 +113,38 @@ u32int WChar::affectUtf16(const char* c) {
}
}
-u32int WChar::affectUtf32(const char* c) {
+u32int WChar::affectUtf16le(const char* c) { //Decodes one UTF-16LE character at c into value; returns the number of bytes read (2 or 4)
+ if ((c[1] & 0xFC) == 0xD8 and // 11111100b, 11011000b
+ (c[3] & 0xFC) == 0xDC) { // 11111100b, 11011100b
+ u32int w = ((c[1] & 0x03) << 2) | ((c[0] & 0xC0) >> 6);
+ u32int x = (c[0] & 0x3F);
+ u32int y = ((c[3] & 0x03) << 8) | (c[2] & 0xFF); //Low byte of the trail unit is c[2]; masked so a signed char cannot sign-extend
+ value = ((w + 1) << 16) | (x << 10) | y;
+ if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed
+ if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
+ return 4;
+ } else {
+ value = ((c[1] & 0xFF) << 8) | (c[0] & 0xFF); //Mask both bytes: plain char may be signed
+ if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed
+ if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
+ return 2;
+ }
+}
+
+u32int WChar::affectUtf32be(const char* c) {
value = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3];
if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed
if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
return 4;
}
+u32int WChar::affectUtf32le(const char* c) { //Decodes one UTF-32LE character at c into value; always returns 4
+ value = ((u32int)(c[3] & 0xFF) << 24) | ((u32int)(c[2] & 0xFF) << 16) | ((u32int)(c[1] & 0xFF) << 8) | (u32int)(c[0] & 0xFF); //Bytes masked before shifting: plain char may be signed
+ if (value >= 0xD800 and value <= 0xDFFF) value = 0; //These values are unallowed
+ if (value >= 0xFFFE and value <= 0xFFFF) value = 0;
+ return 4;
+}
+
u8int WChar::toAscii() {
if (value < 128) return (char)value;
for (int i = 0; i < 128; i++) {
@@ -144,9 +174,9 @@ uchar_repr_t WChar::toUtf8() {
return r;
}
-//TODO : code WChar::toUtf16
+//TODO : code WChar::toUtf16(be|le)
-uchar_repr_t WChar::toUtf32() {
+uchar_repr_t WChar::toUtf32be() {
uchar_repr_t r;
r.c[0] = (value >> 24) & 0xFF;
r.c[1] = (value >> 16) & 0xFF;
@@ -154,3 +184,12 @@ uchar_repr_t WChar::toUtf32() {
r.c[3] = value & 0xFF;
return r;
}
+
+uchar_repr_t WChar::toUtf32le() { //Splits value into four bytes stored least significant byte first
+ uchar_repr_t r;
+ r.c[3] = (value >> 24) & 0xFF; //Bits 24-31 go in the last byte
+ r.c[2] = (value >> 16) & 0xFF;
+ r.c[1] = (value >> 8) & 0xFF;
+ r.c[0] = value & 0xFF; //Bits 0-7 go in the first byte
+ return r;
+}
diff --git a/Source/Library/Common/WChar.class.h b/Source/Library/Common/WChar.class.h
index 5d6d26b..afaeb44 100644
--- a/Source/Library/Common/WChar.class.h
+++ b/Source/Library/Common/WChar.class.h
@@ -9,8 +9,10 @@
enum {
UE_UTF8,
- UE_UTF16,
- UE_UTF32,
+ UE_UTF16_LE,
+ UE_UTF16_BE,
+ UE_UTF32_LE,
+ UE_UTF32_BE,
};
union uchar_repr_t {
@@ -31,13 +33,17 @@ struct WChar {
void affectAscii(char c);
u32int affectUtf8(const char* c);
- u32int affectUtf16(const char* c);
- u32int affectUtf32(const char* c);
+ u32int affectUtf16le(const char* c);
+ u32int affectUtf16be(const char* c);
+ u32int affectUtf32le(const char* c);
+ u32int affectUtf32be(const char* c);
u32int affect(const char* c, u8int encoding = UE_UTF8) {
if (encoding == UE_UTF8) return affectUtf8(c);
- if (encoding == UE_UTF16) return affectUtf16(c);
- if (encoding == UE_UTF32) return affectUtf32(c);
+ if (encoding == UE_UTF16_LE) return affectUtf16le(c);
+ if (encoding == UE_UTF16_BE) return affectUtf16be(c);
+ if (encoding == UE_UTF32_LE) return affectUtf32le(c);
+ if (encoding == UE_UTF32_BE) return affectUtf32be(c);
affectAscii(c[0]); //Default case :/
return 1;
}
@@ -45,13 +51,17 @@ struct WChar {
u8int toAscii();
uchar_repr_t toUtf8();
- uchar_repr_t toUtf16();
- uchar_repr_t toUtf32();
+ uchar_repr_t toUtf16le();
+ uchar_repr_t toUtf16be();
+ uchar_repr_t toUtf32le();
+ uchar_repr_t toUtf32be();
uchar_repr_t encode(u8int encoding = UE_UTF8) {
if (encoding == UE_UTF8) return toUtf8();
- //if (encoding == UE_UTF16) return toUtf16();
- if (encoding == UE_UTF32) return toUtf32();
+ //if (encoding == UE_UTF16_LE) return toUtf16le();
+ //if (encoding == UE_UTF16_BE) return toUtf16be();
+ if (encoding == UE_UTF32_LE) return toUtf32le();
+ if (encoding == UE_UTF32_BE) return toUtf32be();
uchar_repr_t x;
x.c[0] = toAscii();
return x;