about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Max Kellermann <max@duempel.org>, 2008-08-29 09:38:58 +0200
committer: Max Kellermann <max@duempel.org>, 2008-08-29 09:38:58 +0200
commit: 43c389b961c609f9c705cfe14ed429082ac9115a (patch)
tree: 61e93ef93115a5f96c5981cae1fd7a36e789d9f7 /src
parent: 92b757674ebcf5cf90e8adb66e7583edf1bc604e (diff)
added "length" parameter to validUtf8String()
At several places, we create temporary copies of non-null-terminated strings, just to use them in functions like validUtf8String(). We can save this temporary allocation and avoid heap fragmentation if we add a length parameter instead of expecting a null-terminated string.
Diffstat (limited to 'src')
-rw-r--r--  src/path.c   2
-rw-r--r--  src/tag.c    2
-rw-r--r--  src/utf8.c  18
-rw-r--r--  src/utf8.h   4
4 files changed, 16 insertions, 10 deletions
diff --git a/src/path.c b/src/path.c
index 6aaff84c..ceb00c5d 100644
--- a/src/path.c
+++ b/src/path.c
@@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
char *fs_charset_to_utf8(char *dst, const char *str)
{
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
- return (ret && !validUtf8String(ret)) ? NULL : ret;
+ return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
}
char *utf8_to_fs_charset(char *dst, const char *str)
diff --git a/src/tag.c b/src/tag.c
index a52391b4..1d22b46d 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
assert(str != NULL);
- if (validUtf8String(str))
+ if (validUtf8String(str, strlen(str)))
return str;
DEBUG("not valid utf8 in tag: %s\n",str);
diff --git a/src/utf8.c b/src/utf8.c
index e8f3dbdd..1b03f5d2 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
return (char)(c + utf8[1]);
}
-static unsigned int validateUtf8Char(const char *inUtf8Char)
+static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
{
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
+ assert(length > 0);
+
if (utf8Char[0] < 0x80)
return 1;
@@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
t = (t >> 1);
count++;
}
- if (count > 5)
+ if (count > 5 || (size_t)count > length)
return 0;
for (i = 1; i <= count; i++) {
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
@@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
return 0;
}
-int validUtf8String(const char *string)
+int validUtf8String(const char *string, size_t length)
{
unsigned int ret;
- while (*string) {
- ret = validateUtf8Char(string);
+ while (length > 0) {
+ ret = validateUtf8Char(string, length);
+ assert((size_t)ret <= length);
if (0 == ret)
return 0;
string += ret;
+ length -= ret;
}
return 1;
@@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
size_t len = 0;
while (*utf8) {
- count = validateUtf8Char(utf8);
+ count = validateUtf8Char(utf8, INT_MAX);
if (!count) {
free(ret);
return NULL;
@@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
size_t len = 0;
while (*utf8) {
- count = validateUtf8Char(utf8);
+ count = validateUtf8Char(utf8, INT_MAX);
if (count) {
*(cp++) = utf8_to_latin1_char(utf8);
utf8 += count;
diff --git a/src/utf8.h b/src/utf8.h
index 4a498306..353977be 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -19,11 +19,13 @@
#ifndef UTF_8_H
#define UTF_8_H
+#include <os_compat.h>
+
char *latin1StrToUtf8Dup(const char *latin1);
char *utf8StrToLatin1Dup(const char *utf8);
-int validUtf8String(const char *string);
+int validUtf8String(const char *string, size_t length);
char *utf8_to_latin1(char *dest, const char *utf8);