summaryrefslogtreecommitdiff
path: root/libavutil/avstring.c
diff options
context:
space:
mode:
author: Stefano Sabatini <stefasab@gmail.com> 2013-10-03 01:21:40 +0200
committer: Stefano Sabatini <stefasab@gmail.com> 2013-11-22 16:51:05 +0100
commit: 68590650f05f2bf97766362f2817372987c8a52e (patch)
tree: b5b0292ed90bb1db4172f646e5e8e59c4326f31c /libavutil/avstring.c
parent: e782eea183ba3c03f5179ac83f85e25ae9c1290f (diff)
lavu/avstring: add av_utf8_decode() function
Diffstat (limited to 'libavutil/avstring.c')
-rw-r--r-- libavutil/avstring.c 64
1 files changed, 64 insertions, 0 deletions
diff --git a/libavutil/avstring.c b/libavutil/avstring.c
index eed58fae1e..20931071b3 100644
--- a/libavutil/avstring.c
+++ b/libavutil/avstring.c
@@ -307,6 +307,70 @@ int av_isxdigit(int c)
return av_isdigit(c) || (c >= 'a' && c <= 'f');
}
+/**
+ * Read and decode a single UTF-8 code point from the buffer at *bufp.
+ *
+ * @param codep   receives the decoded code point. It is written whenever a
+ *                complete sequence with a value below 2^31 was decoded, even
+ *                if one of the flag-dependent validity checks then fails.
+ * @param bufp    in/out pointer to the next byte to read. Unless the buffer
+ *                is already exhausted on entry, it is advanced past every
+ *                byte that was consumed, on success and on error alike.
+ * @param buf_end pointer one past the last readable byte.
+ * @param flags   combination of AV_UTF8_FLAG_* values selecting which
+ *                otherwise-rejected code points are accepted (or which
+ *                extra ones are excluded).
+ * @return 0 on success, or when *bufp >= buf_end (nothing is read and
+ *         neither *bufp nor *codep is touched); AVERROR(EILSEQ) for an
+ *         invalid lead byte, a truncated or malformed sequence, or a code
+ *         point rejected under the given flags.
+ *
+ * NOTE(review): overlong encodings (e.g. 0xC0 0x80) are not rejected here.
+ */
+int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
+                   unsigned int flags)
+{
+    const uint8_t *p = *bufp;
+    uint32_t top;
+    uint64_t code;
+    int ret = 0;
+
+    if (p >= buf_end)
+        return 0;
+
+    code = *p++;
+
+    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
+       which is not admitted */
+    if ((code & 0xc0) == 0x80 || code >= 0xFE) {
+        ret = AVERROR(EILSEQ);
+        goto end;
+    }
+
+    /* top tracks the next still-unexamined length bit of the lead byte;
+       it is 0 for a plain ASCII byte, so the loop is skipped entirely */
+    top = (code & 128) >> 1;
+
+    while (code & top) {
+        int tmp;
+        if (p >= buf_end) {
+            ret = AVERROR(EILSEQ); /* incomplete sequence */
+            goto end;
+        }
+
+        /* a continuation byte must have the form 10xx-xxxx: subtracting 128
+           maps valid bytes to 0..63, every other byte fails the check */
+        tmp = *p++ - 128;
+        if (tmp >> 6) {
+            ret = AVERROR(EILSEQ);
+            goto end;
+        }
+        code = (code << 6) + tmp;
+        /* the lead byte moved up 6 bits and we advance to its next lower
+           bit, so the mask moves up by 6 - 1 = 5 */
+        top <<= 5;
+    }
+    /* drop the length-marker bits of the lead byte, keep the payload */
+    code &= (top << 1) - 1;
+
+    /* unsigned constant: 1 << 31 would overflow a signed int and turn the
+       comparison into a check that can never trigger */
+    if (code >= 1U << 31) {
+        ret = AVERROR(EILSEQ); /* out-of-range value */
+        goto end;
+    }
+
+    *codep = code;
+
+    if (code > 0x10FFFF &&
+        !(flags & AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES))
+        ret = AVERROR(EILSEQ);
+    if (code < 0x20 && code != 0x9 && code != 0xA && code != 0xD &&
+        (flags & AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES))
+        ret = AVERROR(EILSEQ);
+    if (code >= 0xD800 && code <= 0xDFFF &&
+        !(flags & AV_UTF8_FLAG_ACCEPT_SURROGATES))
+        ret = AVERROR(EILSEQ);
+    /* both non-characters are rejected unless the caller opts in; the
+       parentheses matter, since && binds tighter than || and ! tighter
+       than & */
+    if ((code == 0xFFFE || code == 0xFFFF) &&
+        !(flags & AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS))
+        ret = AVERROR(EILSEQ);
+
+end:
+    *bufp = p;
+    return ret;
+}
+
#ifdef TEST
int main(void)