misc/libphysfs/physfs_unicode.c
author nemo
Thu, 31 Dec 2015 15:11:44 -0500
changeset 11478 8c95d5a4366c
parent 8524 a65e9bcf0a03
child 12213 bb5522e88ab2
permissions -rw-r--r--
Tweak the hole carving to become less agressive about spacing if random points keep failing.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7768
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     1
#define __PHYSICSFS_INTERNAL__
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     2
#include "physfs_internal.h"
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     3
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     4
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     5
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     6
 * From rfc3629, the UTF-8 spec:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     7
 *  http://www.ietf.org/rfc/rfc3629.txt
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     8
 *
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
     9
 *   Char. number range  |        UTF-8 octet sequence
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    10
 *      (hexadecimal)    |              (binary)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    11
 *   --------------------+---------------------------------------------
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    12
 *   0000 0000-0000 007F | 0xxxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    13
 *   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    14
 *   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    15
 *   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    16
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    17
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    18
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    19
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    20
 * This may not be the best value, but it's one that isn't represented
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    21
 *  in Unicode (0x10FFFF is the largest codepoint value). We return this
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    22
 *  value from utf8codepoint() if there's bogus bits in the
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    23
 *  stream. PHYSFS_utf8ToUcs4() and friends will turn this value into something
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    24
 *  reasonable (like a question mark), for text that wants to try to recover,
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    25
 *  whereas utf8valid() will use the value to determine if a string has bad
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    26
 *  bits.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    27
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    28
#define UNICODE_BOGUS_CHAR_VALUE 0xFFFFFFFF
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    29
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    30
/*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    31
 * This is the codepoint we currently return when there were bogus bits in a
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    32
 *  UTF-8 string. May not fly in Asian locales?
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    33
 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    34
#define UNICODE_BOGUS_CHAR_CODEPOINT '?'
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    35
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    36
static PHYSFS_uint32 utf8codepoint(const char **_str)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    37
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    38
    const char *str = *_str;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    39
    PHYSFS_uint32 retval = 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    40
    PHYSFS_uint32 octet = (PHYSFS_uint32) ((PHYSFS_uint8) *str);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    41
    PHYSFS_uint32 octet2, octet3, octet4;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    42
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    43
    if (octet == 0)  /* null terminator, end of string. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    44
        return 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    45
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    46
    else if (octet < 128)  /* one octet char: 0 to 127 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    47
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    48
        (*_str)++;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    49
        return octet;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    50
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    51
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    52
    else if ((octet > 127) && (octet < 192))  /* bad (starts with 10xxxxxx). */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    53
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    54
        /*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    55
         * Apparently each of these is supposed to be flagged as a bogus
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    56
         *  char, instead of just resyncing to the next valid codepoint.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    57
         */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    58
        (*_str)++;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    59
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    60
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    61
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    62
    else if (octet < 224)  /* two octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    63
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    64
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    65
        octet -= (128+64);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    66
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    67
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    68
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    69
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    70
        *_str += 1;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    71
        retval = ((octet << 6) | (octet2 - 128));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    72
        if ((retval >= 0x80) && (retval <= 0x7FF))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    73
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    74
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    75
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    76
    else if (octet < 240)  /* three octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    77
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    78
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    79
        octet -= (128+64+32);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    80
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    81
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    82
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    83
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    84
        octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    85
        if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    86
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    87
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    88
        *_str += 2;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    89
        retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    90
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    91
        /* There are seven "UTF-16 surrogates" that are illegal in UTF-8. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    92
        switch (retval)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    93
        {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    94
            case 0xD800:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    95
            case 0xDB7F:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    96
            case 0xDB80:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    97
            case 0xDBFF:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    98
            case 0xDC00:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
    99
            case 0xDF80:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   100
            case 0xDFFF:
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   101
                return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   102
        } /* switch */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   103
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   104
        /* 0xFFFE and 0xFFFF are illegal, too, so we check them at the edge. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   105
        if ((retval >= 0x800) && (retval <= 0xFFFD))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   106
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   107
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   108
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   109
    else if (octet < 248)  /* four octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   110
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   111
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   112
        octet -= (128+64+32+16);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   113
        octet2 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   114
        if ((octet2 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   115
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   116
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   117
        octet3 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   118
        if ((octet3 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   119
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   120
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   121
        octet4 = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   122
        if ((octet4 & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   123
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   124
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   125
        *_str += 3;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   126
        retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   127
                   ((octet3 - 128) << 6) | ((octet4 - 128)) );
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   128
        if ((retval >= 0x10000) && (retval <= 0x10FFFF))
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   129
            return retval;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   130
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   131
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   132
    /*
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   133
     * Five and six octet sequences became illegal in rfc3629.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   134
     *  We throw the codepoint away, but parse them to make sure we move
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   135
     *  ahead the right number of bytes and don't overflow the buffer.
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   136
     */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   137
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   138
    else if (octet < 252)  /* five octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   139
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   140
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   141
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   142
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   143
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   144
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   145
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   146
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   147
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   148
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   149
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   150
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   151
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   152
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   153
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   154
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   155
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   156
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   157
        *_str += 4;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   158
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   159
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   160
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   161
    else  /* six octets */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   162
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   163
        (*_str)++;  /* advance at least one byte in case of an error */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   164
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   165
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   166
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   167
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   168
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   169
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   170
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   171
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   172
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   173
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   174
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   175
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   176
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   177
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   178
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   179
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   180
        octet = (PHYSFS_uint32) ((PHYSFS_uint8) *(++str));
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   181
        if ((octet & (128+64)) != 128)  /* Format isn't 10xxxxxx? */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   182
            return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   183
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   184
        *_str += 6;  /* skip to next possible start of codepoint. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   185
        return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   186
    } /* else if */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   187
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   188
    return UNICODE_BOGUS_CHAR_VALUE;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   189
} /* utf8codepoint */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   190
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   191
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   192
/*
 * Convert the null-terminated UTF-8 string in src to UCS-4 in dst.
 *
 * len is consumed sizeof (PHYSFS_uint32) at a time for each element stored,
 *  so it is the size of dst in bytes. One element is always held back
 *  for the null terminator; output is truncated if dst is too small. Bogus
 *  sequences in src are stored as UNICODE_BOGUS_CHAR_CODEPOINT.
 *
 * If len can't hold even the terminator, dst is not touched at all.
 */
void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len)
{
    /*
     * len is unsigned: subtracting from a too-small value would wrap to a
     *  huge number and let the loop run off the end of dst.
     */
    if (len < sizeof (PHYSFS_uint32))
        return;

    len -= sizeof (PHYSFS_uint32);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint32))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;
        *(dst++) = cp;
        len -= sizeof (PHYSFS_uint32);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUcs4 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   208
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   209
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   210
/*
 * Convert the null-terminated UTF-8 string in src to UCS-2 in dst.
 *
 * len is consumed sizeof (PHYSFS_uint16) at a time for each element stored,
 *  so it is the size of dst in bytes. One element is always held back
 *  for the null terminator; output is truncated if dst is too small. Bogus
 *  sequences, and codepoints that don't fit in 16 bits, are stored as
 *  UNICODE_BOGUS_CHAR_CODEPOINT.
 *
 * If len can't hold even the terminator, dst is not touched at all.
 */
void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
{
    /*
     * len is unsigned: subtracting from a too-small value would wrap to a
     *  huge number and let the loop run off the end of dst.
     */
    if (len < sizeof (PHYSFS_uint16))
        return;

    len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint16))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        if (cp > 0xFFFF)  /* UTF-16 surrogates (bogus chars in UCS-2) */
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        *(dst++) = (PHYSFS_uint16) cp;  /* cp <= 0xFFFF here, nothing lost. */
        len -= sizeof (PHYSFS_uint16);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUcs2 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   230
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   231
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   232
/*
 * Convert the null-terminated UTF-8 string in src to UTF-16 in dst.
 *
 * len is consumed sizeof (PHYSFS_uint16) at a time for each element stored,
 *  so it is the size of dst in bytes. One element is always held back
 *  for the null terminator; output is truncated if dst is too small, and a
 *  surrogate pair is never split by the truncation. Bogus sequences in src
 *  are stored as UNICODE_BOGUS_CHAR_CODEPOINT.
 *
 * If len can't hold even the terminator, dst is not touched at all.
 */
void PHYSFS_utf8ToUtf16(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
{
    /*
     * len is unsigned: subtracting from a too-small value would wrap to a
     *  huge number and let the loop run off the end of dst.
     */
    if (len < sizeof (PHYSFS_uint16))
        return;

    len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
    while (len >= sizeof (PHYSFS_uint16))
    {
        PHYSFS_uint32 cp = utf8codepoint(&src);
        if (cp == 0)
            break;
        else if (cp == UNICODE_BOGUS_CHAR_VALUE)
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;

        if (cp > 0xFFFF)  /* encode as surrogate pair */
        {
            if (len < (sizeof (PHYSFS_uint16) * 2))
                break;  /* not enough room for the pair, stop now. */

            cp -= 0x10000;  /* Make this a 20-bit value */

            /* high surrogate carries the top ten bits... */
            *(dst++) = (PHYSFS_uint16) (0xD800 + ((cp >> 10) & 0x3FF));
            len -= sizeof (PHYSFS_uint16);

            /* ...low surrogate the bottom ten; stored by the common path. */
            cp = 0xDC00 + (cp & 0x3FF);
        } /* if */

        *(dst++) = (PHYSFS_uint16) cp;
        len -= sizeof (PHYSFS_uint16);
    } /* while */

    *dst = 0;
} /* PHYSFS_utf8ToUtf16 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   262
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   263
/*
 * Append the UTF-8 encoding of a single codepoint to an output buffer.
 *
 *   cp   - the codepoint to encode.
 *   _dst - in/out: current write position; advanced past whatever is written.
 *   _len - in/out: bytes still available at *_dst; reduced by the number of
 *          bytes written.
 *
 * If *_len is zero on entry, nothing is read or written. If the encoded
 *  sequence does not fit in the remaining space, nothing is written and
 *  *_len is forced to zero, so a caller looping on the length stops there
 *  instead of emitting a truncated sequence.
 *
 * Values that are rejected get replaced with UNICODE_BOGUS_CHAR_CODEPOINT
 *  before encoding.
 */
static void utf8fromcodepoint(PHYSFS_uint32 cp, char **_dst, PHYSFS_uint64 *_len)
{
    char *out = *_dst;
    PHYSFS_uint64 avail = *_len;
    PHYSFS_uint64 need;

    if (avail == 0)
        return;

    /*
     * Out of Unicode range, the 0xFFFE/0xFFFF values, and seven specific
     *  values from the UTF-16 surrogate range are swapped for the bogus
     *  character.
     * NOTE(review): only these seven surrogate values are filtered; RFC 3629
     *  excludes all of 0xD800-0xDFFF from UTF-8. Confirm whether the
     *  narrower filter is intentional before widening it.
     */
    if ( (cp > 0x10FFFF) ||
         (cp == 0xFFFE) || (cp == 0xFFFF) ||
         (cp == 0xD800) || (cp == 0xDB7F) || (cp == 0xDB80) ||
         (cp == 0xDBFF) || (cp == 0xDC00) || (cp == 0xDF80) ||
         (cp == 0xDFFF) )
    {
        cp = UNICODE_BOGUS_CHAR_CODEPOINT;
    } /* if */

    /* How many octets does this codepoint take? (see the rfc3629 table.) */
    if (cp < 0x80)
        need = 1;
    else if (cp < 0x800)
        need = 2;
    else if (cp < 0x10000)
        need = 3;
    else
        need = 4;

    if (avail < need)
        avail = 0;  /* won't fit: write nothing, report the buffer as full. */
    else
    {
        switch (need)
        {
            case 1:  /* 0xxxxxxx */
                *(out++) = (char) cp;
                break;

            case 2:  /* 110xxxxx 10xxxxxx */
                *(out++) = (char) ((cp >> 6) | 0xC0);
                *(out++) = (char) ((cp & 0x3F) | 0x80);
                break;

            case 3:  /* 1110xxxx 10xxxxxx 10xxxxxx */
                *(out++) = (char) ((cp >> 12) | 0xE0);
                *(out++) = (char) (((cp >> 6) & 0x3F) | 0x80);
                *(out++) = (char) ((cp & 0x3F) | 0x80);
                break;

            default:  /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
                *(out++) = (char) ((cp >> 18) | 0xF0);
                *(out++) = (char) (((cp >> 12) & 0x3F) | 0x80);
                *(out++) = (char) (((cp >> 6) & 0x3F) | 0x80);
                *(out++) = (char) ((cp & 0x3F) | 0x80);
                break;
        } /* switch */

        avail -= need;
    } /* else */

    *_dst = out;
    *_len = avail;
} /* utf8fromcodepoint */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   340
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   341
/*
 * Shared body of the fixed-width-to-UTF-8 converters.
 *
 *   typ - type each source element is cast to before being widened to a
 *         32-bit codepoint.
 *   src - pointer to the null-terminated source string (advanced in place).
 *   dst - output pointer (advanced in place).
 *   len - size of the output buffer in bytes (modified in place).
 *
 * Note this expands to a `return` from the enclosing function when len is
 *  zero, in which case dst is never written. Otherwise one byte is held
 *  back for the null terminator, codepoints are encoded until the source
 *  terminator is read or the buffer is exhausted, and the output is always
 *  null-terminated.
 *
 * Wrapped in do/while(0) with parenthesized arguments so it behaves as a
 *  single statement wherever it is invoked.
 */
#define UTF8FROMTYPE(typ, src, dst, len) \
    do { \
        if ((len) == 0) return; \
        (len)--; \
        while ((len) != 0) \
        { \
            const PHYSFS_uint32 cp = (PHYSFS_uint32) ((typ) (*((src)++))); \
            if (cp == 0) break; \
            utf8fromcodepoint(cp, &(dst), &(len)); \
        } \
        *(dst) = '\0'; \
    } while (0)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   351
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   352
/*
 * Convert a null-terminated string of 32-bit codepoints to UTF-8.
 *
 *   src - source string; reading stops at the first zero element.
 *   dst - output buffer.
 *   len - size of dst, in bytes.
 *
 * Does nothing at all if len is zero; otherwise dst is always
 *  null-terminated, and output stops early if the buffer fills up.
 */
void PHYSFS_utf8FromUcs4(const PHYSFS_uint32 *src, char *dst, PHYSFS_uint64 len)
{
    UTF8FROMTYPE(PHYSFS_uint32, src, dst, len);
} /* PHYSFS_utf8FromUcs4 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   356
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   357
/*
 * Convert a null-terminated string of 16-bit codepoints to UTF-8.
 *
 *   src - source string; reading stops at the first zero element.
 *   dst - output buffer.
 *   len - size of dst, in bytes.
 *
 * Every 16-bit unit is encoded as a codepoint of its own; surrogate pairs
 *  are not combined here (PHYSFS_utf8FromUtf16 does that).
 *
 * Does nothing at all if len is zero; otherwise dst is always
 *  null-terminated, and output stops early if the buffer fills up.
 */
void PHYSFS_utf8FromUcs2(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len)
{
    /* Element type matches src; widening to 32 bits happens in the macro. */
    UTF8FROMTYPE(PHYSFS_uint16, src, dst, len);
} /* PHYSFS_utf8FromUcs2 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   361
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   362
/* latin1 maps to unicode codepoints directly, we just utf-8 encode it. */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   363
void PHYSFS_utf8FromLatin1(const char *src, char *dst, PHYSFS_uint64 len)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   364
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   365
    UTF8FROMTYPE(PHYSFS_uint8, src, dst, len);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   366
} /* PHYSFS_utf8FromLatin1 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   367
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   368
#undef UTF8FROMTYPE
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   369
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   370
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   371
/*
 * Convert a null-terminated UTF-16 string to UTF-8.
 *
 *   src - source string; reading stops at the first zero code unit.
 *   dst - output buffer.
 *   len - size of dst, in bytes.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 *  supplementary-plane codepoint. A low surrogate with no preceding high
 *  surrogate, or a high surrogate not followed by a low one, is replaced
 *  with UNICODE_BOGUS_CHAR_CODEPOINT.
 *
 * Does nothing at all if len is zero; otherwise dst is always
 *  null-terminated, and output stops early if the buffer fills up.
 */
void PHYSFS_utf8FromUtf16(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len)
{
    if (len == 0)
        return;

    len--;  /* hold back one byte for the null terminator. */
    while (len)
    {
        PHYSFS_uint32 cp = (PHYSFS_uint32) *(src++);
        if (cp == 0)
            break;

        /* Orphaned second half of surrogate pair? */
        if ((cp >= 0xDC00) && (cp <= 0xDFFF))
            cp = UNICODE_BOGUS_CHAR_CODEPOINT;
        else if ((cp >= 0xD800) && (cp <= 0xDBFF))  /* start surrogate pair! */
        {
            /*
             * Peek only: if this isn't a low surrogate (including the
             *  string terminator), leave it for the next loop iteration.
             */
            const PHYSFS_uint32 pair = (PHYSFS_uint32) *src;
            if ((pair < 0xDC00) || (pair > 0xDFFF))
                cp = UNICODE_BOGUS_CHAR_CODEPOINT;
            else
            {
                src++;  /* eat the other surrogate. */
                /*
                 * The two 10-bit halves form a 20-bit offset from 0x10000
                 *  (the encoder subtracts 0x10000 before splitting), so the
                 *  base has to be added back to get the real codepoint.
                 */
                cp = 0x10000 + (((cp - 0xD800) << 10) | (pair - 0xDC00));
            } /* else */
        } /* else if */

        utf8fromcodepoint(cp, &dst, &len);
    } /* while */

    *dst = '\0';
} /* PHYSFS_utf8FromUtf16 */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   403
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   404
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   405
typedef struct CaseFoldMapping
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   406
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   407
    PHYSFS_uint32 from;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   408
    PHYSFS_uint32 to0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   409
    PHYSFS_uint32 to1;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   410
    PHYSFS_uint32 to2;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   411
} CaseFoldMapping;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   412
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   413
typedef struct CaseFoldHashBucket
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   414
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   415
    const PHYSFS_uint8 count;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   416
    const CaseFoldMapping *list;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   417
} CaseFoldHashBucket;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   418
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   419
#include "physfs_casefolding.h"
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   420
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   421
/*
 * Look up the case-folded form of a codepoint.
 *
 *   from - codepoint to fold.
 *   to   - receives exactly three codepoints. If the table has an entry
 *          for (from), these are that entry's to0/to1/to2; if not, they
 *          are (from) itself followed by two zeros.
 */
static void locate_case_fold_mapping(const PHYSFS_uint32 from,
                                     PHYSFS_uint32 *to)
{
    /* Fold the low 16 bits of the codepoint down to an 8-bit bucket index. */
    const PHYSFS_uint8 slot = ((from ^ (from >> 8)) & 0xFF);
    const CaseFoldHashBucket *bucket = &case_fold_hash[slot];
    const CaseFoldMapping *entry = bucket->list;
    PHYSFS_uint32 remaining = bucket->count;

    /* Linear scan of the (short) bucket for an exact match. */
    while (remaining > 0)
    {
        if (entry->from == from)
        {
            to[0] = entry->to0;
            to[1] = entry->to1;
            to[2] = entry->to2;
            return;
        } /* if */

        entry++;
        remaining--;
    } /* while */

    /* No entry: this codepoint folds to itself. */
    to[0] = from;
    to[1] = 0;
    to[2] = 0;
} /* locate_case_fold_mapping */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   445
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   446
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   447
/*
 * Test two codepoints for equality after case folding.
 *
 * Returns 1 if both codepoints fold to the same three-slot sequence, 0 if
 *  they do not. Despite the "cmp" name this is an equality test, not an
 *  ordering: it never returns a negative value.
 */
static int utf8codepointcmp(const PHYSFS_uint32 cp1, const PHYSFS_uint32 cp2)
{
    PHYSFS_uint32 lhs[3];
    PHYSFS_uint32 rhs[3];
    int i;

    locate_case_fold_mapping(cp1, lhs);
    locate_case_fold_mapping(cp2, rhs);

    for (i = 0; i < 3; i++)
    {
        if (lhs[i] != rhs[i])
            return 0;  /* folded forms differ. */
    } /* for */

    return 1;
} /* utf8codepointcmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   456
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   457
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   458
int __PHYSFS_utf8stricmp(const char *str1, const char *str2)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   459
{
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   460
    while (1)
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   461
    {
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   462
        const PHYSFS_uint32 cp1 = utf8codepoint(&str1);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   463
        const PHYSFS_uint32 cp2 = utf8codepoint(&str2);
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   464
        if (!utf8codepointcmp(cp1, cp2)) break;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   465
        if (cp1 == 0) return 1;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   466
    } /* while */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   467
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   468
    return 0;
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   469
} /* __PHYSFS_utf8stricmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   470
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   471
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   472
/*
 * Case-insensitive equality test for the first (n) codepoints of two
 *  null-terminated UTF-8 strings.
 *
 * Returns 1 if the strings match over that span (or both end, still
 *  matching, before n codepoints have been compared), 0 if they differ.
 *  With n == 0 nothing is compared and the result is 1. As with
 *  __PHYSFS_utf8stricmp(), nonzero means "equal" here.
 *
 * Note that (n) counts codepoints, not bytes.
 */
int __PHYSFS_utf8strnicmp(const char *str1, const char *str2, PHYSFS_uint32 n)
{
    PHYSFS_uint32 remaining;

    for (remaining = n; remaining > 0; remaining--)
    {
        const PHYSFS_uint32 a = utf8codepoint(&str1);
        const PHYSFS_uint32 b = utf8codepoint(&str2);

        if (!utf8codepointcmp(a, b))
            return 0;  /* mismatch. */

        if (a == 0)
            return 1;  /* both strings ended early, and matched. */
    } /* for */

    return 1;  /* matched to n chars. */
} /* __PHYSFS_utf8strnicmp */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   485
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   486
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   487
/*
 * Case-insensitive comparison of two null-terminated strings, folding only
 *  the ASCII letters 'A'-'Z' to lowercase. All other bytes are compared
 *  as-is, as plain char values.
 *
 * Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if the
 *  strings are equal.
 */
int __PHYSFS_stricmpASCII(const char *str1, const char *str2)
{
    for (;; str1++, str2++)
    {
        char a = *str1;
        char b = *str2;

        /* 'A' and 'a' are 32 apart in ASCII. */
        if ((a >= 'A') && (a <= 'Z'))
            a += 32;
        if ((b >= 'A') && (b <= 'Z'))
            b += 32;

        if (a != b)
            return (a < b) ? -1 : 1;

        if (a == 0)  /* equal and null: both strings ended together. */
            return 0;
    } /* for */
} /* __PHYSFS_stricmpASCII */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   505
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   506
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   507
/*
 * Case-insensitive comparison of at most (n) bytes of two null-terminated
 *  strings, folding only the ASCII letters 'A'-'Z' to lowercase. All other
 *  bytes are compared as-is, as plain char values.
 *
 * Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if the
 *  strings are equal over the first (n) bytes or both end before that.
 *  With n == 0 nothing is read and the result is 0.
 */
int __PHYSFS_strnicmpASCII(const char *str1, const char *str2, PHYSFS_uint32 n)
{
    PHYSFS_uint32 i;

    for (i = 0; i < n; i++)
    {
        char a = str1[i];
        char b = str2[i];

        /* 'A' and 'a' are 32 apart in ASCII. */
        if ((a >= 'A') && (a <= 'Z'))
            a += 32;
        if ((b >= 'A') && (b <= 'Z'))
            b += 32;

        if (a != b)
            return (a < b) ? -1 : 1;

        if (a == 0)  /* equal and null: both strings ended together. */
            return 0;
    } /* for */

    return 0;
} /* __PHYSFS_strnicmpASCII */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   525
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   526
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   527
/* end of physfs_unicode.c ... */
13e2037ebc79 Try using PhysicsFS.
unc0rr
parents:
diff changeset
   528