gameServer/Codec/Binary/UTF8/String.hs
author unc0rr
Wed, 18 Feb 2009 15:04:40 +0000
changeset 1804 4e78ad846fb6
permissions -rw-r--r--
New game server: - Incomplete implementation - More robust, no memory leaks, better architecture for easy features addition - Incompatible with current client
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1804
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     1
--
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     2
-- |
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     3
-- Module      :  Codec.Binary.UTF8.String
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     4
-- Copyright   :  (c) Eric Mertens 2007
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     5
-- License     :  BSD3-style (see LICENSE)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     6
-- 
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     7
-- Maintainer:    emertens@galois.com
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     8
-- Stability   :  experimental
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
     9
-- Portability :  portable
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    10
--
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    11
-- Support for encoding UTF8 Strings to and from @[Word8]@
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    12
--
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    13
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    14
module Codec.Binary.UTF8.String (
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    15
      encode
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    16
    , decode
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    17
    , encodeString
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    18
    , decodeString
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    19
  ) where
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    20
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    21
import Data.Word        (Word8)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    22
import Data.Bits        ((.|.),(.&.),shiftL,shiftR)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    23
import Data.Char        (chr,ord)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    24
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    25
default(Int)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    26
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    27
-- | Encode a string using 'encode' and store the result in a 'String'.
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    28
encodeString :: String -> String
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    29
encodeString xs = map (toEnum . fromEnum) (encode xs)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    30
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    31
-- | Decode a string using 'decode' using a 'String' as input.
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    32
-- | This is not safe but it is necessary if UTF-8 encoded text
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    33
-- | has been loaded into a 'String' prior to being decoded.
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    34
decodeString :: String -> String
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    35
decodeString xs = decode (map (toEnum . fromEnum) xs)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    36
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    37
replacement_character :: Char
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    38
replacement_character = '\xfffd'
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    39
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    40
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    41
encode :: String -> [Word8]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    42
encode = concatMap (map fromIntegral . go . ord)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    43
 where
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    44
  go oc
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    45
   | oc <= 0x7f       = [oc]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    46
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    47
   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    48
                        , 0x80 + oc .&. 0x3f
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    49
                        ]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    50
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    51
   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    52
                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    53
                        , 0x80 + oc .&. 0x3f
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    54
                        ]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    55
   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    56
                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    57
                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    58
                        , 0x80 + oc .&. 0x3f
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    59
                        ]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    60
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    61
--
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    62
-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    63
--
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    64
decode :: [Word8] -> String
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    65
decode [    ] = ""
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    66
decode (c:cs)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    67
  | c < 0x80  = chr (fromEnum c) : decode cs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    68
  | c < 0xc0  = replacement_character : decode cs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    69
  | c < 0xe0  = multi1
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    70
  | c < 0xf0  = multi_byte 2 0xf  0x800
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    71
  | c < 0xf8  = multi_byte 3 0x7  0x10000
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    72
  | c < 0xfc  = multi_byte 4 0x3  0x200000
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    73
  | c < 0xfe  = multi_byte 5 0x1  0x4000000
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    74
  | otherwise = replacement_character : decode cs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    75
  where
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    76
    multi1 = case cs of
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    77
      c1 : ds | c1 .&. 0xc0 == 0x80 ->
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    78
        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    79
        in if d >= 0x000080 then toEnum d : decode ds
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    80
                            else replacement_character : decode ds
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    81
      _ -> replacement_character : decode cs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    82
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    83
    multi_byte :: Int -> Word8 -> Int -> [Char]
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    84
    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    85
      where
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    86
        aux 0 rs acc
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    87
          | overlong <= acc && acc <= 0x10ffff &&
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    88
            (acc < 0xd800 || 0xdfff < acc)     &&
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    89
            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    90
          | otherwise = replacement_character : decode rs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    91
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    92
        aux n (r:rs) acc
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    93
          | r .&. 0xc0 == 0x80 = aux (n-1) rs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    94
                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    95
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    96
        aux _ rs     _ = replacement_character : decode rs
4e78ad846fb6 New game server:
unc0rr
parents:
diff changeset
    97