tools/confuse.hs
changeset 10073 865a4089278d
parent 10064 bf1a5ef4ef14
child 10075 dbaf90a0fbe0
equal deleted inserted replaced
10072:20680676b41c 10073:865a4089278d
     4 import Numeric
     4 import Numeric
     5 import Data.Char
     5 import Data.Char
     6 import Control.Monad
     6 import Control.Monad
     7 import qualified Data.ByteString as B
     7 import qualified Data.ByteString as B
     8 import qualified Data.ByteString.UTF8 as UTF8
     8 import qualified Data.ByteString.UTF8 as UTF8
       
     9 import qualified Data.Map as Map
     9 
    10 
    10 hx :: [Char] -> String
    11 hx :: [Char] -> String
    11 hx cs = let ch = (chr . fst . last . readHex $ cs) in
    12 hx cs = let ch = (chr . fst . last . readHex $ cs) in
    12             case ch of
    13             case ch of
    13                  '\'' -> "''"
    14                  '\'' -> "''"
    20         i :: String
    21         i :: String
    21         i = hx s
    22         i = hx s
    22         r :: String
    23         r :: String
    23         r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
    24         r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
    24 
    25 
       
    26 convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
       
    27 convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
       
    28     where
       
    29         u a = B.concat ["\\","u",a]
       
    30 
       
    31 toPair :: String -> (B.ByteString, [B.ByteString])
       
    32 toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
       
    33 
       
    34 
    25 main = do
    35 main = do
    26     ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
    36     ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
    27     B.writeFile "insert.sql" . B.intercalate ",\n" . map conv $ ll
    37     B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter (\(_, b) -> length b < 6). map toPair $ ll