tools/confuse.hs
author Wuzzy <Wuzzy2@mail.ru>
Thu, 25 Apr 2019 23:01:05 +0200
changeset 14844 e239378a9400
parent 10075 dbaf90a0fbe0
permissions -rw-r--r--
Prevent entering “/”, “\” and “:” in team and scheme names. The name of teams and schems is saved in the file name itself, so these characters would cause trouble as they are used in path names in Linux and Windows.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     1
{-# LANGUAGE OverloadedStrings #-}
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     2
module Confuse where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     3
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     4
import Numeric
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     5
import Data.Char
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     6
import Control.Monad
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     7
import qualified Data.ByteString as B
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     8
import qualified Data.ByteString.UTF8 as UTF8
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
     9
import qualified Data.Map as Map
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    10
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    11
hx :: [Char] -> String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    12
hx cs = let ch = (chr . fst . last . readHex $ cs) in
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    13
            case ch of
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    14
                 '\'' -> "''"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    15
                 '\\' -> "\\\\"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    16
                 c -> c : []
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    17
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    18
conv :: String -> B.ByteString
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    19
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    20
    where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    21
        i :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    22
        i = hx s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    23
        r :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    24
        r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    25
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    26
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    27
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    28
    where
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    29
        u a = B.concat ["\\","u",a]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    30
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    31
toPair :: String -> (B.ByteString, [B.ByteString])
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    32
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    33
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    34
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    35
main = do
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    36
    ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
10075
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    37
    B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    38
    where
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    39
        notTooLong = (>) 6 . length . snd
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    40
        fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    41