diff -r 20680676b41c -r 865a4089278d tools/confuse.hs
--- a/tools/confuse.hs Fri Jan 24 13:19:35 2014 +0100
+++ b/tools/confuse.hs Fri Jan 24 22:38:15 2014 +0400
@@ -6,6 +6,7 @@
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as UTF8
+import qualified Data.Map as Map
hx :: [Char] -> String
hx cs = let ch = (chr . fst . last . readHex $ cs) in
@@ -22,6 +23,15 @@
r :: String
r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
+convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString -- one rule: escaped first component, newline, escaped list elements
+convRules (src, targets) = B.concat (esc src : "\n" : map esc targets)
+ where
+ esc code = B.append "\\u" code -- prefix a value with a literal backslash followed by 'u'
+
+toPair :: String -> (B.ByteString, [B.ByteString]) -- (leading hex digits, words between the first tab and the next ';'); the list fails on `tail` if s has no tab
+toPair s = (UTF8.fromString (takeWhile isHexDigit s), [UTF8.fromString w | w <- words (takeWhile (/= ';') (tail (dropWhile (/= '\t') s)))])
+
+
main = do
ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
- B.writeFile "insert.sql" . B.intercalate ",\n" . map conv $ ll
+ B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter (\(_, b) -> length b < 6). map toPair $ ll -- drops pairs with 6 or more list elements; the Map round-trip keeps one pair per key (last wins) in ascending key order