{-# LANGUAGE TemplateHaskell #-}
-- | The hash algorithm used for Candid field names
--
-- Also includes a function that tries to reverse the hash, first using an
-- English word list, and then a brute force approach.
module Codec.Candid.Hash
  ( candidHash
  , invertHash
  ) where

import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as BS
import qualified Data.IntMap as M
import Data.Maybe
import Data.Char
import Data.Word
import Data.FileEmbed

-- | The Candid field label hashing algorithm
candidHash :: T.Text -> Word32
candidHash :: Text -> Word32
candidHash Text
s = (Word32 -> Word8 -> Word32) -> Word32 -> ByteString -> Word32
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl (\Word32
h Word8
c -> Word32
h Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
223 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c) Word32
0 (ByteString -> Word32) -> ByteString -> Word32
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BS.fromStrict (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
T.encodeUtf8 Text
s

-- | Inversion of the Candid field label hash
invertHash :: Word32 -> Maybe T.Text
invertHash :: Word32 -> Maybe Text
invertHash Word32
w32 | Word32
w32 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
32 = Maybe Text
forall a. Maybe a
Nothing
    -- leave small numbers alone, tend to be tuple indicies
invertHash Word32
w32 | Just Text
t <- Key -> IntMap Text -> Maybe Text
forall a. Key -> IntMap a -> Maybe a
M.lookup (Word32 -> Key
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w32) IntMap Text
m  = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
t
    -- try the word list
invertHash Word32
w32 = [Text] -> Maybe Text
forall a. [a] -> Maybe a
listToMaybe [Text]
guesses
  where
    x :: Word64
x = Word32 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w32 :: Word64

    chars :: [Char]
chars = [Char
'a'..Char
'z'] [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
'_']
    ords :: [Word64]
ords = Word64
0 Word64 -> [Word64] -> [Word64]
forall a. a -> [a] -> [a]
: (Char -> Word64) -> [Char] -> [Word64]
forall a b. (a -> b) -> [a] -> [b]
map (Key -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Key -> Word64) -> (Char -> Key) -> Char -> Word64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Key
ord) [Char]
chars
    init_chars :: [Char]
init_chars = [Char]
chars [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [ Char
'A'..Char
'Z' ]
    init_ords :: [Word64]
init_ords = Word64
0 Word64 -> [Word64] -> [Word64]
forall a. a -> [a] -> [a]
: (Char -> Word64) -> [Char] -> [Word64]
forall a b. (a -> b) -> [a] -> [b]
map (Key -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Key -> Word64) -> (Char -> Key) -> Char -> Word64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Key
ord) [Char]
init_chars

    non_mod :: a -> a
non_mod a
x = a
x a -> a -> a
forall a. Num a => a -> a -> a
- (a
x a -> a -> a
forall a. Integral a => a -> a -> a
`mod` a
2a -> Key -> a
forall a b. (Num a, Integral b) => a -> b -> a
^(Key
32::Int))
    guesses :: [Text]
guesses =
        [ [Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char]
forall a. [a] -> [a]
reverse [Char]
guess
        | Word64
c8 <- [Word64]
init_ords, Word64
c7 <- [Word64]
ords, Word64
c6 <- [Word64]
ords, Word64
c5 <- [Word64]
ords
        -- It seems that 8 characters are enough to invert anything
        -- (based on quickchecking)
        -- Set up so that short guesses come first
        , let high_chars :: Word64
high_chars = Word64
c5 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
223Word64 -> Key -> Word64
forall a b. (Num a, Integral b) => a -> b -> a
^(Key
4::Int) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
c6 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
223Word64 -> Key -> Word64
forall a b. (Num a, Integral b) => a -> b -> a
^(Key
5::Int) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
c7 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
223Word64 -> Key -> Word64
forall a b. (Num a, Integral b) => a -> b -> a
^(Key
6::Int) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
c8 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
223Word64 -> Key -> Word64
forall a b. (Num a, Integral b) => a -> b -> a
^(Key
7::Int)
        , let guess :: [Char]
guess = Word64 -> [Char]
simple (Word64 -> [Char]) -> Word64 -> [Char]
forall a b. (a -> b) -> a -> b
$ Word64
x Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64 -> Word64
forall a. Integral a => a -> a
non_mod Word64
high_chars
        , (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
init_chars) (Key -> [Char] -> [Char]
forall a. Key -> [a] -> [a]
take Key
1 [Char]
guess)
        , (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
chars) (Key -> [Char] -> [Char]
forall a. Key -> [a] -> [a]
drop Key
1 [Char]
guess)
        ]

    -- inverts the Hash if the hash was created without modulos
    -- returns string in reverse order
    simple :: Word64 -> String
    simple :: Word64 -> [Char]
simple Word64
0 = [Char]
""
    simple Word64
x = Key -> Char
chr (Word64 -> Key
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
b) Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Word64 -> [Char]
simple Word64
a
      where (Word64
a, Word64
b) = Word64
x Word64 -> Word64 -> (Word64, Word64)
forall a. Integral a => a -> a -> (a, a)
`divMod` Word64
223

-- Word list obtained from https://github.com/dwyl/english-words
wordFile :: T.Text
wordFile :: Text
wordFile = $(makeRelativeToProject "words.txt" >>= embedStringFile)

m :: M.IntMap T.Text
m :: IntMap Text
m = [(Key, Text)] -> IntMap Text
forall a. [(Key, a)] -> IntMap a
M.fromList [ (Word32 -> Key
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Text -> Word32
candidHash Text
w), Text
w) | Text
w <- [Text]
word_list ]
  where
    word_list :: [Text]
word_list = Text -> [Text]
T.lines Text
wordFile [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
T.toTitle (Text -> [Text]
T.lines Text
wordFile)