module BasicCompiler where

import "/home/ralf/hudak_soe/HUnit-1.0/HUnit" 
-- import HUnit
import Maybe


{-
data Decl = VarDecl Symbol Type 
         | FunDecl Symbol [Type] Type -- has a list of types for params and return value 

data Type = TInteger 
         | TString 
         | TFun -- is 
-- FIX: add some more types here, floats and such



data Expr    = Concat Symbol Symbol -- Only works for strings 
            | Add    Symbol Symbol -- Only works for ints 
            | Assign Symbol Expr   -- Expr can't be a program. 
            | SSymbol Symbol -- A symbol can be an expression
            | PProgram Program -- Programs can be composed of other programs
            | FFunctionCall Symbol [Expr] 
-- FIX: add stuff like "mult" and such, as well as "print"



-}

data Type = TInteger
          | TString 
          | TFun Type {- type of return value -} [Type] {- type of params -}
	  | TSomeFun
           deriving (Show, Eq)


-- ------------------------------------------------------------
-- * Symbols
-- ------------------------------------------------------------

-- |For the sake of abstraction
type Symbol = String

-- |A declaration is just associating a symbol with a type.
data Decl = Decl Symbol Type deriving (Show)
-- | FunDecl Symbol [Type]

-- |A symbol table is a simple mapping from symbols to types.
type SymbolTable  = [(Symbol, Type)]

-- |In reality, we deail with several symbol tables, each for nested
-- scopes!  That means that sub-programs' declarations should "shadow"
-- super-program declarations.  It behaves like a stack.
type SymbolTables = [SymbolTable]

-- |Look into our symbol tables for the type of this symbol.
getType :: Symbol -> SymbolTables -> Maybe Type
getType _ [] = Nothing
getType s (h:t) = case lookup s h of
                  Nothing -> getType s t
                  justT   -> justT


-- |Given a set of declarations, build a symbol table
buildSymbolTables :: SymbolTables -- ^Input tables from previous scopes
                 -> [Decl]       -- ^Declarations from this scope
                 -> SymbolTables
buildSymbolTables inTables decls
    = [(s,t) | Decl s t <- decls]:inTables



unBuildSymbolTable :: SymbolTable -> [Decl]
unBuildSymbolTable table = [Decl s t | (s,t) <- table]

unBuildSymbolTables :: SymbolTables -> [Decl]
unBuildSymbolTables tables = concatMap unBuildSymbolTable tables

--buildDecls :: SymbolTables -> [Decl] -> [Decl]
--buildDecls table decls = (unBuildSymbolTables table):decls

-- ------------------------------------------------------------
-- * Programs
-- ------------------------------------------------------------

-- |A program starts off with some declarations, and then we get into
-- the meat of it with the expressions!
data Program = Program [Decl] [Expr] deriving (Show)

-- |Type check a whole program.
typeCheck :: Program  -- ^The program to check
          -> [String] -- ^empty if it typechecks, list of errors if it doesn't

typeCheck (Program decls exprs) 
    = concatMap (typeCheckExpr (buildSymbolTables [] decls)) exprs
	 


-- ------------------------------------------------------------
-- * Expressions
-- ------------------------------------------------------------

-- |There are only a few kinds of expressions.  Concat among strings,
-- add among ints, and assignment.  Programs can consist of other
-- programs!  This is a little like a "where" clause, though this is
-- much simplified in order to limit the number of expressions you
-- might have.

data Expr    = Concat Symbol Symbol -- Only works for strings
             | Add    Symbol Symbol -- Only works for ints
	     | Mult   Symbol Symbol 
             | Assign Symbol Expr   -- Expr can't be a program.
             | SSymbol Symbol -- A symbol can be an expression
             | PProgram Program -- Programs can be composed of other programs
	     | FFunctionCall Symbol [Expr] deriving (Show)


-- |Get the type of this expression.  One of these is not implemented!
typeOf :: SymbolTables -> Expr -> Maybe Type
typeOf _ (Concat _ _) = Just TString
typeOf _ (Add _ _)    = Just TInteger
typeOf t (Assign _ e) = typeOf t e
typeOf t (SSymbol s)  = getType s t
typeOf t1 (PProgram (Program ds es)) = if  (null es) 
				       then Nothing
				       else typeOf (buildSymbolTables t1 ds) (last es)

-- | Error function section
-- |error when s is not the right type
unknownIdentifier :: (Show a) => a -> String
unknownIdentifier s = "Unknown identifier: " ++ (show s)

notWhatIExpected :: Symbol -> Type -- actual type
		           -> Type -- expected type
                           -> String 
notWhatIExpected s t1 t2 
    = s ++ " has type " ++ (show t1) ++ "; expecting type " ++ (show t2)

-- |Type check an expression.  Return a list of errors if there are
-- any problems.

-- |An abstraction
typeCheckBinaryExpr :: SymbolTables  -- ^The context in which we're type checking.
              -> Symbol -> Symbol          -- ^The two arguments to the binary expression
	      -> Type          -- expected type
              -> [String]      -- ^A list of errors. None if empty

typeCheckBinaryExpr table s1 s2 expectedType
    = case ((getType s1 table), (getType s2 table)) of
      (Just t1, Just t2) -> if (t1 == expectedType) 
                            then if  (t2 == expectedType)
				 then []
				 else [notWhatIExpected s2 t2 expectedType]
                            else [notWhatIExpected s1 t1 expectedType ]
      (Nothing, _)      -> [unknownIdentifier s1]
      (_, Nothing)      -> [unknownIdentifier s2]


typeCheckExpr :: SymbolTables  -- ^The context in which we're type checking.
              -> Expr          -- ^The expression to check
              -> [String]      -- ^A list of errors. None if empty

typeCheckExpr table (Concat s1 s2)
    = typeCheckBinaryExpr table s1 s2 TString

typeCheckExpr table (Add s1 s2)
    = typeCheckBinaryExpr table s1 s2 TInteger

typeCheckExpr table (Mult s1 s2)
    = typeCheckBinaryExpr table s1 s2 TInteger


typeCheckExpr table (Assign s e)
    =  case ((getType s table), (typeOf table e)) of
       (Just ts, Just te) -> if (ts == te) 
			     then  (typeCheckExpr table e)
			     else [(notWhatIExpected s ts te)]
       (Nothing, _)      -> [unknownIdentifier s]
       (_, Nothing)      -> [unknownIdentifier e] -- this line is not tested yet. 



typeCheckExpr table (FFunctionCall s es)
-- errors must be cleaned up.
    = case (getType s table) of 
      Just (TFun x xs) ->  typeCheckExprList ++ checkRecursively xs es
	  where 
	  checkThatTheyMatch :: [String]
	  checkThatTheyMatch = checkRecursively xs es
	  checkRecursively :: [Type] -> [Expr] -> [String]
	  checkRecursively [] [] = []
	  checkRecursively  (y:ys) [] = ["expecting more expressions in "++show s ]
	  checkRecursively  [] (f:fs) = ["expecting less expressions in "++show s]
	  checkRecursively  (y:ys) (f:fs) 
			    = case (typeOf table f) of 
			      Just tf -> if y == tf 
					 then checkRecursively ys fs 
					 else (misMatch y tf f):(checkRecursively ys fs)
			      Nothing -> ["f is a bad expression"]
	  typeCheckExprList :: [String] 
	  typeCheckExprList = concatMap (typeCheckExpr table) es
	  misMatch :: Type -> Type -> Expr -> String
	  misMatch y tf f = "in " ++ show f ++ " types do not match."
      Just t  -> [notWhatIExpected s t (TSomeFun)]
      Nothing -> [unknownIdentifier s]



typeCheckExpr table (SSymbol s)
 = maybe ["Unknown identifier: " ++ s] (\_ -> []) (getType s table)


typeCheckExpr table (PProgram (Program decls exprs))
-- You could use typeCheck here, but to make that call, you would have to 
-- build a list of decls and that's non-trivial because you'd have to collapse
-- a heirarchy of scopes into one list of declarations.
    = concatMap (typeCheckExpr (buildSymbolTables table decls)) exprs

-- | example of where I think expression should derive show. 
-- typeCheckExpr _ _ = ["Syntax error: irregular expression."] 


-- ------------------------------------------------------------
-- * Main and Testing
-- ------------------------------------------------------------


testTable :: SymbolTables
testTable = [
             [("string1", TString), 
	      ("string2", TString), 
	      ("integer1", TInteger), 
              ("integer2", TInteger)]
              ]

testTable2 :: SymbolTables
testTable2 = [
	     [("a", TString),
	      ("b", TString),
	      ("c", TString),
	      ("x", TInteger),
	      ("y", TInteger),
	      ("z", TInteger)
	     ]
	    ]

-- has more depth
testTable3 :: SymbolTables
testTable3 = [
	     [("a", TString),
	      ("b", TString),
	      ("c", TString),
	      ("x", TInteger),
	      ("y", TInteger),
	      ("z", TInteger)
	     ],
	     [
	      ("d", TString),  -- does not conflict most local scope
	      ("a", TInteger)  -- conflicts most local scope
	     ],
	     [
	      ("e", TString) -- two scopes deep. 
	     ]
	    ]
-- | testTable3 with a functions. 
testTable3wf :: SymbolTables
testTable3wf = [
	     [("a", TString),
	      ("b", TString),
	      ("c", TString),
	      ("x", TInteger),
	      ("y", TInteger),
	      ("z", TInteger)
	     ],
	     [
	      ("d", TString),  -- does not conflict most local scope
	      ("a", TInteger)  -- conflicts most local scope
	     ],
	     [
	      ("e", TString), -- two scopes deep. 
	      ("f'", (TFun TString [])),
	      ("f", (TFun TString [TString, TInteger])) -- function call
	     ]
	    ]

testProgram :: Program
testProgram = Program [(Decl "a" TString),
		       (Decl "b" TString),
		       (Decl "c" TString),
		       (Decl "x" TInteger),
		       (Decl "y" TInteger),
		       (Decl "z" TInteger)
		       		      ]
	              [(Add "x" "y"),
		       (Concat "a" "b")]

addStringProgram :: Program
addStringProgram = Program [(Decl "a" TString),
		       (Decl "b" TString),
		       (Decl "c" TString),
		       (Decl "x" TInteger),
		       (Decl "y" TInteger),
		       (Decl "z" TInteger)
		       		      ]
	              [(Add "x" "a"),
		       (Concat "a" "b")]


-- Meant to be called with testTable3wf
-- I.e. typeCheckExpr testTable3wf testFunctionCall1

testFunctionCall1 :: Expr
testFunctionCall1 = FFunctionCall "f" [(SSymbol "a"), (Add "x" "y")]


-- call with empty string
testFunctionCall2 :: Expr
testFunctionCall2 = FFunctionCall "f'" [] -- note f', not f

-- bind to variable of wrong type.
testFunctionCall3 :: Expr
testFunctionCall3 = FFunctionCall "a" []

-- Too few arguments in call
testFunctionCall4 :: Expr
testFunctionCall4 = FFunctionCall "f" [(SSymbol "a")]

-- Too many arguments in call
testFunctionCall5 :: Expr
testFunctionCall5 = FFunctionCall "f" [(SSymbol "a"), (Add "x" "y"), (Concat "a" "b")]

-- Bind to non-existant variable
testFunctionCall6 :: Expr
testFunctionCall6 = FFunctionCall "flkjlk" [(SSymbol "a"), (Add "x" "y")]

-- Ill formed expression 
testFunctionCall7 :: Expr
testFunctionCall7 = FFunctionCall "f" [(SSymbol "a"), (Add "a" "y")]





{- |Other ideas for tests. 
a test which will produce multiple of errors. 
a program which is incorrect. 
-}

tests :: Test
tests = test [ 
	      "test0" ~: "concat TStrings" ~: [] ~=? 
	      typeCheckExpr testTable (Concat "string1" "string2"), 



	      "test0.5" ~: "s1 not TString" ~:  ["integer1 has type TInteger; expecting type TString"] ~=? 
	      typeCheckExpr testTable (Concat "integer1" "string2"), 

              
        
	      "test1" ~: "s2 not TString" ~: ["integer2 has type TInteger; expecting type TString"] ~=? 
	      typeCheckExpr testTable (Concat "string1" "integer2"), 
	      "test2" ~: "concat TIntegers" ~: ["integer1 has type TInteger; expecting type TString"] ~=?
	      typeCheckExpr testTable (Concat "integer1" "integer2"),

	      "test5" ~: "add TIntegers" ~: [] ~=? 
	      typeCheckExpr testTable (Add "integer1" "integer2"), 
	      "test6" ~: "s1 not TInteger" ~: ["string1 has type TString; expecting type TInteger"] ~=? 
	      typeCheckExpr testTable (Add "string1" "integer2"), 
	      
              "test7" ~: "s2 not TInteger" ~: ["string2 has type TString; expecting type TInteger"] ~=? 
	      typeCheckExpr testTable (Add "integer1" "string2"), 
	      
	      -- The possible error in this case is ambiguous. 
	      -- FIX: should you demand that both errors be given? 
              "test8" ~: "add TStrings" ~: ["string1 has type TString; expecting type TInteger"] ~=?
	      typeCheckExpr testTable (Add "string1" "string2"),
	      
              "test9" ~: "assign to (Concat _ _)" ~: [] ~=?
	      typeCheckExpr testTable2 (Assign "a" (Concat "b" "c")),
	      
              "test10" ~: "assign to (Add _ _)" ~: [] ~=?
	      typeCheckExpr testTable2 (Assign "x" (Add "y" "z")),
	      
	      -- try to assign a TInteger symbol to (Concat _ _)
              "test11" ~: "assign int to (Concat _ _)" ~: 
			   ["x has type TInteger; expecting type TString"] ~=?
	      typeCheckExpr testTable2 (Assign "x" (Concat "a" "b")), 

	      -- try to assign a TString symbol to (Add _ _)
	      "test12" ~: "assign string to (Add _ _)" ~: 
			   ["a has type TString; expecting type TInteger"] ~=?
	      typeCheckExpr testTable2 (Assign "a" (Add "x" "y")),	      

              -- (Symbol s) where s is defined
              "test13" ~: "typeCheckExpr (Symbol s)" ~: [] ~=?
	      typeCheckExpr testTable2 (SSymbol "a"),

              -- (Symbol s) where s is UNdefined
              "test13" ~: "typeCheckExpr (Symbol s)" ~: ["Unknown identifier: d"] ~=?
	      typeCheckExpr testTable2 (SSymbol "d"),
	      
	      -- a variable in the next most local scope conflicts with 
	      --   a variable in the very most local scope. 
              "test14" ~: "conflicts local scope" ~: [] ~=?
	      typeCheckExpr testTable3 (SSymbol "a"),
	      
	      -- a variable in the next most local scope does not conflict with 
	      --   a variable in the very most local scope. 
              "test15" ~: "assign to (Concat _ _)" ~: [] ~=?
	      typeCheckExpr testTable3 (SSymbol "d"),
	      
	      -- a variable two scopes deep. 
              "test16" ~: "assign to (Concat _ _)" ~: [] ~=?
	      typeCheckExpr testTable3 (SSymbol "e"),
	

              -- check the test program
              "test17" ~: "check the test program" ~: [] ~=?
	      typeCheck testProgram,
 
             -- check the "add strings" test program
              "test18" ~: "check the `add strings' test program" ~: ["a has type TString; expecting type TInteger"] ~=?
	      typeCheck addStringProgram,

              -- first test for FFunctionCall
              "test19" ~: "a successful function call" ~: [] ~=?
	      typeCheckExpr testTable3wf testFunctionCall2,

              -- test for FFunctionCall with empty params.
              "test20" ~: "normal function which takes no params" ~: [] ~=?
	      typeCheckExpr testTable3wf testFunctionCall1,

              -- test for FFunctionCall assigned to string.
              "test21" ~: "bound to Symbol with type TString instead of TFun" ~: ["a has type TString; expecting type TSomeFun"] ~=?
	      typeCheckExpr testTable3wf testFunctionCall3,

              -- test for FFunctionCall with one too few arguments
              "test22" ~: "too few arguments" ~: ["expecting more expressions in \"f\""] ~=?
	      typeCheckExpr testTable3wf testFunctionCall4,

              -- test for FFunctionCall one too many arguments.
              "test23" ~: "too many arguments" ~: ["expecting less expressions in \"f\""] ~=?
	      typeCheckExpr testTable3wf testFunctionCall5,

              -- test for FFunctionCall bound to non-existant variable
              "test24" ~: "non existant variable. " ~: ["Unknown identifier: \"flkjlk\""] ~=?
	      typeCheckExpr testTable3wf testFunctionCall6,

              -- ill formed expression 
              "test25" ~: "ill formed expression in param" ~: ["a has type TString; expecting type TInteger"] ~=?
	      typeCheckExpr testTable3wf testFunctionCall7
	     ]


main :: IO Counts
main = do runTestTT tests
         
{- | buildSymbolTable should really be buildSymbolTables.  This actually caused me 
vsome confusion.  note that symbolTable and symbolTables are two distinct types. 
I have made the above change, but table should be changed to tables in some cases. 

Also suggest
typeOf -> typeOfExpr
getType -> typeOfSymbol
And they should both take table as their first argument, so that statements like 
	    mts = (getType s table)
	    mte = (typeOf table e)
are more elegant.  (Or both take table as their second argument, just so that it's 
the same for both).


Also, some errors would be clearer, and the implementation of the errors more elegant 
if expressions derived Show the same way symbols do. *expressions should derive Show

I could write some really great test cases.  Right now I have a haskell program in mind to write hundreds of 'em.  But I don't want to write the drunk on the bridge program.  I want to finish this problem and move on.  I know that this is a highly simplified compiler.  By the end of this exercise, I will understand it well and be ready to work to understand it on a deeper level

TODO: 
**abstract to remove redundancy in typeCheckExpr defs
**remove fromJust and friend idioms -- replace with case statements or maybe function
**add function declarations, mult, div, print
**make expressions derive show (I don't know how to do this)
* just need to fix the tests for the new typeCheckBinaryExpr
* fix messages in typeCheckExpr for function calls
* you could always improve tests

QUESTIONS:
if I say (Add s1 s2) where s1 and s2 are strings, there are two errors since there are two arguments of the wrong type given to Add.  how do you resolve the ambiguity in error messages here?  You could demand that both errors be given, but you must also demand that they be given in order to make the test cases truly independant of implementation. 
-}