diff --git a/src/Main.hs b/src/Main.hs index 65ae4a0..60ffc72 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,4 +1,12 @@ +{-# LANGUAGE OverloadedStrings #-} + module Main where +import qualified Data.Text.IO as T +import Text.Megaparsec (parseTest) +import Windows12.Parser (programP) + main :: IO () -main = putStrLn "Hello, Haskell!" +main = do + test <- T.readFile "test/hello.w12" + parseTest programP test diff --git a/src/Windows12.hs b/src/Windows12.hs new file mode 100644 index 0000000..c5f16f1 --- /dev/null +++ b/src/Windows12.hs @@ -0,0 +1,5 @@ +module Windows12 where + +import Windows12.Ast +import Windows12.Lexer +import Windows12.Parser \ No newline at end of file diff --git a/src/Windows12/Ast.hs b/src/Windows12/Ast.hs new file mode 100644 index 0000000..a2651ed --- /dev/null +++ b/src/Windows12/Ast.hs @@ -0,0 +1,96 @@ +module Windows12.Ast where + +import Data.Text (Text) + +data BinOp + = Add + | Sub + | Mul + | Div + | Mod + | Eq + | Ne + | Lt + | Gt + | Le + | Ge + | And + | Or + | BitAnd + | BitOr + | BitXor + | ShiftL + | ShiftR + deriving (Show, Eq) + +data UnOp + = Neg + | Not + | BitNot + | Deref + | AddrOf + deriving (Show, Eq) + +data Expr + = Id Text + | IntLit Int + | UIntLit Word + | FloatLit Double + | StrLit Text + | BoolLit Bool + | CharLit Char + | BinOp BinOp Expr Expr + | UnOp UnOp Expr + | Call Text [Expr] + | Index Expr Expr + | Member Expr Expr + | Cast Type Expr + | Sizeof Type + | StructInit Text [(Text, Expr)] + deriving (Show, Eq) + +data Stmt + = Expr Expr + | Return Expr + | If Expr Stmt (Maybe Stmt) + | While Expr Stmt + | For (Maybe Stmt) (Maybe Expr) (Maybe Expr) Stmt + | Assign Expr Expr + | Block [Stmt] + | Var Bind Expr + deriving (Show, Eq) + +data Type + = IntType + | UIntType + | FloatType + | StrType + | BoolType + | CharType + | PtrType Type + | ArrayType Type + | StructType Text + | EnumType Text + | VoidType + deriving (Show, Eq) + +data Bind = Bind {bindName :: Text, bindType :: Type} + deriving (Show, Eq) + +data TLStruct = Struct {structName :: Text, structFields :: [Bind]} + deriving (Show, Eq) + +data TLEnum = Enum {enumName :: Text, enumFields :: [Text]} + deriving (Show, Eq) + +data TLFunc = Func {funcName :: Text, funcArgs :: [Bind], funcRetType :: Type, funcBody :: [Stmt]} + deriving (Show, Eq) + +data TLMemberFunc = MemberFunc {memberFuncSelf :: Type, memberFuncName :: Text, memberFuncArgs :: [Bind], memberFuncRetType :: Type, memberFuncBody :: [Stmt]} + deriving (Show, Eq) + +data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc | TLMemberFunc TLMemberFunc + deriving (Show, Eq) + +data Program = Program [TLStruct] [TLEnum] [TLFunc] [TLMemberFunc] + deriving (Show, Eq) \ No newline at end of file diff --git a/src/Windows12/Lexer.hs b/src/Windows12/Lexer.hs new file mode 100644 index 0000000..fc40f44 --- /dev/null +++ b/src/Windows12/Lexer.hs @@ -0,0 +1,82 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Windows12.Lexer where + +import Data.Text (Text) +import qualified Data.Text as T +import Data.Void +import Text.Megaparsec +import Text.Megaparsec.Char +import qualified Text.Megaparsec.Char.Lexer as L + +type Parser = Parsec Void Text + +sc :: Parser () +sc = + L.space + space1 + (L.skipLineComment "#") + (L.skipBlockComment "/*" "*/") + +lexeme :: Parser a -> Parser a +lexeme = L.lexeme sc + +symbol :: Text -> Parser Text +symbol = L.symbol sc + +charLiteral :: Parser Char +charLiteral = between (char '\'') (char '\'') L.charLiteral + +stringLiteral :: Parser Text +stringLiteral = T.pack <$> (char '"' *> manyTill L.charLiteral (char '"')) + +intLiteral :: Parser Int +intLiteral = lexeme L.decimal + +uintLiteral :: Parser Word +uintLiteral = lexeme L.decimal <* char 'u' + +floatLiteral :: Parser Double +floatLiteral = lexeme L.float + +reserved :: Text -> Parser () +reserved word = lexeme (string word *> notFollowedBy alphaNumChar) + +reservedWords :: [Text] +reservedWords = + [ "if", + "else", + "while", + "for", + "return", + "int", + "uint", + "float", + "char", + "bool", + "struct", + "sizeof", + "true", + "false", + "fn", + "on", + "var" + ] + +identifier :: Parser Text +identifier = (lexeme . try) (p >>= check) + where + p = fmap T.pack $ (:) <$> letterChar <*> many (alphaNumChar <|> char '_') + check x = + if x `elem` reservedWords + then fail $ "keyword " <> show x <> " cannot be an identifier" + else return x + +parens :: Parser a -> Parser a +parens = between (symbol "(") (symbol ")") + +braces :: Parser a -> Parser a +braces = between (symbol "{") (symbol "}") + +brackets :: Parser a -> Parser a +brackets = between (symbol "[") (symbol "]") \ No newline at end of file diff --git a/src/Windows12/Parser.hs b/src/Windows12/Parser.hs new file mode 100644 index 0000000..6d2ec84 --- /dev/null +++ b/src/Windows12/Parser.hs @@ -0,0 +1,147 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Windows12.Parser (programP) where + +import Control.Monad (void) +import Control.Monad.Combinators.Expr +import Data.Text (Text) +import Text.Megaparsec +import Windows12.Ast +import Windows12.Ast (Expr (UnOp)) +import Windows12.Lexer + +opTable :: [[Operator Parser Expr]] +opTable = + [ [ InfixL $ Member <$ symbol ".", + InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->", + Postfix $ Index <$> (symbol "[" *> exprP <* symbol "]") + ], + [ Prefix (UnOp Neg <$ symbol "-"), + Prefix (UnOp Not <$ symbol "!"), + Prefix (UnOp BitNot <$ symbol "~"), + Prefix (UnOp Deref <$ symbol "*"), + Prefix (UnOp AddrOf <$ symbol "&") + ], + [ InfixL (BinOp Mul <$ symbol "*"), + InfixL (BinOp Div <$ symbol "/"), + InfixL (BinOp Mod <$ symbol "%") + ], + [ InfixL (BinOp Add <$ symbol "+"), + InfixL (BinOp Sub <$ symbol "-") + ], + [ InfixL (BinOp ShiftL <$ symbol "<<"), + InfixL (BinOp ShiftR <$ symbol ">>") + ], + [ InfixL (BinOp Lt <$ symbol "<"), + InfixL (BinOp Gt <$ symbol ">"), + InfixL (BinOp Le <$ symbol "<="), + InfixL (BinOp Ge <$ symbol ">=") + ], + [ InfixL (BinOp Eq <$ symbol "=="), + InfixL (BinOp Ne <$ symbol "!=") + ], + [ InfixL (BinOp BitAnd <$ symbol "&") + ], + [ InfixL (BinOp BitXor <$ symbol "^") + ], + [ InfixL (BinOp BitOr <$ symbol "|") + ], + [ InfixL (BinOp And <$ symbol "&&") + ], + [ InfixL (BinOp Or <$ symbol "||") + ] + ] + +termP :: Parser Expr +termP = + parens exprP + <|> IntLit <$> intLiteral + <|> UIntLit <$> uintLiteral + <|> try (FloatLit <$> floatLiteral) + <|> StrLit <$> stringLiteral + <|> BoolLit <$> (reserved "true" *> pure True <|> reserved "false" *> pure False) + <|> CharLit <$> charLiteral + <|> try (Sizeof <$> (reserved "sizeof" *> typeP)) + <|> try (Cast <$> (reserved "cast" *> typeP) <*> parens exprP) + <|> try (Call <$> identifier <*> parens (sepBy exprP (symbol ","))) + <|> try (StructInit <$> identifier <*> braces (sepEndBy1 ((,) <$> identifier <* symbol ":" <*> exprP) (symbol ","))) + <|> Id <$> identifier + +exprP :: Parser Expr +exprP = makeExprParser termP opTable + +structP :: Parser TLStruct +structP = do + reserved "struct" + name <- identifier + fields <- braces (sepEndBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ",")) + return $ Struct name fields + +typeP :: Parser Type +typeP = do + t <- + ArrayType <$> (brackets typeP) + <|> IntType <$ reserved "int" + <|> UIntType <$ reserved "uint" + <|> FloatType <$ reserved "float" + <|> StrType <$ reserved "str" + <|> BoolType <$ reserved "bool" + <|> CharType <$ reserved "char" + <|> StructType <$> identifier + foldr (const PtrType) t <$> many (symbol "*") + +stmtP :: Parser Stmt +stmtP = + Return <$> (reserved "return" *> exprP <* symbol ";") + <|> (If <$> (reserved "if" *> parens exprP) <*> stmtP <*> optional (reserved "else" *> stmtP)) + <|> (While <$> (reserved "while" *> parens exprP) <*> stmtP) + <|> ( For + <$> (reserved "for" *> symbol "(" *> optional stmtP <* symbol ";") + <*> (optional exprP <* symbol ";") + <*> (optional exprP <* symbol ")") + <*> stmtP + ) + <|> (Var <$> (reserved "var" *> (Bind <$> identifier <* symbol ":" <*> typeP)) <* symbol "=" <*> exprP <* symbol ";") + <|> (Assign <$> exprP <* symbol "=" <*> exprP <* symbol ";") + +-- <|> Block <$> braces (many stmtP) + +funcP :: Parser TLFunc +funcP = do + reserved "fn" + name <- identifier + args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ",")) + retType <- (symbol "->" *> typeP) <|> pure VoidType + body <- braces (many stmtP) + return $ Func name args retType body + +enumP :: Parser TLEnum +enumP = do + reserved "enum" + name <- identifier + fields <- braces (sepEndBy1 identifier (symbol ",")) + return $ Enum name fields + +memberFuncP :: Parser TLMemberFunc +memberFuncP = do + reserved "fn" + reserved "on" + self <- typeP + name <- identifier + args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ",")) + retType <- (symbol "->" *> typeP) <|> pure VoidType + body <- braces (many stmtP) + return $ MemberFunc self name args retType body + +organize :: [TL] -> Program +organize tls = Program structs enums funcs memberFuncs + where + structs = [s | TLStruct s <- tls] + enums = [e | TLEnum e <- tls] + funcs = [f | TLFunc f <- tls] + memberFuncs = [mf | TLMemberFunc mf <- tls] + +programP :: Parser Program +programP = between sc eof $ do + tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> TLMemberFunc <$> memberFuncP <|> TLFunc <$> funcP) + return $ organize tls \ No newline at end of file diff --git a/test/hello.w12 b/test/hello.w12 new file mode 100644 index 0000000..4b456e8 --- /dev/null +++ b/test/hello.w12 @@ -0,0 +1,16 @@ +# Create an enum +enum AnimalType { +Dog, +Cat, +} +# Create a struct +struct Pet { +name: [char], # A list of characters +age: uint, # An unsigned 32-bit integer +type: AnimalType, +living: bool, +} +# Create a function that can be called on a Pet +fn on Pet rename(newName: [char]) { + return "test"; +} \ No newline at end of file diff --git a/windows12.cabal b/windows12.cabal index 2ee9a6b..cd5a1f5 100644 --- a/windows12.cabal +++ b/windows12.cabal @@ -63,7 +63,11 @@ executable windows12 main-is: Main.hs -- Modules included in this executable, other than Main. - -- other-modules: + other-modules: + Windows12 + Windows12.Ast + Windows12.Lexer + Windows12.Parser -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -72,7 +76,9 @@ executable windows12 build-depends: base >= 4.15.1 && < 4.16, llvm-hs-pure >= 9.0.0 && < 9.1, - + megaparsec >= 9.6.1 && < 9.7, + text >= 1.2.5 && < 1.3, + parser-combinators >= 1.3.0 && < 1.4, -- Directories containing source files. hs-source-dirs: src