Fixed Parser issues and simplified AST

This commit is contained in:
Carter Bertolini 2024-11-21 19:08:50 -05:00
parent aa48976e31
commit 6a9f272cac
5 changed files with 251 additions and 66 deletions

View File

@ -3,10 +3,13 @@
module Main where module Main where
import qualified Data.Text.IO as T import qualified Data.Text.IO as T
import Text.Megaparsec (parseTest) import Prettyprinter
import Text.Megaparsec (parse)
import Windows12.Parser (programP) import Windows12.Parser (programP)
main :: IO () main :: IO ()
main = do main = do
test <- T.readFile "test/hello.w12" test <- T.readFile "test/hello.w12"
parseTest programP test case parse programP "test/hello.w12" test of
Left err -> print err
Right ast -> print (pretty ast)

View File

@ -1,6 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
module Windows12.Ast where module Windows12.Ast where
import Data.Text (Text) import Data.Text (Text)
import Prettyprinter
data BinOp data BinOp
= Add = Add
@ -31,6 +34,20 @@ data UnOp
| AddrOf | AddrOf
deriving (Show, Eq) deriving (Show, Eq)
data AssignOp
= BaseAssign
| AddAssign
| SubAssign
| MulAssign
| DivAssign
| ModAssign
| BitAndAssign
| BitOrAssign
| BitXorAssign
| ShiftLAssign
| ShiftRAssign
deriving (Show, Eq)
data Expr data Expr
= Id Text = Id Text
| IntLit Int | IntLit Int
@ -41,7 +58,7 @@ data Expr
| CharLit Char | CharLit Char
| BinOp BinOp Expr Expr | BinOp BinOp Expr Expr
| UnOp UnOp Expr | UnOp UnOp Expr
| Call Text [Expr] | Call Expr [Expr]
| Index Expr Expr | Index Expr Expr
| Member Expr Expr | Member Expr Expr
| Cast Type Expr | Cast Type Expr
@ -52,12 +69,11 @@ data Expr
data Stmt data Stmt
= Expr Expr = Expr Expr
| Return Expr | Return Expr
| If Expr Stmt (Maybe Stmt) | If Expr [Stmt] (Maybe [Stmt])
| While Expr Stmt | While Expr [Stmt]
| For (Maybe Stmt) (Maybe Expr) (Maybe Expr) Stmt | Assign AssignOp Expr Expr
| Assign Expr Expr
| Block [Stmt] | Block [Stmt]
| Var Bind Expr | Var Text (Maybe Type) (Maybe Expr)
deriving (Show, Eq) deriving (Show, Eq)
data Type data Type
@ -86,11 +102,112 @@ data TLEnum = Enum {enumName :: Text, enumFields :: [Text]}
data TLFunc = Func {funcName :: Text, funcArgs :: [Bind], funcRetType :: Type, funcBody :: [Stmt]} data TLFunc = Func {funcName :: Text, funcArgs :: [Bind], funcRetType :: Type, funcBody :: [Stmt]}
deriving (Show, Eq) deriving (Show, Eq)
data TLMemberFunc = MemberFunc {memberFuncSelf :: Type, memberFuncName :: Text, memberFuncArgs :: [Bind], memberFuncRetType :: Type, memberFuncBody :: [Stmt]} data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc
deriving (Show, Eq) deriving (Show, Eq)
data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc | TLMemberFunc TLMemberFunc data Program = Program [TLStruct] [TLEnum] [TLFunc]
deriving (Show, Eq) deriving (Show, Eq)
data Program = Program [TLStruct] [TLEnum] [TLFunc] [TLMemberFunc] -- Pretty printing
deriving (Show, Eq) instance Pretty BinOp where
pretty Add = "+"
pretty Sub = "-"
pretty Mul = "*"
pretty Div = "/"
pretty Mod = "%"
pretty Eq = "=="
pretty Ne = "!="
pretty Lt = "<"
pretty Gt = ">"
pretty Le = "<="
pretty Ge = ">="
pretty And = "&&"
pretty Or = "||"
pretty BitAnd = "&"
pretty BitOr = "|"
pretty BitXor = "^"
pretty ShiftL = "<<"
pretty ShiftR = ">>"
instance Pretty UnOp where
pretty Neg = "-"
pretty Not = "!"
pretty BitNot = "~"
pretty Deref = "*"
pretty AddrOf = "&"
instance Pretty AssignOp where
pretty BaseAssign = "="
pretty AddAssign = "+="
pretty SubAssign = "-="
pretty MulAssign = "*="
pretty DivAssign = "/="
pretty ModAssign = "%="
pretty BitAndAssign = "&="
pretty BitOrAssign = "|="
pretty BitXorAssign = "^="
pretty ShiftLAssign = "<<="
pretty ShiftRAssign = ">>="
instance Pretty Expr where
pretty (Id x) = pretty x
pretty (IntLit x) = pretty x
pretty (UIntLit x) = pretty x
pretty (FloatLit x) = pretty x
pretty (StrLit x) = dquotes (pretty x)
pretty (BoolLit x) = pretty x
pretty (CharLit x) = squotes (pretty x)
pretty (BinOp op l r) = parens (pretty l <+> pretty op <+> pretty r)
pretty (UnOp op e) = pretty op <> parens (pretty e)
pretty (Call f args) = parens (pretty f) <> parens (hsep (punctuate comma (map pretty args)))
pretty (Index arr idx) = parens (pretty arr) <> brackets (pretty idx)
pretty (Member e m) = pretty e <> "." <> pretty m
pretty (Cast t e) = parens (pretty t) <> parens (pretty e)
pretty (Sizeof t) = "sizeof" <> parens (pretty t)
pretty (StructInit s fields) = pretty s <+> lbrace <> line <> indent 4 (vsep (punctuate comma (map (\(n, e) -> pretty n <+> "=" <+> pretty e) fields))) <> line <> rbrace
instance Pretty Stmt where
pretty (Expr e) = pretty e <> semi
pretty (Return e) = "return" <+> pretty e <> semi
pretty (If cond t f) = "if" <+> pretty cond <+> prettyBlock t <+> maybe "" (\f' -> "else" <+> prettyBlock f') f
pretty (While cond body) = "while" <+> pretty cond <+> prettyBlock body
pretty (Assign op l r) = pretty l <+> pretty op <+> pretty r <> semi
pretty (Block stmts) = braces (vsep (map pretty stmts))
pretty (Var n t e) = pretty n <+> maybe "" (\t' -> ":" <+> pretty t') t <+> maybe "" (\e' -> "=" <+> pretty e') e <> semi
instance Pretty Type where
pretty IntType = "int"
pretty UIntType = "uint"
pretty FloatType = "float"
pretty StrType = "str"
pretty BoolType = "bool"
pretty CharType = "char"
pretty (PtrType t) = pretty t <> "*"
pretty (ArrayType t) = pretty t <> "[]"
pretty (StructType s) = pretty s
pretty (EnumType e) = pretty e
pretty VoidType = "void"
instance Pretty Bind where
pretty (Bind n t) = pretty n <+> ":" <+> pretty t
instance Pretty TLStruct where
pretty (Struct n fields) = "struct" <+> pretty n <+> prettyBlock fields
instance Pretty TLEnum where
pretty (Enum n fields) = "enum" <+> pretty n <+> prettyBlock fields
instance Pretty TLFunc where
pretty (Func n args ret body) =
pretty n <> parens (hsep (punctuate comma (map pretty args))) <+> "->" <+> pretty ret <+> prettyBlock body
instance Pretty TL where
pretty (TLStruct s) = pretty s
pretty (TLEnum e) = pretty e
pretty (TLFunc f) = pretty f
instance Pretty Program where
pretty (Program structs enums funcs) = vsep (map pretty structs) <> line <> vsep (map pretty enums) <> line <> vsep (map pretty funcs)
prettyBlock :: (Pretty a) => [a] -> Doc ann
prettyBlock stmts = lbrace <> line <> indent 4 (vsep (map pretty stmts)) <> line <> rbrace

View File

@ -2,55 +2,62 @@
module Windows12.Parser (programP) where module Windows12.Parser (programP) where
import Control.Monad (void)
import Control.Monad.Combinators.Expr import Control.Monad.Combinators.Expr
import Data.Text (Text)
import Text.Megaparsec import Text.Megaparsec
import Windows12.Ast import Windows12.Ast
import Windows12.Ast (Expr (UnOp))
import Windows12.Lexer import Windows12.Lexer
opTable :: [[Operator Parser Expr]] opTable :: [[Operator Parser Expr]]
opTable = opTable =
[ [ InfixL $ Member <$ symbol ".", [ [ InfixL $ Member <$ symbol ".",
InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->", InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->"
Postfix $ Index <$> (symbol "[" *> exprP <* symbol "]")
], ],
[ Prefix (UnOp Neg <$ symbol "-"), [ unary (UnOp Neg) "-",
Prefix (UnOp Not <$ symbol "!"), unary (UnOp Not) "!",
Prefix (UnOp BitNot <$ symbol "~"), unary (UnOp BitNot) "~",
Prefix (UnOp Deref <$ symbol "*"), unary (UnOp Deref) "*",
Prefix (UnOp AddrOf <$ symbol "&") unary (UnOp AddrOf) "&"
], ],
[ InfixL (BinOp Mul <$ symbol "*"), [ Postfix $ flip Index <$> (symbol "[" *> exprP <* symbol "]")
InfixL (BinOp Div <$ symbol "/"),
InfixL (BinOp Mod <$ symbol "%")
], ],
[ InfixL (BinOp Add <$ symbol "+"), [ Postfix $ flip Call <$> parens (sepBy exprP (symbol ","))
InfixL (BinOp Sub <$ symbol "-")
], ],
[ InfixL (BinOp ShiftL <$ symbol "<<"), [ infixL' Mul "*",
InfixL (BinOp ShiftR <$ symbol ">>") infixL' Div "/",
infixL' Mod "%"
], ],
[ InfixL (BinOp Lt <$ symbol "<"), [ infixL' Add "+",
InfixL (BinOp Gt <$ symbol ">"), infixL' Sub "-"
InfixL (BinOp Le <$ symbol "<="),
InfixL (BinOp Ge <$ symbol ">=")
], ],
[ InfixL (BinOp Eq <$ symbol "=="), [ infixL' ShiftL "<<",
InfixL (BinOp Ne <$ symbol "!=") infixL' ShiftR ">>"
], ],
[ InfixL (BinOp BitAnd <$ symbol "&") [ infixL Le "<=",
infixL Lt "<",
infixL Ge ">=",
infixL Gt ">"
], ],
[ InfixL (BinOp BitXor <$ symbol "^") [ infixL Eq "==",
infixL Ne "!="
], ],
[ InfixL (BinOp BitOr <$ symbol "|") [ infixL' BitAnd "&"
], ],
[ InfixL (BinOp And <$ symbol "&&") [ infixL' BitXor "^"
], ],
[ InfixL (BinOp Or <$ symbol "||") [ infixL' BitOr "|"
],
[ infixL And "&&"
],
[ infixL Or "||"
] ]
] ]
where
unary op sym = Prefix $ foldr1 (.) <$> some (op <$ symbol sym)
infixL op sym = InfixL $ BinOp op <$ symbol sym
infixL' op sym = InfixL $ BinOp op <$ operator sym
infixR op sym = InfixR $ BinOp op <$ symbol sym
operator sym = lexeme $ try $ (symbol sym <* notFollowedBy opChar)
opChar = oneOf ("+-*/%<>&|^=!~" :: [Char])
termP :: Parser Expr termP :: Parser Expr
termP = termP =
@ -62,8 +69,7 @@ termP =
<|> BoolLit <$> (reserved "true" *> pure True <|> reserved "false" *> pure False) <|> BoolLit <$> (reserved "true" *> pure True <|> reserved "false" *> pure False)
<|> CharLit <$> charLiteral <|> CharLit <$> charLiteral
<|> try (Sizeof <$> (reserved "sizeof" *> typeP)) <|> try (Sizeof <$> (reserved "sizeof" *> typeP))
<|> try (Cast <$> (reserved "cast" *> typeP) <*> parens exprP) <|> try (Cast <$> (parens typeP) <*> termP)
<|> try (Call <$> identifier <*> parens (sepBy exprP (symbol ",")))
<|> try (StructInit <$> identifier <*> braces (sepEndBy1 ((,) <$> identifier <* symbol ":" <*> exprP) (symbol ","))) <|> try (StructInit <$> identifier <*> braces (sepEndBy1 ((,) <$> identifier <* symbol ":" <*> exprP) (symbol ",")))
<|> Id <$> identifier <|> Id <$> identifier
@ -90,21 +96,32 @@ typeP = do
<|> StructType <$> identifier <|> StructType <$> identifier
foldr (const PtrType) t <$> many (symbol "*") foldr (const PtrType) t <$> many (symbol "*")
assignP :: Parser Stmt
assignP = do
lhs <- exprP
op <-
AddAssign <$ symbol "+="
<|> SubAssign <$ symbol "-="
<|> MulAssign <$ symbol "*="
<|> DivAssign <$ symbol "/="
<|> ModAssign <$ symbol "%="
<|> BitAndAssign <$ symbol "&="
<|> BitOrAssign <$ symbol "|="
<|> BitXorAssign <$ symbol "^="
<|> ShiftLAssign <$ symbol "<<="
<|> ShiftRAssign <$ symbol ">>="
<|> BaseAssign <$ symbol "="
Assign op lhs <$> exprP <* symbol ";"
stmtP :: Parser Stmt stmtP :: Parser Stmt
stmtP = stmtP =
Return <$> (reserved "return" *> exprP <* symbol ";") Return <$> (reserved "return" *> exprP <* symbol ";")
<|> (If <$> (reserved "if" *> parens exprP) <*> stmtP <*> optional (reserved "else" *> stmtP)) <|> (If <$> (reserved "if" *> exprP) <*> braces (many stmtP) <*> optional (reserved "else" *> braces (many stmtP)))
<|> (While <$> (reserved "while" *> parens exprP) <*> stmtP) <|> (While <$> (reserved "while" *> parens exprP) <*> braces (many stmtP))
<|> ( For <|> (Var <$> (reserved "var" *> identifier) <*> optional (symbol ":" *> typeP) <*> optional (symbol "=" *> exprP) <* symbol ";")
<$> (reserved "for" *> symbol "(" *> optional stmtP <* symbol ";") <|> try assignP
<*> (optional exprP <* symbol ";") <|> Expr <$> exprP <* symbol ";"
<*> (optional exprP <* symbol ")") <|> Block <$> braces (many stmtP)
<*> stmtP
)
<|> (Var <$> (reserved "var" *> (Bind <$> identifier <* symbol ":" <*> typeP)) <* symbol "=" <*> exprP <* symbol ";")
<|> (Assign <$> exprP <* symbol "=" <*> exprP <* symbol ";")
-- <|> Block <$> braces (many stmtP)
funcP :: Parser TLFunc funcP :: Parser TLFunc
funcP = do funcP = do
@ -122,7 +139,7 @@ enumP = do
fields <- braces (sepEndBy1 identifier (symbol ",")) fields <- braces (sepEndBy1 identifier (symbol ","))
return $ Enum name fields return $ Enum name fields
memberFuncP :: Parser TLMemberFunc memberFuncP :: Parser TLFunc
memberFuncP = do memberFuncP = do
reserved "fn" reserved "fn"
reserved "on" reserved "on"
@ -131,17 +148,16 @@ memberFuncP = do
args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ",")) args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ","))
retType <- (symbol "->" *> typeP) <|> pure VoidType retType <- (symbol "->" *> typeP) <|> pure VoidType
body <- braces (many stmtP) body <- braces (many stmtP)
return $ MemberFunc self name args retType body return $ Func name (Bind "self" self : args) retType body
organize :: [TL] -> Program organize :: [TL] -> Program
organize tls = Program structs enums funcs memberFuncs organize tls = Program structs enums funcs
where where
structs = [s | TLStruct s <- tls] structs = [s | TLStruct s <- tls]
enums = [e | TLEnum e <- tls] enums = [e | TLEnum e <- tls]
funcs = [f | TLFunc f <- tls] funcs = [f | TLFunc f <- tls]
memberFuncs = [mf | TLMemberFunc mf <- tls]
programP :: Parser Program programP :: Parser Program
programP = between sc eof $ do programP = between sc eof $ do
tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> TLMemberFunc <$> memberFuncP <|> TLFunc <$> funcP) tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> try (TLFunc <$> memberFuncP) <|> TLFunc <$> funcP)
return $ organize tls return $ organize tls

View File

@ -1,16 +1,64 @@
# Create an enum # Create an enum
enum AnimalType { enum AnimalType {
Dog, Dog,
Cat, Cat,
} }
# Create a struct # Create a struct
struct Pet { struct Pet {
name: [char], # A list of characters name: [char], # A list of characters
age: uint, # An unsigned 32-bit integer age: uint, # An unsigned 32-bit integer
type: AnimalType, type: AnimalType,
living: bool, living: bool,
} }
# Create a function that can be called on a Pet # Create a function that can be called on a Pet
fn on Pet rename(newName: [char]) { fn on Pet rename(newName: [char]) {
return "test"; self.name = newName;
}
# Create another struct
struct Person {
pet: Pet,
name: [char],
age: uint,
living: bool,
}
fn on Person growUp() {
self.age += 1;
}
fn main() -> int {
# Create an instance of Pet
# "let" creates an immutable binding
var dog = Pet {
name: "Fido",
age: 3,
type: AnimalType.Dog,
};
# Create a (variable) instance of Person
# "var" creates a mutable binding
var person = Person {
pet: dog,
name: "Fred",
age: 41,
};
# Create a new name for the Pet
var new_name = "George";
person.pet.rename(new_name);
# Print out the person's name and age
# Uses C for I/O
printf("Person %s is %u years old.\n", person.name, person.age);
if person.age % 2 == 0 {
printf("Age is even\n");
} else {
printf("Age is odd\n");
}
*test[12](3);
return 0;
} }

View File

@ -79,6 +79,7 @@ executable windows12
megaparsec >= 9.6.1 && < 9.7, megaparsec >= 9.6.1 && < 9.7,
text >= 1.2.5 && < 1.3, text >= 1.2.5 && < 1.3,
parser-combinators >= 1.3.0 && < 1.4, parser-combinators >= 1.3.0 && < 1.4,
prettyprinter >= 1.7.1 && < 1.8,
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src