Fixed Parser issues and simplified AST

This commit is contained in:
Carter Bertolini 2024-11-21 19:08:50 -05:00
parent aa48976e31
commit 6a9f272cac
5 changed files with 251 additions and 66 deletions

View File

@ -3,10 +3,13 @@
module Main where
import qualified Data.Text.IO as T
import Text.Megaparsec (parseTest)
import Prettyprinter
import Text.Megaparsec (parse)
import Windows12.Parser (programP)
main :: IO ()
main = do
test <- T.readFile "test/hello.w12"
parseTest programP test
case parse programP "test/hello.w12" test of
Left err -> print err
Right ast -> print (pretty ast)

View File

@ -1,6 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
module Windows12.Ast where
import Data.Text (Text)
import Prettyprinter
data BinOp
= Add
@ -31,6 +34,20 @@ data UnOp
| AddrOf
deriving (Show, Eq)
data AssignOp
= BaseAssign
| AddAssign
| SubAssign
| MulAssign
| DivAssign
| ModAssign
| BitAndAssign
| BitOrAssign
| BitXorAssign
| ShiftLAssign
| ShiftRAssign
deriving (Show, Eq)
data Expr
= Id Text
| IntLit Int
@ -41,7 +58,7 @@ data Expr
| CharLit Char
| BinOp BinOp Expr Expr
| UnOp UnOp Expr
| Call Text [Expr]
| Call Expr [Expr]
| Index Expr Expr
| Member Expr Expr
| Cast Type Expr
@ -52,12 +69,11 @@ data Expr
data Stmt
= Expr Expr
| Return Expr
| If Expr Stmt (Maybe Stmt)
| While Expr Stmt
| For (Maybe Stmt) (Maybe Expr) (Maybe Expr) Stmt
| Assign Expr Expr
| If Expr [Stmt] (Maybe [Stmt])
| While Expr [Stmt]
| Assign AssignOp Expr Expr
| Block [Stmt]
| Var Bind Expr
| Var Text (Maybe Type) (Maybe Expr)
deriving (Show, Eq)
data Type
@ -86,11 +102,112 @@ data TLEnum = Enum {enumName :: Text, enumFields :: [Text]}
data TLFunc = Func {funcName :: Text, funcArgs :: [Bind], funcRetType :: Type, funcBody :: [Stmt]}
deriving (Show, Eq)
data TLMemberFunc = MemberFunc {memberFuncSelf :: Type, memberFuncName :: Text, memberFuncArgs :: [Bind], memberFuncRetType :: Type, memberFuncBody :: [Stmt]}
data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc
deriving (Show, Eq)
data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc | TLMemberFunc TLMemberFunc
data Program = Program [TLStruct] [TLEnum] [TLFunc]
deriving (Show, Eq)
data Program = Program [TLStruct] [TLEnum] [TLFunc] [TLMemberFunc]
deriving (Show, Eq)
-- Pretty printing
instance Pretty BinOp where
pretty Add = "+"
pretty Sub = "-"
pretty Mul = "*"
pretty Div = "/"
pretty Mod = "%"
pretty Eq = "=="
pretty Ne = "!="
pretty Lt = "<"
pretty Gt = ">"
pretty Le = "<="
pretty Ge = ">="
pretty And = "&&"
pretty Or = "||"
pretty BitAnd = "&"
pretty BitOr = "|"
pretty BitXor = "^"
pretty ShiftL = "<<"
pretty ShiftR = ">>"
instance Pretty UnOp where
pretty Neg = "-"
pretty Not = "!"
pretty BitNot = "~"
pretty Deref = "*"
pretty AddrOf = "&"
instance Pretty AssignOp where
pretty BaseAssign = "="
pretty AddAssign = "+="
pretty SubAssign = "-="
pretty MulAssign = "*="
pretty DivAssign = "/="
pretty ModAssign = "%="
pretty BitAndAssign = "&="
pretty BitOrAssign = "|="
pretty BitXorAssign = "^="
pretty ShiftLAssign = "<<="
pretty ShiftRAssign = ">>="
instance Pretty Expr where
pretty (Id x) = pretty x
pretty (IntLit x) = pretty x
pretty (UIntLit x) = pretty x
pretty (FloatLit x) = pretty x
pretty (StrLit x) = dquotes (pretty x)
pretty (BoolLit x) = pretty x
pretty (CharLit x) = squotes (pretty x)
pretty (BinOp op l r) = parens (pretty l <+> pretty op <+> pretty r)
pretty (UnOp op e) = pretty op <> parens (pretty e)
pretty (Call f args) = parens (pretty f) <> parens (hsep (punctuate comma (map pretty args)))
pretty (Index arr idx) = parens (pretty arr) <> brackets (pretty idx)
pretty (Member e m) = pretty e <> "." <> pretty m
pretty (Cast t e) = parens (pretty t) <> parens (pretty e)
pretty (Sizeof t) = "sizeof" <> parens (pretty t)
pretty (StructInit s fields) = pretty s <+> lbrace <> line <> indent 4 (vsep (punctuate comma (map (\(n, e) -> pretty n <+> "=" <+> pretty e) fields))) <> line <> rbrace
instance Pretty Stmt where
pretty (Expr e) = pretty e <> semi
pretty (Return e) = "return" <+> pretty e <> semi
pretty (If cond t f) = "if" <+> pretty cond <+> prettyBlock t <+> maybe "" (\f' -> "else" <+> prettyBlock f') f
pretty (While cond body) = "while" <+> pretty cond <+> prettyBlock body
pretty (Assign op l r) = pretty l <+> pretty op <+> pretty r <> semi
pretty (Block stmts) = braces (vsep (map pretty stmts))
pretty (Var n t e) = pretty n <+> maybe "" (\t' -> ":" <+> pretty t') t <+> maybe "" (\e' -> "=" <+> pretty e') e <> semi
instance Pretty Type where
pretty IntType = "int"
pretty UIntType = "uint"
pretty FloatType = "float"
pretty StrType = "str"
pretty BoolType = "bool"
pretty CharType = "char"
pretty (PtrType t) = pretty t <> "*"
pretty (ArrayType t) = pretty t <> "[]"
pretty (StructType s) = pretty s
pretty (EnumType e) = pretty e
pretty VoidType = "void"
instance Pretty Bind where
pretty (Bind n t) = pretty n <+> ":" <+> pretty t
instance Pretty TLStruct where
pretty (Struct n fields) = "struct" <+> pretty n <+> prettyBlock fields
instance Pretty TLEnum where
pretty (Enum n fields) = "enum" <+> pretty n <+> prettyBlock fields
instance Pretty TLFunc where
pretty (Func n args ret body) =
pretty n <> parens (hsep (punctuate comma (map pretty args))) <+> "->" <+> pretty ret <+> prettyBlock body
instance Pretty TL where
pretty (TLStruct s) = pretty s
pretty (TLEnum e) = pretty e
pretty (TLFunc f) = pretty f
instance Pretty Program where
pretty (Program structs enums funcs) = vsep (map pretty structs) <> line <> vsep (map pretty enums) <> line <> vsep (map pretty funcs)
prettyBlock :: (Pretty a) => [a] -> Doc ann
prettyBlock stmts = lbrace <> line <> indent 4 (vsep (map pretty stmts)) <> line <> rbrace

View File

@ -2,55 +2,62 @@
module Windows12.Parser (programP) where
import Control.Monad (void)
import Control.Monad.Combinators.Expr
import Data.Text (Text)
import Text.Megaparsec
import Windows12.Ast
import Windows12.Ast (Expr (UnOp))
import Windows12.Lexer
opTable :: [[Operator Parser Expr]]
opTable =
[ [ InfixL $ Member <$ symbol ".",
InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->",
Postfix $ Index <$> (symbol "[" *> exprP <* symbol "]")
InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->"
],
[ Prefix (UnOp Neg <$ symbol "-"),
Prefix (UnOp Not <$ symbol "!"),
Prefix (UnOp BitNot <$ symbol "~"),
Prefix (UnOp Deref <$ symbol "*"),
Prefix (UnOp AddrOf <$ symbol "&")
[ unary (UnOp Neg) "-",
unary (UnOp Not) "!",
unary (UnOp BitNot) "~",
unary (UnOp Deref) "*",
unary (UnOp AddrOf) "&"
],
[ InfixL (BinOp Mul <$ symbol "*"),
InfixL (BinOp Div <$ symbol "/"),
InfixL (BinOp Mod <$ symbol "%")
[ Postfix $ flip Index <$> (symbol "[" *> exprP <* symbol "]")
],
[ InfixL (BinOp Add <$ symbol "+"),
InfixL (BinOp Sub <$ symbol "-")
[ Postfix $ flip Call <$> parens (sepBy exprP (symbol ","))
],
[ InfixL (BinOp ShiftL <$ symbol "<<"),
InfixL (BinOp ShiftR <$ symbol ">>")
[ infixL' Mul "*",
infixL' Div "/",
infixL' Mod "%"
],
[ InfixL (BinOp Lt <$ symbol "<"),
InfixL (BinOp Gt <$ symbol ">"),
InfixL (BinOp Le <$ symbol "<="),
InfixL (BinOp Ge <$ symbol ">=")
[ infixL' Add "+",
infixL' Sub "-"
],
[ InfixL (BinOp Eq <$ symbol "=="),
InfixL (BinOp Ne <$ symbol "!=")
[ infixL' ShiftL "<<",
infixL' ShiftR ">>"
],
[ InfixL (BinOp BitAnd <$ symbol "&")
[ infixL Le "<=",
infixL Lt "<",
infixL Ge ">=",
infixL Gt ">"
],
[ InfixL (BinOp BitXor <$ symbol "^")
[ infixL Eq "==",
infixL Ne "!="
],
[ InfixL (BinOp BitOr <$ symbol "|")
[ infixL' BitAnd "&"
],
[ InfixL (BinOp And <$ symbol "&&")
[ infixL' BitXor "^"
],
[ InfixL (BinOp Or <$ symbol "||")
[ infixL' BitOr "|"
],
[ infixL And "&&"
],
[ infixL Or "||"
]
]
where
unary op sym = Prefix $ foldr1 (.) <$> some (op <$ symbol sym)
infixL op sym = InfixL $ BinOp op <$ symbol sym
infixL' op sym = InfixL $ BinOp op <$ operator sym
infixR op sym = InfixR $ BinOp op <$ symbol sym
operator sym = lexeme $ try $ (symbol sym <* notFollowedBy opChar)
opChar = oneOf ("+-*/%<>&|^=!~" :: [Char])
termP :: Parser Expr
termP =
@ -62,8 +69,7 @@ termP =
<|> BoolLit <$> (reserved "true" *> pure True <|> reserved "false" *> pure False)
<|> CharLit <$> charLiteral
<|> try (Sizeof <$> (reserved "sizeof" *> typeP))
<|> try (Cast <$> (reserved "cast" *> typeP) <*> parens exprP)
<|> try (Call <$> identifier <*> parens (sepBy exprP (symbol ",")))
<|> try (Cast <$> (parens typeP) <*> termP)
<|> try (StructInit <$> identifier <*> braces (sepEndBy1 ((,) <$> identifier <* symbol ":" <*> exprP) (symbol ",")))
<|> Id <$> identifier
@ -90,21 +96,32 @@ typeP = do
<|> StructType <$> identifier
foldr (const PtrType) t <$> many (symbol "*")
assignP :: Parser Stmt
assignP = do
lhs <- exprP
op <-
AddAssign <$ symbol "+="
<|> SubAssign <$ symbol "-="
<|> MulAssign <$ symbol "*="
<|> DivAssign <$ symbol "/="
<|> ModAssign <$ symbol "%="
<|> BitAndAssign <$ symbol "&="
<|> BitOrAssign <$ symbol "|="
<|> BitXorAssign <$ symbol "^="
<|> ShiftLAssign <$ symbol "<<="
<|> ShiftRAssign <$ symbol ">>="
<|> BaseAssign <$ symbol "="
Assign op lhs <$> exprP <* symbol ";"
stmtP :: Parser Stmt
stmtP =
Return <$> (reserved "return" *> exprP <* symbol ";")
<|> (If <$> (reserved "if" *> parens exprP) <*> stmtP <*> optional (reserved "else" *> stmtP))
<|> (While <$> (reserved "while" *> parens exprP) <*> stmtP)
<|> ( For
<$> (reserved "for" *> symbol "(" *> optional stmtP <* symbol ";")
<*> (optional exprP <* symbol ";")
<*> (optional exprP <* symbol ")")
<*> stmtP
)
<|> (Var <$> (reserved "var" *> (Bind <$> identifier <* symbol ":" <*> typeP)) <* symbol "=" <*> exprP <* symbol ";")
<|> (Assign <$> exprP <* symbol "=" <*> exprP <* symbol ";")
-- <|> Block <$> braces (many stmtP)
<|> (If <$> (reserved "if" *> exprP) <*> braces (many stmtP) <*> optional (reserved "else" *> braces (many stmtP)))
<|> (While <$> (reserved "while" *> parens exprP) <*> braces (many stmtP))
<|> (Var <$> (reserved "var" *> identifier) <*> optional (symbol ":" *> typeP) <*> optional (symbol "=" *> exprP) <* symbol ";")
<|> try assignP
<|> Expr <$> exprP <* symbol ";"
<|> Block <$> braces (many stmtP)
funcP :: Parser TLFunc
funcP = do
@ -122,7 +139,7 @@ enumP = do
fields <- braces (sepEndBy1 identifier (symbol ","))
return $ Enum name fields
memberFuncP :: Parser TLMemberFunc
memberFuncP :: Parser TLFunc
memberFuncP = do
reserved "fn"
reserved "on"
@ -131,17 +148,16 @@ memberFuncP = do
args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ","))
retType <- (symbol "->" *> typeP) <|> pure VoidType
body <- braces (many stmtP)
return $ MemberFunc self name args retType body
return $ Func name (Bind "self" self : args) retType body
organize :: [TL] -> Program
organize tls = Program structs enums funcs memberFuncs
organize tls = Program structs enums funcs
where
structs = [s | TLStruct s <- tls]
enums = [e | TLEnum e <- tls]
funcs = [f | TLFunc f <- tls]
memberFuncs = [mf | TLMemberFunc mf <- tls]
programP :: Parser Program
programP = between sc eof $ do
tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> TLMemberFunc <$> memberFuncP <|> TLFunc <$> funcP)
tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> try (TLFunc <$> memberFuncP) <|> TLFunc <$> funcP)
return $ organize tls

View File

@ -1,16 +1,64 @@
# Create an enum
enum AnimalType {
Dog,
Cat,
Dog,
Cat,
}
# Create a struct
struct Pet {
name: [char], # A list of characters
age: uint, # An unsigned 32-bit integer
type: AnimalType,
living: bool,
name: [char], # A list of characters
age: uint, # An unsigned 32-bit integer
type: AnimalType,
living: bool,
}
# Create a function that can be called on a Pet
fn on Pet rename(newName: [char]) {
return "test";
self.name = newName;
}
# Create another struct
struct Person {
pet: Pet,
name: [char],
age: uint,
living: bool,
}
fn on Person growUp() {
self.age += 1;
}
fn main() -> int {
# Create an instance of Pet
# "let" creates an immutable binding
var dog = Pet {
name: "Fido",
age: 3,
type: AnimalType.Dog,
};
# Create a (variable) instance of Person
# "var" creates a mutable binding
var person = Person {
pet: dog,
name: "Fred",
age: 41,
};
# Create a new name for the Pet
var new_name = "George";
person.pet.rename(new_name);
# Print out the person's name and age
# Uses C for I/O
printf("Person %s is %u years old.\n", person.name, person.age);
if person.age % 2 == 0 {
printf("Age is even\n");
} else {
printf("Age is odd\n");
}
*test[12](3);
return 0;
}

View File

@ -79,6 +79,7 @@ executable windows12
megaparsec >= 9.6.1 && < 9.7,
text >= 1.2.5 && < 1.3,
parser-combinators >= 1.3.0 && < 1.4,
prettyprinter >= 1.7.1 && < 1.8,
-- Directories containing source files.
hs-source-dirs: src