Implemented Basic Parser

This commit is contained in:
Carter Bertolini 2024-11-14 14:05:47 -05:00
parent 923da9e747
commit aa48976e31
7 changed files with 363 additions and 3 deletions

View File

@ -1,4 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}
module Main where module Main where
import qualified Data.Text.IO as T
import Text.Megaparsec (parseTest)
import Windows12.Parser (programP)
main :: IO () main :: IO ()
main = putStrLn "Hello, Haskell!" main = do
test <- T.readFile "test/hello.w12"
parseTest programP test

5
src/Windows12.hs Normal file
View File

@ -0,0 +1,5 @@
module Windows12 where
import Windows12.Ast
import Windows12.Lexer
import Windows12.Parser

96
src/Windows12/Ast.hs Normal file
View File

@ -0,0 +1,96 @@
module Windows12.Ast where
import Data.Text (Text)
data BinOp
= Add
| Sub
| Mul
| Div
| Mod
| Eq
| Ne
| Lt
| Gt
| Le
| Ge
| And
| Or
| BitAnd
| BitOr
| BitXor
| ShiftL
| ShiftR
deriving (Show, Eq)
data UnOp
= Neg
| Not
| BitNot
| Deref
| AddrOf
deriving (Show, Eq)
data Expr
= Id Text
| IntLit Int
| UIntLit Word
| FloatLit Double
| StrLit Text
| BoolLit Bool
| CharLit Char
| BinOp BinOp Expr Expr
| UnOp UnOp Expr
| Call Text [Expr]
| Index Expr Expr
| Member Expr Expr
| Cast Type Expr
| Sizeof Type
| StructInit Text [(Text, Expr)]
deriving (Show, Eq)
data Stmt
= Expr Expr
| Return Expr
| If Expr Stmt (Maybe Stmt)
| While Expr Stmt
| For (Maybe Stmt) (Maybe Expr) (Maybe Expr) Stmt
| Assign Expr Expr
| Block [Stmt]
| Var Bind Expr
deriving (Show, Eq)
data Type
= IntType
| UIntType
| FloatType
| StrType
| BoolType
| CharType
| PtrType Type
| ArrayType Type
| StructType Text
| EnumType Text
| VoidType
deriving (Show, Eq)
data Bind = Bind {bindName :: Text, bindType :: Type}
deriving (Show, Eq)
data TLStruct = Struct {structName :: Text, structFields :: [Bind]}
deriving (Show, Eq)
data TLEnum = Enum {enumName :: Text, enumFields :: [Text]}
deriving (Show, Eq)
data TLFunc = Func {funcName :: Text, funcArgs :: [Bind], funcRetType :: Type, funcBody :: [Stmt]}
deriving (Show, Eq)
data TLMemberFunc = MemberFunc {memberFuncSelf :: Type, memberFuncName :: Text, memberFuncArgs :: [Bind], memberFuncRetType :: Type, memberFuncBody :: [Stmt]}
deriving (Show, Eq)
data TL = TLStruct TLStruct | TLEnum TLEnum | TLFunc TLFunc | TLMemberFunc TLMemberFunc
deriving (Show, Eq)
data Program = Program [TLStruct] [TLEnum] [TLFunc] [TLMemberFunc]
deriving (Show, Eq)

82
src/Windows12/Lexer.hs Normal file
View File

@ -0,0 +1,82 @@
{-# LANGUAGE OverloadedStrings #-}
module Windows12.Lexer where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = Parsec Void Text
sc :: Parser ()
sc =
L.space
space1
(L.skipLineComment "#")
(L.skipBlockComment "/*" "*/")
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
symbol :: Text -> Parser Text
symbol = L.symbol sc
charLiteral :: Parser Char
charLiteral = between (char '\'') (char '\'') L.charLiteral
stringLiteral :: Parser Text
stringLiteral = T.pack <$> (char '"' *> manyTill L.charLiteral (char '"'))
intLiteral :: Parser Int
intLiteral = lexeme L.decimal
uintLiteral :: Parser Word
uintLiteral = lexeme L.decimal <* char 'u'
floatLiteral :: Parser Double
floatLiteral = lexeme L.float
reserved :: Text -> Parser ()
reserved word = lexeme (string word *> notFollowedBy alphaNumChar)
reservedWords :: [Text]
reservedWords =
[ "if",
"else",
"while",
"for",
"return",
"int",
"uint",
"float",
"char",
"bool",
"struct",
"sizeof",
"true",
"false",
"fn",
"on",
"var"
]
identifier :: Parser Text
identifier = (lexeme . try) (p >>= check)
where
p = fmap T.pack $ (:) <$> letterChar <*> many (alphaNumChar <|> char '_')
check x =
if x `elem` reservedWords
then fail $ "keyword " <> show x <> " cannot be an identifier"
else return x
parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")
braces :: Parser a -> Parser a
braces = between (symbol "{") (symbol "}")
brackets :: Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")

147
src/Windows12/Parser.hs Normal file
View File

@ -0,0 +1,147 @@
{-# LANGUAGE OverloadedStrings #-}
module Windows12.Parser (programP) where
import Control.Monad (void)
import Control.Monad.Combinators.Expr
import Data.Text (Text)
import Text.Megaparsec
import Windows12.Ast
import Windows12.Ast (Expr (UnOp))
import Windows12.Lexer
opTable :: [[Operator Parser Expr]]
opTable =
[ [ InfixL $ Member <$ symbol ".",
InfixL $ (\l r -> Member (UnOp Deref l) r) <$ symbol "->",
Postfix $ Index <$> (symbol "[" *> exprP <* symbol "]")
],
[ Prefix (UnOp Neg <$ symbol "-"),
Prefix (UnOp Not <$ symbol "!"),
Prefix (UnOp BitNot <$ symbol "~"),
Prefix (UnOp Deref <$ symbol "*"),
Prefix (UnOp AddrOf <$ symbol "&")
],
[ InfixL (BinOp Mul <$ symbol "*"),
InfixL (BinOp Div <$ symbol "/"),
InfixL (BinOp Mod <$ symbol "%")
],
[ InfixL (BinOp Add <$ symbol "+"),
InfixL (BinOp Sub <$ symbol "-")
],
[ InfixL (BinOp ShiftL <$ symbol "<<"),
InfixL (BinOp ShiftR <$ symbol ">>")
],
[ InfixL (BinOp Lt <$ symbol "<"),
InfixL (BinOp Gt <$ symbol ">"),
InfixL (BinOp Le <$ symbol "<="),
InfixL (BinOp Ge <$ symbol ">=")
],
[ InfixL (BinOp Eq <$ symbol "=="),
InfixL (BinOp Ne <$ symbol "!=")
],
[ InfixL (BinOp BitAnd <$ symbol "&")
],
[ InfixL (BinOp BitXor <$ symbol "^")
],
[ InfixL (BinOp BitOr <$ symbol "|")
],
[ InfixL (BinOp And <$ symbol "&&")
],
[ InfixL (BinOp Or <$ symbol "||")
]
]
termP :: Parser Expr
termP =
parens exprP
<|> IntLit <$> intLiteral
<|> UIntLit <$> uintLiteral
<|> try (FloatLit <$> floatLiteral)
<|> StrLit <$> stringLiteral
<|> BoolLit <$> (reserved "true" *> pure True <|> reserved "false" *> pure False)
<|> CharLit <$> charLiteral
<|> try (Sizeof <$> (reserved "sizeof" *> typeP))
<|> try (Cast <$> (reserved "cast" *> typeP) <*> parens exprP)
<|> try (Call <$> identifier <*> parens (sepBy exprP (symbol ",")))
<|> try (StructInit <$> identifier <*> braces (sepEndBy1 ((,) <$> identifier <* symbol ":" <*> exprP) (symbol ",")))
<|> Id <$> identifier
exprP :: Parser Expr
exprP = makeExprParser termP opTable
structP :: Parser TLStruct
structP = do
reserved "struct"
name <- identifier
fields <- braces (sepEndBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ","))
return $ Struct name fields
typeP :: Parser Type
typeP = do
t <-
ArrayType <$> (brackets typeP)
<|> IntType <$ reserved "int"
<|> UIntType <$ reserved "uint"
<|> FloatType <$ reserved "float"
<|> StrType <$ reserved "str"
<|> BoolType <$ reserved "bool"
<|> CharType <$ reserved "char"
<|> StructType <$> identifier
foldr (const PtrType) t <$> many (symbol "*")
stmtP :: Parser Stmt
stmtP =
Return <$> (reserved "return" *> exprP <* symbol ";")
<|> (If <$> (reserved "if" *> parens exprP) <*> stmtP <*> optional (reserved "else" *> stmtP))
<|> (While <$> (reserved "while" *> parens exprP) <*> stmtP)
<|> ( For
<$> (reserved "for" *> symbol "(" *> optional stmtP <* symbol ";")
<*> (optional exprP <* symbol ";")
<*> (optional exprP <* symbol ")")
<*> stmtP
)
<|> (Var <$> (reserved "var" *> (Bind <$> identifier <* symbol ":" <*> typeP)) <* symbol "=" <*> exprP <* symbol ";")
<|> (Assign <$> exprP <* symbol "=" <*> exprP <* symbol ";")
-- <|> Block <$> braces (many stmtP)
funcP :: Parser TLFunc
funcP = do
reserved "fn"
name <- identifier
args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ","))
retType <- (symbol "->" *> typeP) <|> pure VoidType
body <- braces (many stmtP)
return $ Func name args retType body
enumP :: Parser TLEnum
enumP = do
reserved "enum"
name <- identifier
fields <- braces (sepEndBy1 identifier (symbol ","))
return $ Enum name fields
memberFuncP :: Parser TLMemberFunc
memberFuncP = do
reserved "fn"
reserved "on"
self <- typeP
name <- identifier
args <- parens (sepBy (Bind <$> identifier <* symbol ":" <*> typeP) (symbol ","))
retType <- (symbol "->" *> typeP) <|> pure VoidType
body <- braces (many stmtP)
return $ MemberFunc self name args retType body
organize :: [TL] -> Program
organize tls = Program structs enums funcs memberFuncs
where
structs = [s | TLStruct s <- tls]
enums = [e | TLEnum e <- tls]
funcs = [f | TLFunc f <- tls]
memberFuncs = [mf | TLMemberFunc mf <- tls]
programP :: Parser Program
programP = between sc eof $ do
tls <- many (TLStruct <$> structP <|> TLEnum <$> enumP <|> TLMemberFunc <$> memberFuncP <|> TLFunc <$> funcP)
return $ organize tls

16
test/hello.w12 Normal file
View File

@ -0,0 +1,16 @@
# Create an enum
enum AnimalType {
Dog,
Cat,
}
# Create a struct
struct Pet {
name: [char], # A list of characters
age: uint, # An unsigned 32-bit integer
type: AnimalType,
living: bool,
}
# Create a function that can be called on a Pet
fn on Pet rename(newName: [char]) {
return "test";
}

View File

@ -63,7 +63,11 @@ executable windows12
main-is: Main.hs main-is: Main.hs
-- Modules included in this executable, other than Main. -- Modules included in this executable, other than Main.
-- other-modules: other-modules:
Windows12
Windows12.Ast
Windows12.Lexer
Windows12.Parser
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
-- other-extensions: -- other-extensions:
@ -72,7 +76,9 @@ executable windows12
build-depends: build-depends:
base >= 4.15.1 && < 4.16, base >= 4.15.1 && < 4.16,
llvm-hs-pure >= 9.0.0 && < 9.1, llvm-hs-pure >= 9.0.0 && < 9.1,
megaparsec >= 9.6.1 && < 9.7,
text >= 1.2.5 && < 1.3,
parser-combinators >= 1.3.0 && < 1.4,
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src