add lexer, parser, and ast tree

This commit is contained in:
2022-10-17 22:26:36 -03:00
parent fa2d69de2b
commit 950efa945e
18 changed files with 961 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
#ifndef OBELISK_AST_CALL_EXPRESSION_AST_H
#define OBELISK_AST_CALL_EXPRESSION_AST_H
#include "ast/expression_ast.h"
#include <memory>
#include <string>
#include <vector>
namespace obelisk
{
// AST node for a call expression: the callee's name together with the
// argument expressions passed to it.
class CallExpressionAST : public ExpressionAST
{
    private:
        // Name the call refers to, as handed to the constructor.
        std::string callee_;
        // Argument expressions, owned by this node.
        std::vector<std::unique_ptr<ExpressionAST>> args_;

        // Accessors are only declared here; no definition appears in this
        // header.
        std::string getCallee();
        void setCallee(std::string callee);
        std::vector<std::unique_ptr<ExpressionAST>> getArgs();
        void setArgs(std::vector<std::unique_ptr<ExpressionAST>> args);

    public:
        // Copies the callee name and takes ownership of the argument list.
        CallExpressionAST(const std::string &callee,
            std::vector<std::unique_ptr<ExpressionAST>> args) :
            callee_(callee),
            args_(std::move(args))
        {
        }
};
} // namespace obelisk
#endif

13
src/ast/expression_ast.h Normal file
View File

@@ -0,0 +1,13 @@
#ifndef OBELISK_AST_EXPRESSION_AST_H
#define OBELISK_AST_EXPRESSION_AST_H
namespace obelisk
{
// Base class of the expression nodes in this AST (the call, number and
// variable expression classes all derive from it).
class ExpressionAST
{
public:
// Virtual so that a derived node held through an ExpressionAST pointer
// (e.g. std::unique_ptr<ExpressionAST>) is destroyed correctly.
virtual ~ExpressionAST() = default;
};
} // namespace obelisk
#endif

30
src/ast/function_ast.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef OBELISK_AST_FUNCTION_AST_H
#define OBELISK_AST_FUNCTION_AST_H
#include "ast/expression_ast.h"
#include "ast/prototype_ast.h"
#include <memory>
namespace obelisk
{
// A function definition: a prototype paired with the expression that forms
// its body.
class FunctionAST
{
    private:
        // Owned prototype.
        std::unique_ptr<PrototypeAST> prototype_;
        // Owned body expression.
        std::unique_ptr<ExpressionAST> body_;

        // Declared only; no definition appears in this header.
        std::unique_ptr<PrototypeAST> getPrototype();
        void setPrototype(std::unique_ptr<PrototypeAST> prototype);

    public:
        // Takes ownership of both the prototype and the body.
        FunctionAST(std::unique_ptr<PrototypeAST> prototype,
            std::unique_ptr<ExpressionAST> body) :
            prototype_(std::move(prototype)),
            body_(std::move(body))
        {
        }
};
} // namespace obelisk
#endif

8
src/ast/meson.build Normal file
View File

@@ -0,0 +1,8 @@
# Header files that make up the AST node classes. The parent build file
# appends this list to the sources of the obelisk executable.
obelisk_ast_sources = files(
'call_expression_ast.h',
'expression_ast.h',
'function_ast.h',
'number_expression_ast.h',
'prototype_ast.h',
'variable_expression_ast.h'
)

View File

@@ -0,0 +1,24 @@
#ifndef OBELISK_AST_NUMBER_EXPRESSION_AST_H
#define OBELISK_AST_NUMBER_EXPRESSION_AST_H
#include "ast/expression_ast.h"
namespace obelisk
{
// AST node holding a numeric literal as a double.
class NumberExpressionAST : public ExpressionAST
{
    private:
        // Value of the literal.
        double number_;

        // Declared only; no definition appears in this header.
        double getNumber();
        void setNumber(double number);

    public:
        // explicit: a bare double must not silently convert into an AST
        // node; construction via std::make_unique is unaffected.
        explicit NumberExpressionAST(double number) :
            number_(number)
        {
        }
};
} // namespace obelisk
#endif

34
src/ast/prototype_ast.h Normal file
View File

@@ -0,0 +1,34 @@
#ifndef OBELISK_AST_PROTOTYPE_AST_H
#define OBELISK_AST_PROTOTYPE_AST_H
#include <string>
#include <vector>
namespace obelisk
{
// The "signature" of a function: its name and the names of its arguments.
class PrototypeAST
{
    private:
        // Function name.
        std::string name_;
        // Argument names, in the order given to the constructor.
        std::vector<std::string> args_;

        // Declared only; no definition appears in this header.
        void setName(const std::string& name);
        std::vector<std::string> getArgs();
        void setArgs(std::vector<std::string> args);

    public:
        // Copies the name and takes over the argument-name list.
        PrototypeAST(const std::string& name, std::vector<std::string> args) :
            name_(name),
            args_(std::move(args))
        {
        }

        // Returns a reference to the stored name; valid for as long as this
        // object is alive.
        const std::string& getName() const
        {
            return name_;
        }
};
} //namespace obelisk
#endif

View File

@@ -0,0 +1,25 @@
#ifndef OBELISK_AST_VARIABLE_EXPRESSION_AST_H
#define OBELISK_AST_VARIABLE_EXPRESSION_AST_H
#include "ast/expression_ast.h"
#include <string>
namespace obelisk
{
// AST node for a reference to a variable by name.
class VariableExpressionAST : public ExpressionAST
{
    private:
        // Name of the referenced variable.
        std::string name_;

        // Declared only; no definition appears in this header.
        std::string getName();
        void setName(const std::string name);

    public:
        // explicit: a string must not silently convert into an AST node;
        // construction via std::make_unique is unaffected.
        explicit VariableExpressionAST(const std::string &name) :
            name_(name)
        {
        }
};
} //namespace obelisk
#endif

121
src/lexer.cpp Normal file
View File

@@ -0,0 +1,121 @@
#include "lexer.h"
#include <iostream>
int obelisk::Lexer::getToken()
{
static int lastChar = ' ';
while (isspace(lastChar))
{
lastChar = getchar();
}
if (isalpha(lastChar))
{
eraseIdentifier();
appendIdentifier(lastChar);
while (isalnum((lastChar = getchar())))
{
appendIdentifier(lastChar);
}
if (getIdentifier() == "def")
{
return Token::kTokenDef;
}
if (getIdentifier() == "extern")
{
return kTokenExtern;
}
return kTokenIdentifier;
}
if (isdigit(lastChar) || lastChar == '.')
{
std::string numberStr;
do
{
numberStr += lastChar;
lastChar = getchar();
}
while (isdigit(lastChar) || lastChar == '.');
setNumberValue(strtod(numberStr.c_str(), nullptr));
return kTokenNumber;
}
if (lastChar == '#')
{
commentLine(&lastChar);
if (lastChar != EOF)
{
return getToken();
}
}
else if (lastChar == '/')
{
lastChar = getchar();
if (lastChar == '/')
{
commentLine(&lastChar);
if (lastChar != EOF)
{
return getToken();
}
}
}
if (lastChar == EOF)
{
return kTokenEof;
}
int thisChar = lastChar;
lastChar = getchar();
return thisChar;
}
// Consumes characters from standard input up to and including the first
// '\n', '\r' or EOF. At least one character is always read. The character
// that ended the scan is left in *lastChar.
void obelisk::Lexer::commentLine(int* lastChar)
{
    for (;;)
    {
        const int next = getchar();
        *lastChar = next;
        if (next == EOF || next == '\n' || next == '\r')
        {
            return;
        }
    }
}
std::string obelisk::Lexer::getIdentifier()
{
return identifier_;
}
// Replaces the stored identifier text with the given string.
void obelisk::Lexer::setIdentifier(const std::string identifier)
{
    identifier_.assign(identifier);
}
// Resets the stored identifier text to the empty string.
void obelisk::Lexer::eraseIdentifier()
{
    identifier_.clear();
}
// Appends one character (passed as an int, as returned by getchar) to the
// stored identifier text.
void obelisk::Lexer::appendIdentifier(int lastChar)
{
    identifier_.push_back(static_cast<char>(lastChar));
}
double obelisk::Lexer::getNumberValue()
{
return numberValue_;
}
void obelisk::Lexer::setNumberValue(double numberValue)
{
numberValue_ = numberValue;
}

46
src/lexer.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef OBELISK_LEXER_H
#define OBELISK_LEXER_H
#include <string>
namespace obelisk
{
// Tokenizer. getToken() returns either one of the negative Token values or,
// for any other input character, that character's code.
class Lexer
{
    private:
        // Text of the most recently scanned identifier or keyword.
        std::string identifier_;
        // Value of the most recently scanned number. Initialized so that
        // getNumberValue() is well defined before any number has been read.
        double numberValue_ = 0.0;

        void setIdentifier(const std::string identifier);
        void eraseIdentifier();
        void appendIdentifier(int lastChar);
        void setNumberValue(double numberValue);
        // Skips the remainder of a comment line; see the definition.
        void commentLine(int* lastChar);

    public:
        // Token codes. All are negative so they cannot collide with the
        // character codes that getToken() returns for other input.
        enum Token
        {
            kTokenInvalid = -1,
            kTokenEof     = -2,

            // commands
            kTokenFact   = -3,
            kTokenRule   = -4,
            kTokenDef    = -5,
            kTokenExtern = -6,

            // primary
            kTokenIdentifier = -7,
            kTokenNumber     = -8,
            kTokenString     = -9
        };

        // Scans and returns the next token.
        int getToken();
        // Identifier text belonging to the last identifier token.
        std::string getIdentifier();
        // Numeric value belonging to the last number token.
        double getNumberValue();
};
} // namespace obelisk
#endif

13
src/meson.build Normal file
View File

@@ -0,0 +1,13 @@
# Sources of the obelisk executable that live directly in this directory.
obelisk_sources = files(
'obelisk.cpp',
'lexer.cpp',
'parser.cpp'
)
# The ast subdirectory defines obelisk_ast_sources, which is appended below.
subdir('ast')
obelisk_sources += obelisk_ast_sources
# Build and install the obelisk binary.
executable('obelisk',
obelisk_sources,
install : true
)

50
src/obelisk.cpp Normal file
View File

@@ -0,0 +1,50 @@
#include "lexer.h"
#include "obelisk.h"
#include "parser.h"
#include <iostream>
// Interactive driver loop: prints a "ready> " prompt on stderr and pulls
// tokens from the parser until end of input or an invalid token is seen.
static void mainLoop()
{
    // Automatic storage: the parser (and the lexer it owns) is destroyed on
    // every return path instead of being leaked.
    obelisk::Parser parser;

    // Prime the first token.
    fprintf(stderr, "ready> ");
    parser.getNextToken();

    while (true)
    {
        fprintf(stderr, "ready> ");
        switch (parser.getCurrentToken())
        {
            case obelisk::Lexer::kTokenEof :
                return;
            case obelisk::Lexer::kTokenInvalid :
                std::cerr << "Invalid token!\n";
                parser.getNextToken();
                return;
            case ';' :
                // On a top-level semicolon, dump the lexer's most recent
                // identifier and number, then move on.
                std::cout << "Identifier: "
                          << parser.getLexer()->getIdentifier() << std::endl;
                std::cout << "Num: " << parser.getLexer()->getNumberValue()
                          << std::endl;
                parser.getNextToken();
                break;
            default :
                // Every other token is currently just skipped.
                parser.getNextToken();
                break;
        }
    }
}
// Entry point: echoes each command-line argument on its own line, then runs
// the token loop.
int main(int argc, char** argv)
{
    int index = 1; // argv[0] is skipped
    while (index < argc)
    {
        std::cout << argv[index] << std::endl;
        ++index;
    }

    mainLoop();

    return EXIT_SUCCESS;
}

1
src/obelisk.h Normal file
View File

@@ -0,0 +1 @@
// Declaration of the token loop defined in obelisk.cpp. It has internal
// linkage (static), so each translation unit including this header gets its
// own declaration.
static void mainLoop();

225
src/parser.cpp Normal file
View File

@@ -0,0 +1,225 @@
#include "ast/call_expression_ast.h"
#include "ast/number_expression_ast.h"
#include "ast/variable_expression_ast.h"
#include "parser.h"
// Creates a parser that allocates and owns its own Lexer. The current token
// starts as kTokenInvalid so getCurrentToken() never reads an indeterminate
// value before the first getNextToken().
obelisk::Parser::Parser() :
    lexer_(new obelisk::Lexer()),
    currentToken_(obelisk::Lexer::kTokenInvalid)
{
}
// Creates a parser around the given lexer and takes ownership of it (the
// destructor deletes it). If lexer is null a fresh Lexer is allocated.
//
// The fallback must assign lexer_ directly: a statement such as `Parser();`
// only builds and destroys an unrelated temporary, leaving this object's
// lexer_ uninitialized for the destructor to delete.
obelisk::Parser::Parser(obelisk::Lexer* lexer) :
    lexer_(lexer != nullptr ? lexer : new obelisk::Lexer()),
    currentToken_(obelisk::Lexer::kTokenInvalid)
{
}
// Deletes the lexer, whether it was allocated by the parser or handed in
// through the constructor.
obelisk::Parser::~Parser()
{
    delete this->lexer_;
}
// Returns the lexer this parser reads from. The parser keeps ownership.
obelisk::Lexer* obelisk::Parser::getLexer()
{
    return this->lexer_;
}
// Pulls the next token from the lexer, records it as the current token and
// returns it.
int obelisk::Parser::getNextToken()
{
    const int token = getLexer()->getToken();
    setCurrentToken(token);
    return token;
}
int obelisk::Parser::getCurrentToken()
{
return currentToken_;
}
void obelisk::Parser::setCurrentToken(int currentToken)
{
currentToken_ = currentToken;
}
// Writes "Error: <str>" followed by a newline to stderr and returns a null
// expression pointer, so callers can write `return logError(...)`.
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::logError(
    const char* str)
{
    fprintf(stderr, "Error: %s\n", str);
    return std::unique_ptr<obelisk::ExpressionAST>();
}
// Same as logError(), but returns a null prototype pointer so it can be
// returned directly from prototype-parsing functions.
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::logErrorPrototype(
    const char* str)
{
    (void) logError(str);
    return std::unique_ptr<obelisk::PrototypeAST>();
}
// Parses an expression. At present an expression is just a primary
// expression; the result (null on failure) is passed straight through.
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseExpression()
{
    return parsePrimary();
}
// Dispatches on the current token to the matching primary-expression parser:
// identifier, number, or parenthesized expression. Any other token is
// reported through logError() and yields null.
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parsePrimary()
{
    switch (getCurrentToken())
    {
        case obelisk::Lexer::kTokenIdentifier :
            return parseIdentifierExpression();
        case obelisk::Lexer::kTokenNumber :
            return parseNumberExpression();
        case '(' :
            return parseParenthesisExpression();
        default :
            return logError("unknown token when expecting an expression");
    }
}
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseNumberExpression()
{
auto result = std::make_unique<obelisk::NumberExpressionAST>(
getLexer()->getNumberValue());
getNextToken();
return std::move(result);
}
// Parses "( expression )". Returns the inner expression itself (no wrapper
// node), or null if the inner expression fails or ')' is missing.
std::unique_ptr<obelisk::ExpressionAST>
    obelisk::Parser::parseParenthesisExpression()
{
    getNextToken(); // move past '('

    auto inner = parseExpression();
    if (inner == nullptr)
    {
        return nullptr;
    }

    if (getCurrentToken() == ')')
    {
        getNextToken(); // move past ')'
        return inner;
    }

    return logError("expected ')'");
}
std::unique_ptr<obelisk::ExpressionAST>
obelisk::Parser::parseIdentifierExpression()
{
std::string idName = getLexer()->getIdentifier();
getNextToken();
if (getCurrentToken() != '(')
{
return std::make_unique<obelisk::VariableExpressionAST>(idName);
}
getNextToken();
std::vector<std::unique_ptr<obelisk::ExpressionAST>> args;
if (getCurrentToken() != ')')
{
while (true)
{
if (auto arg = parseExpression())
{
args.push_back(std::move(arg));
}
else
{
return nullptr;
}
if (getCurrentToken() == ')')
{
break;
}
if (getCurrentToken() != ',')
{
return logError("Expected ')' or ',' in argument list");
}
getNextToken();
}
}
getNextToken();
return std::make_unique<CallExpressionAST>(idName, std::move(args));
}
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parsePrototype()
{
if (getCurrentToken() != obelisk::Lexer::kTokenIdentifier)
{
return logErrorPrototype("Expected function name in prototype");
}
std::string functionName = getLexer()->getIdentifier();
getNextToken();
if (getCurrentToken() != '(')
{
return logErrorPrototype("Expected '(' in prototype");
}
std::vector<std::string> argNames;
while (getNextToken() == obelisk::Lexer::kTokenIdentifier)
{
argNames.push_back(getLexer()->getIdentifier());
}
if (getCurrentToken() != ')')
{
return logErrorPrototype("Expected ')' in prototype");
}
getNextToken();
return std::make_unique<obelisk::PrototypeAST>(functionName,
std::move(argNames));
}
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseDefinition()
{
getNextToken();
auto prototype = parsePrototype();
if (!prototype)
{
return nullptr;
}
if (auto expression = parseExpression())
{
return std::make_unique<FunctionAST>(std::move(prototype),
std::move(expression));
}
return nullptr;
}
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseTopLevelExpression()
{
if (auto expression = parseExpression())
{
// Make an anonymous prototype
auto prototype = std::make_unique<obelisk::PrototypeAST>("__anon_expr",
std::vector<std::string>());
return std::make_unique<obelisk::FunctionAST>(std::move(prototype),
std::move(expression));
}
return nullptr;
}
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parseExtern()
{
getNextToken();
return parsePrototype();
}

53
src/parser.h Normal file
View File

@@ -0,0 +1,53 @@
#ifndef OBELISK_PARSER_H
#define OBELISK_PARSER_H
#include "ast/expression_ast.h"
#include "ast/function_ast.h"
#include "ast/prototype_ast.h"
#include "lexer.h"
#include <memory>
namespace obelisk
{
// Recursive-descent parser that pulls tokens from a Lexer and builds AST
// nodes. The parser owns its lexer and deletes it in the destructor.
class Parser
{
    private:
        // Owned lexer; deleted in ~Parser().
        obelisk::Lexer* lexer_ = nullptr;
        // Token most recently returned by the lexer.
        int currentToken_ = obelisk::Lexer::kTokenInvalid;

        void setCurrentToken(int currentToken);

        // Log a message to stderr and return null of the matching type.
        std::unique_ptr<obelisk::ExpressionAST> logError(const char* str);
        std::unique_ptr<obelisk::PrototypeAST> logErrorPrototype(
            const char* str);

        // Parsing routines; each returns null on failure.
        std::unique_ptr<obelisk::ExpressionAST> parseExpression();
        std::unique_ptr<obelisk::ExpressionAST> parseNumberExpression();
        std::unique_ptr<obelisk::ExpressionAST>
            parseParenthesisExpression();
        std::unique_ptr<obelisk::ExpressionAST> parseIdentifierExpression();
        std::unique_ptr<obelisk::ExpressionAST> parsePrimary();
        std::unique_ptr<obelisk::PrototypeAST> parsePrototype();
        std::unique_ptr<obelisk::FunctionAST> parseDefinition();
        std::unique_ptr<obelisk::FunctionAST> parseTopLevelExpression();
        std::unique_ptr<obelisk::PrototypeAST> parseExtern();

    public:
        // Creates a parser with its own newly allocated lexer.
        Parser();
        // Creates a parser around the given lexer, taking ownership of it.
        Parser(obelisk::Lexer* lexer);
        ~Parser();

        // The destructor deletes lexer_, so a copy would delete the same
        // lexer twice. Copying is therefore disabled.
        Parser(const Parser&) = delete;
        Parser& operator=(const Parser&) = delete;

        obelisk::Lexer* getLexer();

        int getCurrentToken();

        // Advances to the next token and returns it.
        int getNextToken();

        void handleDefinition();
        void handleExtern();
        void handleTopLevelExpression();
};
} // namespace obelisk
#endif