obelisk/src/parser.cpp

442 lines
11 KiB
C++
Raw Normal View History

2022-10-17 22:26:36 -03:00
#include "ast/call_expression_ast.h"
#include "ast/number_expression_ast.h"
#include "ast/variable_expression_ast.h"
2022-11-26 00:32:06 -03:00
#include "models/entity.h"
#include "models/fact.h"
#include "models/verb.h"
2022-10-17 22:26:36 -03:00
#include "parser.h"
2022-11-04 11:19:53 -03:00
#include <memory>
2022-11-08 22:27:26 -03:00
#include <stack>
2022-11-22 11:34:07 -03:00
#include <string>
2022-11-08 22:27:26 -03:00
#include <vector>
std::shared_ptr<obelisk::Lexer> obelisk::Parser::getLexer()
2022-10-17 22:26:36 -03:00
{
return lexer_;
2022-10-17 22:26:36 -03:00
}
/**
 * @brief Attach a lexer to the parser.
 *
 * The cached current token is reset to 0 whenever the lexer is replaced.
 *
 * @param lexer The lexer to read tokens from.
 */
void obelisk::Parser::setLexer(std::shared_ptr<obelisk::Lexer> lexer)
{
    this->currentToken_ = 0;
    this->lexer_        = lexer;
}
int obelisk::Parser::getNextToken()
{
try
{
setCurrentToken(getLexer()->getToken());
}
catch (obelisk::LexerException& exception)
{
throw;
}
2022-10-17 22:26:36 -03:00
return getCurrentToken();
}
int obelisk::Parser::getCurrentToken()
{
return currentToken_;
}
void obelisk::Parser::setCurrentToken(int currentToken)
{
currentToken_ = currentToken;
}
2022-12-13 17:35:14 -03:00
/**
 * @brief Report a parse error on stderr.
 *
 * The message is written as "Error: <str>" followed by a newline.
 *
 * @param str The error message to print.
 * @return Always a null pointer, so callers can write `return logError(...)`.
 */
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::logError(const char* str)
{
    fprintf(stderr, "Error: %s\n", str);

    return std::unique_ptr<obelisk::ExpressionAST>(nullptr);
}
2022-12-13 17:35:14 -03:00
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::logErrorPrototype(const char* str)
2022-10-17 22:26:36 -03:00
{
logError(str);
return nullptr;
}
/**
 * @brief Parse an expression.
 *
 * An expression currently consists of a single primary expression, so the
 * result of parsePrimary() is forwarded as is.
 *
 * @return The parsed expression, or a null pointer if parsing failed.
 */
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseExpression()
{
    return parsePrimary();
}
/**
 * @brief Parse a primary expression, chosen by the current token.
 *
 * - identifier token: parseIdentifierExpression()
 * - number token:     parseNumberExpression()
 * - '(':              parseParenthesisExpression()
 *
 * Any other token is reported through logError().
 *
 * @return The parsed expression, or a null pointer on error.
 */
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parsePrimary()
{
    switch (getCurrentToken())
    {
        case obelisk::Lexer::kTokenIdentifier :
            return parseIdentifierExpression();
        case obelisk::Lexer::kTokenNumber :
            return parseNumberExpression();
        case '(' :
            return parseParenthesisExpression();
        default :
            return logError("unknown token when expecting an expression");
    }
}
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseNumberExpression()
{
2022-12-13 17:35:14 -03:00
auto result = std::make_unique<obelisk::NumberExpressionAST>(getLexer()->getNumberValue());
2022-10-17 22:26:36 -03:00
getNextToken();
2022-11-04 11:19:53 -03:00
return result;
2022-10-17 22:26:36 -03:00
}
2022-12-13 17:35:14 -03:00
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseParenthesisExpression()
2022-10-17 22:26:36 -03:00
{
getNextToken();
auto v = parseExpression();
if (!v)
{
return nullptr;
}
if (getCurrentToken() != ')')
{
return logError("expected ')'");
}
getNextToken();
return v;
}
2022-12-13 17:35:14 -03:00
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseIdentifierExpression()
2022-10-17 22:26:36 -03:00
{
std::string idName = getLexer()->getIdentifier();
getNextToken();
if (getCurrentToken() != '(')
{
return std::make_unique<obelisk::VariableExpressionAST>(idName);
}
getNextToken();
std::vector<std::unique_ptr<obelisk::ExpressionAST>> args;
if (getCurrentToken() != ')')
{
while (true)
{
if (auto arg = parseExpression())
{
args.push_back(std::move(arg));
}
else
{
return nullptr;
}
if (getCurrentToken() == ')')
{
break;
}
if (getCurrentToken() != ',')
{
return logError("Expected ')' or ',' in argument list");
}
getNextToken();
}
}
getNextToken();
return std::make_unique<CallExpressionAST>(idName, std::move(args));
}
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parsePrototype()
{
if (getCurrentToken() != obelisk::Lexer::kTokenIdentifier)
{
return logErrorPrototype("Expected function name in prototype");
}
std::string functionName = getLexer()->getIdentifier();
getNextToken();
if (getCurrentToken() != '(')
{
return logErrorPrototype("Expected '(' in prototype");
}
std::vector<std::string> argNames;
while (getNextToken() == obelisk::Lexer::kTokenIdentifier)
{
argNames.push_back(getLexer()->getIdentifier());
}
if (getCurrentToken() != ')')
{
return logErrorPrototype("Expected ')' in prototype");
}
getNextToken();
2022-12-13 17:35:14 -03:00
return std::make_unique<obelisk::PrototypeAST>(functionName, std::move(argNames));
2022-10-17 22:26:36 -03:00
}
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseDefinition()
{
getNextToken();
auto prototype = parsePrototype();
if (!prototype)
{
return nullptr;
}
if (auto expression = parseExpression())
{
2022-12-13 17:35:14 -03:00
return std::make_unique<FunctionAST>(std::move(prototype), std::move(expression));
2022-10-17 22:26:36 -03:00
}
return nullptr;
}
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseTopLevelExpression()
{
if (auto expression = parseExpression())
{
// Make an anonymous prototype
2022-12-13 17:35:14 -03:00
auto prototype = std::make_unique<obelisk::PrototypeAST>("__anon_expr", std::vector<std::string>());
return std::make_unique<obelisk::FunctionAST>(std::move(prototype), std::move(expression));
2022-10-17 22:26:36 -03:00
}
return nullptr;
}
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parseExtern()
{
getNextToken();
return parsePrototype();
}
2022-11-08 22:27:26 -03:00
2022-11-26 00:32:06 -03:00
//action("martin" is "dangerous" then "avoid" or "ignore");
2022-11-08 22:27:26 -03:00
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseAction()
{
}
2022-11-26 00:32:06 -03:00
//rule("chris" and "martin" is "happy" if "chris" plays "playstation");
2022-11-08 22:27:26 -03:00
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseRule()
{
}
// fact("chris cromer" and "martin" and "Isabella" can "program" and "speak english");
// fact("" and "martin")
2022-11-26 00:32:06 -03:00
void obelisk::Parser::parseFact(std::vector<obelisk::Fact>& facts)
2022-11-08 22:27:26 -03:00
{
std::stack<char> syntax;
getNextToken();
if (getCurrentToken() != '(')
{
2022-12-13 17:35:14 -03:00
throw obelisk::ParserException("expected '(' but got '" + std::to_string(getCurrentToken()) + "'");
2022-11-08 22:27:26 -03:00
}
syntax.push('(');
// ("
bool getEntity {true};
std::vector<std::string> leftEntities;
2022-11-09 16:49:58 -03:00
std::vector<std::string> rightEntities;
2022-11-08 22:27:26 -03:00
std::string entityName {""};
2022-11-09 16:49:58 -03:00
std::string verb {""};
2022-11-08 22:27:26 -03:00
getNextToken();
while (true) //left side of fact
{
if (getEntity)
{
if (getCurrentToken() == '"')
{
if (syntax.top() != '"')
{
// open a double quote
syntax.push('"');
2022-11-09 16:49:58 -03:00
getNextToken();
2022-11-08 22:27:26 -03:00
}
else if (syntax.top() == '"')
{
// close a double quote
syntax.pop();
2022-11-09 16:49:58 -03:00
if (verb == "")
{
leftEntities.push_back(entityName);
}
else
{
rightEntities.push_back(entityName);
}
2022-11-08 22:27:26 -03:00
entityName = "";
2022-11-09 16:49:58 -03:00
getEntity = false;
getNextToken();
continue;
2022-11-08 22:27:26 -03:00
}
}
if (syntax.top() == '"')
{
if (entityName != "")
{
entityName += " ";
}
entityName += getLexer()->getIdentifier();
}
getNextToken();
}
else
{
2022-11-09 16:49:58 -03:00
if (getCurrentToken() == ')')
{
2022-11-22 11:34:07 -03:00
// closing parenthesis found, make sure we have everything needed
if (verb == "")
{
throw obelisk::ParserException("verb is empty");
}
if (leftEntities.size() == 0)
{
2022-12-13 17:35:14 -03:00
throw obelisk::ParserException("missing left side entities");
2022-11-22 11:34:07 -03:00
}
if (rightEntities.size() == 0)
{
2022-12-13 17:35:14 -03:00
throw obelisk::ParserException("missing right side entities");
2022-11-22 11:34:07 -03:00
}
2022-11-09 16:49:58 -03:00
break;
}
2022-11-08 22:27:26 -03:00
2022-11-09 16:49:58 -03:00
if (getCurrentToken() == '"')
{
2022-11-22 11:34:07 -03:00
throw obelisk::ParserException("unexpected '\"'");
2022-11-09 16:49:58 -03:00
break;
}
if (getLexer()->getIdentifier() == "and")
{
getNextToken();
getEntity = true;
continue;
}
else
{
verb = getLexer()->getIdentifier();
// TODO: make sure verb is alphabetic
2022-11-09 16:49:58 -03:00
getEntity = true;
continue;
}
}
2022-11-08 22:27:26 -03:00
}
2022-11-09 16:49:58 -03:00
2022-11-26 00:32:06 -03:00
for (auto& leftEntity : leftEntities)
{
for (auto& rightEntity : rightEntities)
{
2022-12-13 17:35:14 -03:00
facts.push_back(
obelisk::Fact(obelisk::Entity(leftEntity), obelisk::Entity(rightEntity), obelisk::Verb(verb)));
2022-11-26 00:32:06 -03:00
}
}
2022-11-08 22:27:26 -03:00
}
2022-11-26 00:32:06 -03:00
/**
 * @brief Handle an action statement. Not implemented yet.
 *
 * @param kb The knowledge base; currently unused.
 */
void obelisk::Parser::handleAction(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    // TODO: implement action handling
}
2022-11-26 00:32:06 -03:00
/**
 * @brief Handle a rule statement. Not implemented yet.
 *
 * @param kb The knowledge base; currently unused.
 */
void obelisk::Parser::handleRule(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    // TODO: implement rule handling
}
2022-11-26 00:32:06 -03:00
/**
 * @brief Parse a fact statement and store its facts in the knowledge base.
 *
 * For every parsed fact the left entity, the right entity, the verb and
 * finally the fact itself are passed to the knowledge base. Whenever an id
 * is still 0 after the add call, the item is looked up instead, and an
 * exception is thrown if the id is still 0 after the lookup.
 *
 * All facts produced by one statement share the same verb, so the verb is
 * only added or looked up for the first fact; its id is reused afterwards.
 *
 * @param kb The knowledge base to store the facts in.
 * @throws obelisk::ParserException if an entity, the verb or a fact still
 *         has id 0 after both the add and the lookup. Exceptions thrown by
 *         parseFact() are not handled here.
 */
void obelisk::Parser::handleFact(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    std::vector<obelisk::Fact> facts;
    parseFact(facts);

    int verbId = 0;
    for (auto& fact : facts)
    {
        // left entity
        std::vector<obelisk::Entity> entities {fact.getLeftEntity()};
        kb->addEntities(entities);
        fact.setLeftEntity(entities.front());

        // the id was not inserted, so check if it exists in the database
        if (fact.getLeftEntity().getId() == 0)
        {
            obelisk::Entity entity = fact.getLeftEntity();
            kb->getEntity(entity);
            if (entity.getId() == 0)
            {
                throw obelisk::ParserException("left entity could not be inserted into the database");
            }
            fact.setLeftEntity(entity);
        }

        // right entity
        entities = {fact.getRightEntity()};
        kb->addEntities(entities);
        fact.setRightEntity(entities.front());

        if (fact.getRightEntity().getId() == 0)
        {
            obelisk::Entity entity = fact.getRightEntity();
            kb->getEntity(entity);
            if (entity.getId() == 0)
            {
                throw obelisk::ParserException("right entity could not be inserted into the database");
            }
            fact.setRightEntity(entity);
        }

        if (verbId == 0)
        {
            // first fact of the statement: add the verb or look it up
            std::vector<obelisk::Verb> verbs = {fact.getVerb()};
            kb->addVerbs(verbs);
            if (verbs.front().getId() != 0)
            {
                fact.setVerb(verbs.front());
            }
            else
            {
                obelisk::Verb verb = fact.getVerb();
                kb->getVerb(verb);
                if (verb.getId() == 0)
                {
                    throw obelisk::ParserException("verb could not be inserted into the database");
                }
                fact.setVerb(verb);
            }
            verbId = fact.getVerb().getId();
        }
        else
        {
            // Reuse the known verb id. The verb is copied, updated and stored
            // back so the id reaches the fact whether getVerb() returns a
            // reference or a copy.
            obelisk::Verb verb = fact.getVerb();
            verb.setId(verbId);
            fact.setVerb(verb);
        }

        // Separate name so the vector being iterated is not shadowed.
        std::vector<obelisk::Fact> pendingFacts {fact};
        kb->addFacts(pendingFacts);
        fact = pendingFacts.front();

        if (fact.getId() == 0)
        {
            kb->getFact(fact);
            if (fact.getId() == 0)
            {
                throw obelisk::ParserException("fact could not be inserted into the database");
            }
        }
    }
}
2022-11-26 00:32:06 -03:00
// fact("chris cromer" and "martin" and "Isabella" can "program" and "speak english");