2022-10-17 22:26:36 -03:00
|
|
|
#include "ast/call_expression_ast.h"
|
|
|
|
#include "ast/number_expression_ast.h"
|
|
|
|
#include "ast/variable_expression_ast.h"
|
2022-11-26 00:32:06 -03:00
|
|
|
#include "models/entity.h"
|
|
|
|
#include "models/fact.h"
|
|
|
|
#include "models/verb.h"
|
2022-10-17 22:26:36 -03:00
|
|
|
#include "parser.h"
|
|
|
|
|
2022-11-04 11:19:53 -03:00
|
|
|
#include <memory>
|
2022-11-08 22:27:26 -03:00
|
|
|
#include <stack>
|
2022-11-22 11:34:07 -03:00
|
|
|
#include <string>
|
2022-11-08 22:27:26 -03:00
|
|
|
#include <vector>
|
|
|
|
|
2023-02-13 23:03:31 -03:00
|
|
|
std::shared_ptr<obelisk::Lexer> obelisk::Parser::getLexer()
|
2022-10-17 22:26:36 -03:00
|
|
|
{
|
2023-02-13 23:03:31 -03:00
|
|
|
return lexer_;
|
2022-10-17 22:26:36 -03:00
|
|
|
}
|
|
|
|
|
2023-02-13 23:03:31 -03:00
|
|
|
// Attaches a new lexer to the parser and resets the current token to 0 so
// that no token from the previous lexer is left behind.
//
// lexer is taken by value and moved into place, which avoids a second
// reference-count increment/decrement on the shared pointer.
void obelisk::Parser::setLexer(std::shared_ptr<obelisk::Lexer> lexer)
{
    lexer_        = std::move(lexer);
    currentToken_ = 0;
}
|
|
|
|
|
|
|
|
int obelisk::Parser::getNextToken()
|
|
|
|
{
|
2022-11-30 22:44:35 -03:00
|
|
|
try
|
|
|
|
{
|
|
|
|
setCurrentToken(getLexer()->getToken());
|
|
|
|
}
|
|
|
|
catch (obelisk::LexerException& exception)
|
|
|
|
{
|
|
|
|
throw;
|
|
|
|
}
|
2022-10-17 22:26:36 -03:00
|
|
|
return getCurrentToken();
|
|
|
|
}
|
|
|
|
|
|
|
|
int obelisk::Parser::getCurrentToken()
|
|
|
|
{
|
|
|
|
return currentToken_;
|
|
|
|
}
|
|
|
|
|
|
|
|
void obelisk::Parser::setCurrentToken(int currentToken)
|
|
|
|
{
|
|
|
|
currentToken_ = currentToken;
|
|
|
|
}
|
|
|
|
|
2022-12-13 17:35:14 -03:00
|
|
|
// Writes "Error: <str>" followed by a newline to stderr.
//
// Always returns a null expression pointer so that callers can report a
// failure and bail out in a single return statement.
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::logError(const char* str)
{
    fprintf(stderr, "Error: %s\n", str);

    std::unique_ptr<obelisk::ExpressionAST> noExpression;
    return noExpression;
}
|
|
|
|
|
2022-12-13 17:35:14 -03:00
|
|
|
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::logErrorPrototype(const char* str)
|
2022-10-17 22:26:36 -03:00
|
|
|
{
|
|
|
|
logError(str);
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseExpression()
|
|
|
|
{
|
|
|
|
auto LHS = parsePrimary();
|
|
|
|
if (!LHS)
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return LHS;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parsePrimary()
|
|
|
|
{
|
|
|
|
switch (getCurrentToken())
|
|
|
|
{
|
|
|
|
case obelisk::Lexer::kTokenIdentifier :
|
|
|
|
return parseIdentifierExpression();
|
|
|
|
case obelisk::Lexer::kTokenNumber :
|
|
|
|
return parseNumberExpression();
|
|
|
|
case '(' :
|
|
|
|
return parseParenthesisExpression();
|
|
|
|
default :
|
|
|
|
return logError("unknown token when expecting and expression");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseNumberExpression()
|
|
|
|
{
|
2022-12-13 17:35:14 -03:00
|
|
|
auto result = std::make_unique<obelisk::NumberExpressionAST>(getLexer()->getNumberValue());
|
2022-10-17 22:26:36 -03:00
|
|
|
getNextToken();
|
2022-11-04 11:19:53 -03:00
|
|
|
return result;
|
2022-10-17 22:26:36 -03:00
|
|
|
}
|
|
|
|
|
2022-12-13 17:35:14 -03:00
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseParenthesisExpression()
|
2022-10-17 22:26:36 -03:00
|
|
|
{
|
|
|
|
getNextToken();
|
|
|
|
auto v = parseExpression();
|
|
|
|
if (!v)
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCurrentToken() != ')')
|
|
|
|
{
|
|
|
|
return logError("expected ')'");
|
|
|
|
}
|
|
|
|
getNextToken();
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
2022-12-13 17:35:14 -03:00
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseIdentifierExpression()
|
2022-10-17 22:26:36 -03:00
|
|
|
{
|
|
|
|
std::string idName = getLexer()->getIdentifier();
|
|
|
|
getNextToken();
|
|
|
|
if (getCurrentToken() != '(')
|
|
|
|
{
|
|
|
|
return std::make_unique<obelisk::VariableExpressionAST>(idName);
|
|
|
|
}
|
|
|
|
|
|
|
|
getNextToken();
|
|
|
|
std::vector<std::unique_ptr<obelisk::ExpressionAST>> args;
|
|
|
|
if (getCurrentToken() != ')')
|
|
|
|
{
|
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
if (auto arg = parseExpression())
|
|
|
|
{
|
|
|
|
args.push_back(std::move(arg));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCurrentToken() == ')')
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCurrentToken() != ',')
|
|
|
|
{
|
|
|
|
return logError("Expected ')' or ',' in argument list");
|
|
|
|
}
|
|
|
|
|
|
|
|
getNextToken();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
getNextToken();
|
|
|
|
return std::make_unique<CallExpressionAST>(idName, std::move(args));
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parsePrototype()
|
|
|
|
{
|
|
|
|
if (getCurrentToken() != obelisk::Lexer::kTokenIdentifier)
|
|
|
|
{
|
|
|
|
return logErrorPrototype("Expected function name in prototype");
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string functionName = getLexer()->getIdentifier();
|
|
|
|
getNextToken();
|
|
|
|
|
|
|
|
if (getCurrentToken() != '(')
|
|
|
|
{
|
|
|
|
return logErrorPrototype("Expected '(' in prototype");
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> argNames;
|
|
|
|
while (getNextToken() == obelisk::Lexer::kTokenIdentifier)
|
|
|
|
{
|
|
|
|
argNames.push_back(getLexer()->getIdentifier());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCurrentToken() != ')')
|
|
|
|
{
|
|
|
|
return logErrorPrototype("Expected ')' in prototype");
|
|
|
|
}
|
|
|
|
|
|
|
|
getNextToken();
|
|
|
|
|
2022-12-13 17:35:14 -03:00
|
|
|
return std::make_unique<obelisk::PrototypeAST>(functionName, std::move(argNames));
|
2022-10-17 22:26:36 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseDefinition()
|
|
|
|
{
|
|
|
|
getNextToken();
|
|
|
|
auto prototype = parsePrototype();
|
|
|
|
if (!prototype)
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (auto expression = parseExpression())
|
|
|
|
{
|
2022-12-13 17:35:14 -03:00
|
|
|
return std::make_unique<FunctionAST>(std::move(prototype), std::move(expression));
|
2022-10-17 22:26:36 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::FunctionAST> obelisk::Parser::parseTopLevelExpression()
|
|
|
|
{
|
|
|
|
if (auto expression = parseExpression())
|
|
|
|
{
|
|
|
|
// Make an anonymous prototype
|
2022-12-13 17:35:14 -03:00
|
|
|
auto prototype = std::make_unique<obelisk::PrototypeAST>("__anon_expr", std::vector<std::string>());
|
|
|
|
return std::make_unique<obelisk::FunctionAST>(std::move(prototype), std::move(expression));
|
2022-10-17 22:26:36 -03:00
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<obelisk::PrototypeAST> obelisk::Parser::parseExtern()
|
|
|
|
{
|
|
|
|
getNextToken();
|
|
|
|
return parsePrototype();
|
|
|
|
}
|
2022-11-08 22:27:26 -03:00
|
|
|
|
2022-11-26 00:32:06 -03:00
|
|
|
//action("martin" is "dangerous" then "avoid" or "ignore");
|
2022-11-08 22:27:26 -03:00
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseAction()
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2022-11-26 00:32:06 -03:00
|
|
|
//rule("chris" and "martin" is "happy" if "chris" plays "playstation");
|
2022-11-08 22:27:26 -03:00
|
|
|
std::unique_ptr<obelisk::ExpressionAST> obelisk::Parser::parseRule()
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// fact("chris cromer" and "martin" and "Isabella" can "program" and "speak english");
|
|
|
|
// fact("" and "martin")
|
2022-11-26 00:32:06 -03:00
|
|
|
// Parses the parenthesised body of a fact statement and appends the
// resulting facts to the facts vector.
//
// The body is a list of double quoted names joined by "and", a verb, and a
// second list of double quoted names. Names closed before a verb has been
// seen are collected as left side entities, names closed afterwards as right
// side entities. One fact is appended for every (left, right) pair, all
// sharing the verb.
//
// Throws obelisk::ParserException when the token after the current one is
// not '(', when a '"' appears where "and", a verb or ')' is expected, or when
// ')' is reached with an empty verb or with no left or right side entities.
//
// NOTE(review): the loop only stops on ')' or an exception, so it appears to
// rely on the lexer eventually producing one of those - confirm what happens
// at end of input.
void obelisk::Parser::parseFact(std::vector<obelisk::Fact>& facts)
{
    // Open delimiters: '(' stays at the bottom for the whole statement and
    // '"' sits on top of it while a quoted name is being read.
    std::stack<char> delimiters;

    getNextToken();
    if (getCurrentToken() != '(')
    {
        throw obelisk::ParserException("expected '(' but got '" + std::to_string(getCurrentToken()) + "'");
    }
    delimiters.push('(');

    // true while a quoted name is expected, false while "and", a verb or the
    // closing ')' is expected
    bool expectEntity {true};
    std::vector<std::string> leftNames;
    std::vector<std::string> rightNames;
    std::string currentName;
    std::string verbName;

    getNextToken();
    for (;;)
    {
        if (!expectEntity)
        {
            if (getCurrentToken() == ')')
            {
                // closing parenthesis found, make sure nothing is missing
                if (verbName.empty())
                {
                    throw obelisk::ParserException("verb is empty");
                }

                if (leftNames.empty())
                {
                    throw obelisk::ParserException("missing left side entities");
                }

                if (rightNames.empty())
                {
                    throw obelisk::ParserException("missing right side entities");
                }

                break;
            }

            if (getCurrentToken() == '"')
            {
                throw obelisk::ParserException("unexpected '\"'");
            }

            if (getLexer()->getIdentifier() == "and")
            {
                // "and" is consumed here; another quoted name follows
                getNextToken();
            }
            else
            {
                // Anything else is taken as the verb. The verb token is not
                // consumed here; the entity branch below skips it because no
                // quote is open at that point.
                verbName = getLexer()->getIdentifier();
                // TODO: make sure verb is alphabetic
            }
            expectEntity = true;
            continue;
        }

        if (getCurrentToken() == '"')
        {
            if (delimiters.top() == '"')
            {
                // closing quote: the collected name is complete
                delimiters.pop();
                if (verbName.empty())
                {
                    leftNames.push_back(currentName);
                }
                else
                {
                    rightNames.push_back(currentName);
                }
                currentName.clear();
                expectEntity = false;
                getNextToken();
                continue;
            }

            // Opening quote: remember it and move on to the first word. The
            // word is appended by the block right below.
            // NOTE(review): the token fetched here is not checked for being a
            // closing quote, so an empty name ("") looks mishandled - confirm.
            delimiters.push('"');
            getNextToken();
        }

        if (delimiters.top() == '"')
        {
            // inside a quoted name: words are joined with single spaces
            if (!currentName.empty())
            {
                currentName += " ";
            }
            currentName += getLexer()->getIdentifier();
        }
        getNextToken();
    }

    // every left entity is paired with every right entity
    for (auto& leftName : leftNames)
    {
        for (auto& rightName : rightNames)
        {
            facts.push_back(
                obelisk::Fact(obelisk::Entity(leftName), obelisk::Entity(rightName), obelisk::Verb(verbName)));
        }
    }
}
|
|
|
|
|
2022-11-26 00:32:06 -03:00
|
|
|
// Placeholder for handling an action statement; it currently does nothing
// and leaves kb untouched.
void obelisk::Parser::handleAction(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    // TODO: implement action handling
}
|
|
|
|
|
2022-11-26 00:32:06 -03:00
|
|
|
// Placeholder for handling a rule statement; it currently does nothing and
// leaves kb untouched.
void obelisk::Parser::handleRule(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    // TODO: implement rule handling
}
|
|
|
|
|
2022-11-26 00:32:06 -03:00
|
|
|
// Parses a fact statement and stores its entities, verb and facts through
// the knowledge base kb.
//
// For each parsed fact the left entity, the right entity, the verb and then
// the fact itself are added. Whenever an add leaves the id at 0, the item is
// looked up instead; if the id is still 0 afterwards an
// obelisk::ParserException is thrown. All facts of one statement share the
// same verb, so the verb is only added for the first fact and its id is
// reused for the others.
//
// Exceptions thrown by parseFact() propagate to the caller.
void obelisk::Parser::handleFact(std::unique_ptr<obelisk::KnowledgeBase>& kb)
{
    std::vector<obelisk::Fact> facts;
    parseFact(facts);

    // Adds one entity through kb and returns it with its id filled in,
    // looking it up when the add reported no id.
    auto storeEntity = [&kb](obelisk::Entity candidate, const char* failureMessage)
    {
        std::vector<obelisk::Entity> entities {candidate};
        kb->addEntities(entities);

        obelisk::Entity stored = entities.front();
        if (stored.getId() == 0)
        {
            // the id was not inserted, so check if it exists in the database
            kb->getEntity(stored);
            if (stored.getId() == 0)
            {
                throw obelisk::ParserException(failureMessage);
            }
        }
        return stored;
    };

    int verbId = 0;
    for (auto& fact : facts)
    {
        obelisk::Entity leftEntity
            = storeEntity(fact.getLeftEntity(), "left entity could not be inserted into the database");
        fact.setLeftEntity(leftEntity);

        obelisk::Entity rightEntity
            = storeEntity(fact.getRightEntity(), "right entity could not be inserted into the database");
        fact.setRightEntity(rightEntity);

        if (verbId == 0)
        {
            std::vector<obelisk::Verb> verbs = {fact.getVerb()};
            kb->addVerbs(verbs);
            if (verbs.front().getId() != 0)
            {
                fact.setVerb(verbs.front());
                verbId = fact.getVerb().getId();
            }
            else
            {
                // the id was not inserted, so check if it exists in the database
                obelisk::Verb verb = fact.getVerb();
                kb->getVerb(verb);
                if (verb.getId() == 0)
                {
                    throw obelisk::ParserException("verb could not be inserted into the database");
                }
                else
                {
                    fact.setVerb(verb);
                    verbId = fact.getVerb().getId();
                }
            }
        }
        else
        {
            // Go through setVerb() so the id reaches the fact whether
            // getVerb() hands back a copy or a reference.
            obelisk::Verb verb = fact.getVerb();
            verb.setId(verbId);
            fact.setVerb(verb);
        }

        // named differently from the outer vector that is being iterated
        std::vector<obelisk::Fact> storedFacts {fact};
        kb->addFacts(storedFacts);
        fact = storedFacts.front();

        if (fact.getId() == 0)
        {
            // the id was not inserted, so check if it exists in the database
            kb->getFact(fact);
            if (fact.getId() == 0)
            {
                throw obelisk::ParserException("fact could not be inserted into the database");
            }
        }
    }
}
|
2022-11-26 00:32:06 -03:00
|
|
|
|
|
|
|
// fact("chris cromer" and "martin" and "Isabella" can "program" and "speak english");
|