From 92ec34a00040b50c959682e4d2dbb2a82e8d7b9d Mon Sep 17 00:00:00 2001 From: Chris Cromer Date: Mon, 13 Feb 2023 23:03:31 -0300 Subject: [PATCH] read obelisk source files and compile the knowledge base --- src/lexer.cpp | 29 +++++++++++++++++++------- src/lexer.h | 22 ++++++++++++++++++- src/models/fact.cpp | 2 +- src/obelisk.cpp | 51 ++++++++++++++++++++++++++++++++------------- src/obelisk.h | 2 +- src/parser.cpp | 13 ++++++------ src/parser.h | 12 +++++++---- 7 files changed, 95 insertions(+), 36 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 5627689..32509b1 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -2,20 +2,33 @@ #include +obelisk::Lexer::Lexer(const std::string& sourceFile) +{ + fileStream_.open(sourceFile, std::ifstream::in); + if (!fileStream_) + { + throw obelisk::LexerException("could not open source file " + sourceFile); + } +} + +obelisk::Lexer::~Lexer() +{ + fileStream_.close(); + fileStream_.clear(); +} + int obelisk::Lexer::getToken() { - static int lastChar = ' '; - while (isspace(lastChar)) { - lastChar = std::getc(stdin); + lastChar = fileStream_.get(); } if (isalpha(lastChar)) { eraseIdentifier(); appendIdentifier(lastChar); - while (isalnum((lastChar = getchar()))) + while (isalnum((lastChar = fileStream_.get()))) { appendIdentifier(lastChar); } @@ -63,7 +76,7 @@ int obelisk::Lexer::getToken() firstPeriod = true; } numberStr += lastChar; - lastChar = getchar(); + lastChar = fileStream_.get(); } while (isdigit(lastChar) || lastChar == '.'); @@ -83,7 +96,7 @@ int obelisk::Lexer::getToken() } else if (lastChar == '/') { - lastChar = getchar(); + lastChar = fileStream_.get(); if (lastChar == '/') { commentLine(&lastChar); @@ -101,7 +114,7 @@ int obelisk::Lexer::getToken() } int thisChar = lastChar; - lastChar = getchar(); + lastChar = fileStream_.get(); return thisChar; } @@ -109,7 +122,7 @@ void obelisk::Lexer::commentLine(int* lastChar) { do { - *lastChar = getchar(); + *lastChar = fileStream_.get(); } while (*lastChar != EOF && *lastChar != '\n' && *lastChar != '\r'); } diff --git a/src/lexer.h b/src/lexer.h index 83ec706..4562611 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,6 +1,7 @@ #ifndef OBELISK_LEXER_H #define OBELISK_LEXER_H +#include #include namespace obelisk @@ -12,6 +13,12 @@ namespace obelisk class Lexer { private: + int lastChar = ' '; + /** + * @brief The stream of the source file being read. + * + */ + std::ifstream fileStream_; /** * @brief The last found identifier. * @@ -21,7 +28,7 @@ namespace obelisk * @brief The last found number. * */ - double numberValue_; + double numberValue_ = 0; /** * @brief Set the identifier. @@ -113,6 +120,19 @@ namespace obelisk kTokenString = -9 }; + /** + * @brief Construct a new Lexer object. + * + * @param[in] sourceFile The source file to read. + */ + Lexer(const std::string& sourceFile); + + /** + * @brief Destroy the Lexer object. + * + */ + ~Lexer(); + /** * @brief Gets the next token in the source code. * diff --git a/src/models/fact.cpp b/src/models/fact.cpp index d517286..8011bcc 100644 --- a/src/models/fact.cpp +++ b/src/models/fact.cpp @@ -7,8 +7,8 @@ const char* obelisk::Fact::createTable() CREATE TABLE "fact" ( "id" INTEGER NOT NULL UNIQUE, "left_entity" INTEGER NOT NULL, - "right_entity" INTEGER NOT NULL, "verb" INTEGER NOT NULL, + "right_entity" INTEGER NOT NULL, "is_true" INTEGER NOT NULL DEFAULT 0, PRIMARY KEY("id" AUTOINCREMENT), UNIQUE("left_entity", "right_entity", "verb") diff --git a/src/obelisk.cpp b/src/obelisk.cpp index d648d1b..8d64044 100644 --- a/src/obelisk.cpp +++ b/src/obelisk.cpp @@ -9,9 +9,8 @@ #include #include -static int obelisk::mainLoop(const std::vector& sourceFiles, const std::string& kbFile) +int obelisk::mainLoop(const std::vector& sourceFiles, const std::string& kbFile) { - auto parser = std::unique_ptr {new obelisk::Parser()}; std::unique_ptr kb; try @@ -24,8 +23,20 @@ static int obelisk::mainLoop(const std::vector& sourceFiles, const return EXIT_FAILURE; } - // Prime the first token. - fprintf(stderr, "ready> "); + size_t file = 0; + std::shared_ptr lexer; + try + { + lexer = std::shared_ptr {new obelisk::Lexer(sourceFiles[file++])}; + } + catch (obelisk::LexerException& exception) + { + std::cout << exception.what() << std::endl; + return EXIT_FAILURE; + } + auto parser = std::unique_ptr {new obelisk::Parser(lexer)}; + + // prime the first token try { parser->getNextToken(); @@ -38,14 +49,29 @@ static int obelisk::mainLoop(const std::vector& sourceFiles, const while (true) { - fprintf(stderr, "ready> "); switch (parser->getCurrentToken()) { case obelisk::Lexer::kTokenEof : - return EXIT_SUCCESS; - case ';' : // ignore top-level semicolons. - std::cout << "Identifier: " << parser->getLexer()->getIdentifier() << std::endl; - std::cout << "Num: " << parser->getLexer()->getNumberValue() << std::endl; + // end of source file found, create a new lexer and pass it to the parser to use + if (file >= sourceFiles.size()) + { + return EXIT_SUCCESS; + } + try + { + lexer = std::shared_ptr {new obelisk::Lexer(sourceFiles[file++])}; + parser->setLexer(lexer); + // prime the first token in the parser + parser->getNextToken(); + } + catch (obelisk::LexerException& exception) + { + std::cout << exception.what() << std::endl; + return EXIT_FAILURE; + } + break; + case ';' : + // semicolon found, the end of a statement try { parser->getNextToken(); @@ -74,7 +100,7 @@ static int obelisk::mainLoop(const std::vector& sourceFiles, const return EXIT_SUCCESS; } -void obelisk::showUsage() +static void obelisk::showUsage() { std::cout << obelisk::usageMessage << std::endl; } @@ -126,10 +152,5 @@ int main(int argc, char** argv) return EXIT_FAILURE; } - std::cout << sourceFiles[0] << std::endl; - std::cout << kbFile << std::endl; - - return 0; - return obelisk::mainLoop(sourceFiles, kbFile); } diff --git a/src/obelisk.h b/src/obelisk.h index 514ba1b..e3abfcb 100644 --- a/src/obelisk.h +++ b/src/obelisk.h @@ -42,5 +42,5 @@ Options: * * @return int Returns EXIT_SUCCESS or EXIT_FAILURE. */ - static int mainLoop(const std::vector &sourceFiles, const std::string &kbFile); + int mainLoop(const std::vector &sourceFiles, const std::string &kbFile); } // namespace obelisk diff --git a/src/parser.cpp b/src/parser.cpp index 41431db..c420a70 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -11,16 +11,17 @@ #include #include -obelisk::Parser::Parser() -{ - lexer_ = std::unique_ptr {new obelisk::Lexer()}; -} - -std::unique_ptr& obelisk::Parser::getLexer() +std::shared_ptr obelisk::Parser::getLexer() { return lexer_; } +void obelisk::Parser::setLexer(std::shared_ptr lexer) +{ + lexer_ = lexer; + currentToken_ = 0; +} + int obelisk::Parser::getNextToken() { try diff --git a/src/parser.h b/src/parser.h index 00aa208..19b4a8b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -15,8 +15,8 @@ namespace obelisk class Parser { private: - std::unique_ptr lexer_; - int currentToken_; + std::shared_ptr lexer_; + int currentToken_ = 0; void setCurrentToken(int currentToken); @@ -37,9 +37,13 @@ namespace obelisk void parseFact(std::vector& facts); public: - Parser(); + Parser(std::shared_ptr lexer) : + lexer_(lexer) + { + } - std::unique_ptr& getLexer(); + std::shared_ptr getLexer(); + void setLexer(std::shared_ptr lexer); int getCurrentToken();