read obelisk source files and compile the knowledge base
This commit is contained in:
parent
e77fb73cd0
commit
92ec34a000
@ -2,20 +2,33 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
obelisk::Lexer::Lexer(const std::string& sourceFile)
|
||||||
|
{
|
||||||
|
fileStream_.open(sourceFile, std::ifstream::in);
|
||||||
|
if (!fileStream_)
|
||||||
|
{
|
||||||
|
throw obelisk::LexerException("could not open source file " + sourceFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
obelisk::Lexer::~Lexer()
|
||||||
|
{
|
||||||
|
fileStream_.close();
|
||||||
|
fileStream_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
int obelisk::Lexer::getToken()
|
int obelisk::Lexer::getToken()
|
||||||
{
|
{
|
||||||
static int lastChar = ' ';
|
|
||||||
|
|
||||||
while (isspace(lastChar))
|
while (isspace(lastChar))
|
||||||
{
|
{
|
||||||
lastChar = std::getc(stdin);
|
lastChar = fileStream_.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isalpha(lastChar))
|
if (isalpha(lastChar))
|
||||||
{
|
{
|
||||||
eraseIdentifier();
|
eraseIdentifier();
|
||||||
appendIdentifier(lastChar);
|
appendIdentifier(lastChar);
|
||||||
while (isalnum((lastChar = getchar())))
|
while (isalnum((lastChar = fileStream_.get())))
|
||||||
{
|
{
|
||||||
appendIdentifier(lastChar);
|
appendIdentifier(lastChar);
|
||||||
}
|
}
|
||||||
@ -63,7 +76,7 @@ int obelisk::Lexer::getToken()
|
|||||||
firstPeriod = true;
|
firstPeriod = true;
|
||||||
}
|
}
|
||||||
numberStr += lastChar;
|
numberStr += lastChar;
|
||||||
lastChar = getchar();
|
lastChar = fileStream_.get();
|
||||||
}
|
}
|
||||||
while (isdigit(lastChar) || lastChar == '.');
|
while (isdigit(lastChar) || lastChar == '.');
|
||||||
|
|
||||||
@ -83,7 +96,7 @@ int obelisk::Lexer::getToken()
|
|||||||
}
|
}
|
||||||
else if (lastChar == '/')
|
else if (lastChar == '/')
|
||||||
{
|
{
|
||||||
lastChar = getchar();
|
lastChar = fileStream_.get();
|
||||||
if (lastChar == '/')
|
if (lastChar == '/')
|
||||||
{
|
{
|
||||||
commentLine(&lastChar);
|
commentLine(&lastChar);
|
||||||
@ -101,7 +114,7 @@ int obelisk::Lexer::getToken()
|
|||||||
}
|
}
|
||||||
|
|
||||||
int thisChar = lastChar;
|
int thisChar = lastChar;
|
||||||
lastChar = getchar();
|
lastChar = fileStream_.get();
|
||||||
return thisChar;
|
return thisChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,7 +122,7 @@ void obelisk::Lexer::commentLine(int* lastChar)
|
|||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*lastChar = getchar();
|
*lastChar = fileStream_.get();
|
||||||
}
|
}
|
||||||
while (*lastChar != EOF && *lastChar != '\n' && *lastChar != '\r');
|
while (*lastChar != EOF && *lastChar != '\n' && *lastChar != '\r');
|
||||||
}
|
}
|
||||||
|
22
src/lexer.h
22
src/lexer.h
@ -1,6 +1,7 @@
|
|||||||
#ifndef OBELISK_LEXER_H
|
#ifndef OBELISK_LEXER_H
|
||||||
#define OBELISK_LEXER_H
|
#define OBELISK_LEXER_H
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace obelisk
|
namespace obelisk
|
||||||
@ -12,6 +13,12 @@ namespace obelisk
|
|||||||
class Lexer
|
class Lexer
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
int lastChar = ' ';
|
||||||
|
/**
|
||||||
|
* @brief The stream of the source file being read.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
std::ifstream fileStream_;
|
||||||
/**
|
/**
|
||||||
* @brief The last found identifier.
|
* @brief The last found identifier.
|
||||||
*
|
*
|
||||||
@ -21,7 +28,7 @@ namespace obelisk
|
|||||||
* @brief The last found number.
|
* @brief The last found number.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
double numberValue_;
|
double numberValue_ = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Set the identifier.
|
* @brief Set the identifier.
|
||||||
@ -113,6 +120,19 @@ namespace obelisk
|
|||||||
kTokenString = -9
|
kTokenString = -9
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Construct a new Lexer object.
|
||||||
|
*
|
||||||
|
* @param[in] sourceFile The source file to read.
|
||||||
|
*/
|
||||||
|
Lexer(const std::string& sourceFile);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy the Lexer object.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
~Lexer();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Gets the next token in the source code.
|
* @brief Gets the next token in the source code.
|
||||||
*
|
*
|
||||||
|
@ -7,8 +7,8 @@ const char* obelisk::Fact::createTable()
|
|||||||
CREATE TABLE "fact" (
|
CREATE TABLE "fact" (
|
||||||
"id" INTEGER NOT NULL UNIQUE,
|
"id" INTEGER NOT NULL UNIQUE,
|
||||||
"left_entity" INTEGER NOT NULL,
|
"left_entity" INTEGER NOT NULL,
|
||||||
"right_entity" INTEGER NOT NULL,
|
|
||||||
"verb" INTEGER NOT NULL,
|
"verb" INTEGER NOT NULL,
|
||||||
|
"right_entity" INTEGER NOT NULL,
|
||||||
"is_true" INTEGER NOT NULL DEFAULT 0,
|
"is_true" INTEGER NOT NULL DEFAULT 0,
|
||||||
PRIMARY KEY("id" AUTOINCREMENT),
|
PRIMARY KEY("id" AUTOINCREMENT),
|
||||||
UNIQUE("left_entity", "right_entity", "verb")
|
UNIQUE("left_entity", "right_entity", "verb")
|
||||||
|
@ -9,9 +9,8 @@
|
|||||||
#include <limits>
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
static int obelisk::mainLoop(const std::vector<std::string>& sourceFiles, const std::string& kbFile)
|
int obelisk::mainLoop(const std::vector<std::string>& sourceFiles, const std::string& kbFile)
|
||||||
{
|
{
|
||||||
auto parser = std::unique_ptr<obelisk::Parser> {new obelisk::Parser()};
|
|
||||||
std::unique_ptr<obelisk::KnowledgeBase> kb;
|
std::unique_ptr<obelisk::KnowledgeBase> kb;
|
||||||
|
|
||||||
try
|
try
|
||||||
@ -24,8 +23,20 @@ static int obelisk::mainLoop(const std::vector<std::string>& sourceFiles, const
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prime the first token.
|
size_t file = 0;
|
||||||
fprintf(stderr, "ready> ");
|
std::shared_ptr<obelisk::Lexer> lexer;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
lexer = std::shared_ptr<obelisk::Lexer> {new obelisk::Lexer(sourceFiles[file++])};
|
||||||
|
}
|
||||||
|
catch (obelisk::LexerException& exception)
|
||||||
|
{
|
||||||
|
std::cout << exception.what() << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
auto parser = std::unique_ptr<obelisk::Parser> {new obelisk::Parser(lexer)};
|
||||||
|
|
||||||
|
// prime the first token
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parser->getNextToken();
|
parser->getNextToken();
|
||||||
@ -38,14 +49,29 @@ static int obelisk::mainLoop(const std::vector<std::string>& sourceFiles, const
|
|||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "ready> ");
|
|
||||||
switch (parser->getCurrentToken())
|
switch (parser->getCurrentToken())
|
||||||
{
|
{
|
||||||
case obelisk::Lexer::kTokenEof :
|
case obelisk::Lexer::kTokenEof :
|
||||||
return EXIT_SUCCESS;
|
// end of source file found, create a new lexer and pass it to the parser to use
|
||||||
case ';' : // ignore top-level semicolons.
|
if (file >= sourceFiles.size())
|
||||||
std::cout << "Identifier: " << parser->getLexer()->getIdentifier() << std::endl;
|
{
|
||||||
std::cout << "Num: " << parser->getLexer()->getNumberValue() << std::endl;
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
try
|
||||||
|
{
|
||||||
|
lexer = std::shared_ptr<obelisk::Lexer> {new obelisk::Lexer(sourceFiles[file++])};
|
||||||
|
parser->setLexer(lexer);
|
||||||
|
// prime the first token in the parser
|
||||||
|
parser->getNextToken();
|
||||||
|
}
|
||||||
|
catch (obelisk::LexerException& exception)
|
||||||
|
{
|
||||||
|
std::cout << exception.what() << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case ';' :
|
||||||
|
// semicolon found, the end of a statement
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parser->getNextToken();
|
parser->getNextToken();
|
||||||
@ -74,7 +100,7 @@ static int obelisk::mainLoop(const std::vector<std::string>& sourceFiles, const
|
|||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void obelisk::showUsage()
|
static void obelisk::showUsage()
|
||||||
{
|
{
|
||||||
std::cout << obelisk::usageMessage << std::endl;
|
std::cout << obelisk::usageMessage << std::endl;
|
||||||
}
|
}
|
||||||
@ -126,10 +152,5 @@ int main(int argc, char** argv)
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << sourceFiles[0] << std::endl;
|
|
||||||
std::cout << kbFile << std::endl;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return obelisk::mainLoop(sourceFiles, kbFile);
|
return obelisk::mainLoop(sourceFiles, kbFile);
|
||||||
}
|
}
|
||||||
|
@ -42,5 +42,5 @@ Options:
|
|||||||
*
|
*
|
||||||
* @return int Returns EXIT_SUCCESS or EXIT_FAILURE.
|
* @return int Returns EXIT_SUCCESS or EXIT_FAILURE.
|
||||||
*/
|
*/
|
||||||
static int mainLoop(const std::vector<std::string> &sourceFiles, const std::string &kbFile);
|
int mainLoop(const std::vector<std::string> &sourceFiles, const std::string &kbFile);
|
||||||
} // namespace obelisk
|
} // namespace obelisk
|
||||||
|
@ -11,16 +11,17 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
obelisk::Parser::Parser()
|
std::shared_ptr<obelisk::Lexer> obelisk::Parser::getLexer()
|
||||||
{
|
|
||||||
lexer_ = std::unique_ptr<obelisk::Lexer> {new obelisk::Lexer()};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_ptr<obelisk::Lexer>& obelisk::Parser::getLexer()
|
|
||||||
{
|
{
|
||||||
return lexer_;
|
return lexer_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void obelisk::Parser::setLexer(std::shared_ptr<obelisk::Lexer> lexer)
|
||||||
|
{
|
||||||
|
lexer_ = lexer;
|
||||||
|
currentToken_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
int obelisk::Parser::getNextToken()
|
int obelisk::Parser::getNextToken()
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
|
12
src/parser.h
12
src/parser.h
@ -15,8 +15,8 @@ namespace obelisk
|
|||||||
class Parser
|
class Parser
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<obelisk::Lexer> lexer_;
|
std::shared_ptr<obelisk::Lexer> lexer_;
|
||||||
int currentToken_;
|
int currentToken_ = 0;
|
||||||
|
|
||||||
void setCurrentToken(int currentToken);
|
void setCurrentToken(int currentToken);
|
||||||
|
|
||||||
@ -37,9 +37,13 @@ namespace obelisk
|
|||||||
void parseFact(std::vector<obelisk::Fact>& facts);
|
void parseFact(std::vector<obelisk::Fact>& facts);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Parser();
|
Parser(std::shared_ptr<obelisk::Lexer> lexer) :
|
||||||
|
lexer_(lexer)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<obelisk::Lexer>& getLexer();
|
std::shared_ptr<obelisk::Lexer> getLexer();
|
||||||
|
void setLexer(std::shared_ptr<obelisk::Lexer> lexer);
|
||||||
|
|
||||||
int getCurrentToken();
|
int getCurrentToken();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user