diff --git a/out/Biblia.txt b/Biblia.txt similarity index 99% rename from out/Biblia.txt rename to Biblia.txt index 9dbd905..318e045 100644 --- a/out/Biblia.txt +++ b/Biblia.txt @@ -7711,4 +7711,4 @@ La venida de Cristo est 18 Yo testifico a todo aquel que oye las palabras de la profecía de este libro: Si alguno añadiere a estas cosas, Dios traerá sobre él las plagas que están escritas en este libro. 19 Y si alguno quitare de las palabras del libro de esta profecía, Dios quitará su parte del libro de la vida, y de la santa ciudad y de las cosas que están escritas en este libro. 20 El que da testimonio de estas cosas dice: Ciertamente vengo en breve. Amén; sí, ven, Señor Jesús. -21 La gracia de nuestro Señor Jesucristo sea con todos vosotros. Amén. \ No newline at end of file +21 La gracia de nuestro Señor Jesucristo sea con todos vosotros. Amén. diff --git a/Makefile b/Makefile index c506fa9..19bd1f1 100644 --- a/Makefile +++ b/Makefile @@ -3,23 +3,34 @@ BINDIR=out DEBUG_FLAGS=-Wall -Werror -CFLAGS=$(DEBUG_FLAGS) -ggdb $(shell pkg-config libxml-2.0 --cflags) -ansi +CFLAGS=$(DEBUG_FLAGS) -ggdb -Wconversion -std=c11 -D_DEFAULT_SOURCE $(shell pkg-config libxml-2.0 --cflags) -ansi LDFLAGS=$(shell pkg-config libxml-2.0 --libs) + +#CPPFLAGS +# DEBUG: show debug information during runtime +# DEBUG_ENCODING: show debug information related to encoding at runtime + +CPPFLAGS=-DDEBUG BINARY=generarxml OBJS = \ main.o \ - readconfig.o + readconfig.o \ + readfile.o \ + encoding.o \ + string.o all: bin bin: $(OBJS) - $(CC) $(xml2-config --cflags --libs) $(CFLAGS) $(OBJS) $(LDFLAGS) \ - -o $(BINDIR)/$(BINARY) + $(CC)$(xml2-config --cflags --libs) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(BINDIR)/$(BINARY) + cp *.conf $(BINDIR)/ + cp *.dtd $(BINDIR)/ + cp Biblia.txt $(BINDIR)/ clean: - rm -fR $(BINDIR)/$(BINARY) *.o + rm -fR $(BINDIR)/* *.o %.o: %.c - $(CC) $(xml2-config --cflags --libs) $(CFLAGS) -c $< -o $@ $(LDFLAGS) + $(CC)$(xml2-config --cflags --libs) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -c $< -o $@ diff --git a/README b/README new file mode 100644 index 0000000..5f3d277 --- /dev/null +++ b/README @@ -0,0 +1,4 @@ +This program requires the following: +libxml2 +glibc compiled with iconv support +gcc compiled with C99 diff --git a/out/chris.conf b/chris.conf similarity index 89% rename from out/chris.conf rename to chris.conf index ec33a5c..223344e 100644 --- a/out/chris.conf +++ b/chris.conf @@ -6,6 +6,6 @@ SAN MATEO MATEO - 16-20 + 1-2 diff --git a/out/config.dtd b/config.dtd similarity index 100% rename from out/config.dtd rename to config.dtd diff --git a/encoding.c b/encoding.c new file mode 100644 index 0000000..c947306 --- /dev/null +++ b/encoding.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include + +/* + * This function converts the latin1 ISO_8859-1 encoding into utf-8. + */ +char *latin2utf8(char *input) { + if (input && strlen(input) > 0) { + iconv_t cd = iconv_open("UTF-8", "ISO_8859-1"); + + char temp[strlen(input)]; + memcpy(temp, input, strlen(input) + 1); + + char *in_buf = &temp[0]; + size_t in_left = sizeof(temp); + + /* The more tildes and ñs that the string has, the longer the size needs to be. + * if the string is 100% of the aforementioned symbols, we will need a string of + * double the size. + */ + char output[strlen(temp) * 2]; + char *out_buf = &output[0]; + size_t out_left = sizeof(output); + + do { + if (iconv(cd, &in_buf, &in_left, &out_buf, &out_left) == (size_t) -1) { + #ifdef DEBUG + printf("%s\n", input); + perror("iconv"); + #endif + iconv_close(cd); + return NULL; + } + } + while (in_left > 0 && out_left > 0); + *out_buf = 0; + + #if defined(DEBUG) && defined(DEBUG_ENCODING) + printf("latin2utf8: input: %s\n", input); + printf("latin2utf8: output: %s\n", output); + #endif + + char *temp2 = (char *) malloc((strlen(output) + 1) * sizeof(char)); + memcpy(temp2, output, strlen(output) + 1); + + iconv_close(cd); + + return temp2; + } + return NULL; +} + +/* + * This function converts the utf-8 encoding into latin1 ISO_8859-1. + */ +char *utf82latin(char *input) { + if (input && strlen(input) > 0) { + iconv_t cd = iconv_open("ISO_8859-1", "UTF-8"); + + char temp[strlen(input)]; + memcpy(temp, input, strlen(input) + 1); + + char *in_buf = &temp[0]; + size_t in_left = sizeof(temp); + + /* The more tildes and ñs that the string has, the longer the size needs to be. + * if the string is 100% of the aforementioned symbols, we will need a string of + * double the size. + */ + char output[strlen(temp) * 2]; + char *out_buf = &output[0]; + size_t out_left = sizeof(output); + + do { + if (iconv(cd, &in_buf, &in_left, &out_buf, &out_left) == (size_t) -1) { + #ifdef DEBUG + printf("%s\n", input); + perror("iconv"); + #endif + iconv_close(cd); + return NULL; + } + } + while (in_left > 0 && out_left > 0); + *out_buf = 0; + + #if defined(DEBUG) && defined(DEBUG_ENCODING) + printf("latin2utf8: input: %s\n", input); + printf("latin2utf8: output: %s\n", output); + #endif + + char *temp2 = (char *) malloc((strlen(output) + 1) * sizeof(char)); + memcpy(temp2, output, strlen(output) + 1); + + iconv_close(cd); + + return temp2; + } + return NULL; +} diff --git a/encoding.h b/encoding.h new file mode 100644 index 0000000..96a1193 --- /dev/null +++ b/encoding.h @@ -0,0 +1,2 @@ +char *latin2utf8(char *input); +char *utf82latin(char *input); diff --git a/main.c b/main.c index 6e5e998..6eeca9b 100644 --- a/main.c +++ b/main.c @@ -4,6 +4,7 @@ #include #include "main.h" #include "readconfig.h" +#include "readfile.h" #ifdef LIBXML_TREE_ENABLED @@ -11,9 +12,11 @@ * This program is designed to take a text file and convert part of it into xml. */ int main(int argc, char **argv) { - atexit(cleanup); - char *config_file = NULL; + int status; + int i; + + atexit(cleanup); if (argc == 1) { /* No arguments were passed */ @@ -22,7 +25,6 @@ int main(int argc, char **argv) { } /* Read the command line arguments */ - int i; for (i = 1; i < argc; i++) { if ((strcmp(argv[1], "-c") == 0 || strcmp(argv[1], "--config") == 0) && config_file == NULL) { i++; @@ -46,7 +48,7 @@ int main(int argc, char **argv) { config->chapter = NULL; config->chapter_numbers = NULL; - int status = readconfig(config_file, config); + status = readconfig(config_file, config); free(config_file); config_file = NULL; if (status != 0) { @@ -60,6 +62,12 @@ int main(int argc, char **argv) { printf("\tNombre de capitulo: %s\n", config->chapter); printf("\tNumeros de capitulo: %s\n", config->chapter_numbers); + status = readfile(config); + if (status != 0) { + printf("Falló leer Biblia.txt!\n"); + return 1; + } + free(config->file); free(config->bible); free(config->book); @@ -120,5 +128,5 @@ void printusage(int error) { } printf("usage: generarxml [opciones] \n"); - printf(" -s, --config archivo de configuración\n"); + printf(" -c, --config archivo de configuración\n"); } diff --git a/readconfig.c b/readconfig.c index 001a233..3afb6fe 100644 --- a/readconfig.c +++ b/readconfig.c @@ -11,14 +11,14 @@ * config struct. */ int readconfig(char *config_file, CONFIG *config) { - /* Initilize the library */ - LIBXML_TEST_VERSION - xmlParserCtxtPtr context; xmlDocPtr config_xml = NULL; xmlNodePtr root = NULL; xmlNodePtr node = NULL; xmlNodePtr subnode = NULL; + + /* Initilize the library */ + LIBXML_TEST_VERSION context = xmlNewParserCtxt(); if (context == NULL) { @@ -26,7 +26,7 @@ int readconfig(char *config_file, CONFIG *config) { return 1; } - config_xml = xmlCtxtReadFile(context, config_file, NULL, XML_PARSE_DTDVALID); + config_xml = xmlCtxtReadFile(context, config_file, "UTF-8", XML_PARSE_DTDVALID); if (config_xml == NULL) { fprintf(stderr, "Falló analizar %s\n", config_file); xmlFreeParserCtxt(context); @@ -57,7 +57,7 @@ int readconfig(char *config_file, CONFIG *config) { xmlFree(file); } else if ((!xmlStrcmp(node->name, (const xmlChar *) "bible"))){ - xmlChar *bible = xmlNodeListGetString(config_xml, node->xmlChildrenNode, 1); + xmlChar *bible = xmlNodeListGetRawString(config_xml, node->xmlChildrenNode, 1); config->bible = (char *) malloc(strlen((char *) bible) + 1 * sizeof(char)); strcpy(config->bible, (char *) bible); xmlFree(bible); diff --git a/readfile.c b/readfile.c new file mode 100644 index 0000000..11e5e5c --- /dev/null +++ b/readfile.c @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include "main.h" +#include "readfile.h" +#include "encoding.h" +#include "string.h" + +int readfile(CONFIG *config) { + FILE *file = NULL; + int start; + int end; + int i = 0; + char *line = NULL; + char **array = NULL; + size_t n = 0; + ssize_t nchr = 0; + size_t idx = 0; + size_t it = 0; + size_t new_max = MAX_LINES; + + char *ch = strtok(config->chapter_numbers, "-"); + while (ch != NULL) { + if (i == 0) { + start = atoi(ch); + i++; + } + else { + end = atoi(ch); + } + ch = strtok(NULL, "-"); + } + + #ifdef DEBUG + printf("Start chapter: %d\nEnd chapter: %d\n", start, end); + #endif + + i = 0; + + file = fopen("Biblia.txt", "r"); + if (file == NULL) { + perror("fopen"); + return 1; + } + + if (!(array = calloc(new_max, sizeof(*array)))) { + fprintf(stderr, "allocación de memoria falló."); + return 1; + } + + while ((nchr = getline(&line, &n, file)) != -1) { + while (nchr > 0 && (line[nchr - 1] == '\n' || line[nchr - 1] == '\r' || line[nchr - 1] == ' ' || line[nchr - 1] == '\t')) { + /* remove whitespace from end of the line */ + line[nchr--] = 0; + } + line[nchr] = '\0'; + array[idx++] = strdup(line); + + /* not enough memory for more lines, time to allocate more memory */ + if (idx == new_max) { + new_max = new_max * 2; + char **tmp = realloc(array, new_max * sizeof(*array)); + if (!tmp) { + fprintf(stderr, "reallocación de memoria falló."); + return 1; + } + array = tmp; + } + } + + /* free the extra unused memory */ + if (new_max > idx) { + char **tmp = realloc(array, idx * sizeof(*array)); + if (!tmp) { + fprintf(stderr, "reallocación de memoria falló."); + return 1; + } + array = tmp; + } + + if (file) { + fclose(file); + } + if (line) { + free(line); + } + + printf ("\nLines in file:\n\n"); + for (it = 0; it < 20; it++) { + line = latin2utf8(array[it]); + /* printf(" array [%lu] %s\n", (long) it, line); */ + if (line != NULL) { + if (strcmp(line, config->bible) == 0) { + printf("Bible match: %d -> %s\n", (int) it + 1, line); + } + if (strcmp(line, config->book) == 0) { + printf("Book match: %d -> %s\n", (int) it + 1, line); + } + } + free(line); + } + + for (it = 0; it < idx; it++) { + free(array[it]); + } + free(array); + + return 0; +} diff --git a/readfile.h b/readfile.h new file mode 100644 index 0000000..d0e2b73 --- /dev/null +++ b/readfile.h @@ -0,0 +1,2 @@ +#define MAX_LINES 100 +int readfile(CONFIG *config); diff --git a/string.c b/string.c new file mode 100644 index 0000000..8343a73 --- /dev/null +++ b/string.c @@ -0,0 +1,27 @@ +#include +#include + +char *trim(char *str) { + char *end; + + /* Trim leading space */ + while (isspace((unsigned char) *str)) { + str++; + } + + if(*str == 0) { + /* All spaces? */ + return str; + } + + /* Trim trailing space */ + end = str + strlen(str) - 1; + while (end > str && (isspace((unsigned char) *end) || *end == '\r' || *end == '\n')) { + end--; + } + + /* Write new null terminator */ + *(end + 1) = 0; + + return str; +} diff --git a/string.h b/string.h new file mode 100644 index 0000000..ae049a7 --- /dev/null +++ b/string.h @@ -0,0 +1 @@ +char *trim(char *str);