Browse Source

readfile

master
Chris Cromer 4 years ago
parent
commit
60f4d62594
13 changed files with 285 additions and 18 deletions
  1. +1
    -1
      Biblia.txt
  2. +17
    -6
      Makefile
  3. +4
    -0
      README
  4. +1
    -1
      chris.conf
  5. +0
    -0
      config.dtd
  6. +102
    -0
      encoding.c
  7. +2
    -0
      encoding.h
  8. +13
    -5
      main.c
  9. +5
    -5
      readconfig.c
  10. +110
    -0
      readfile.c
  11. +2
    -0
      readfile.h
  12. +27
    -0
      string.c
  13. +1
    -0
      string.h

out/Biblia.txt → Biblia.txt View File

@@ -7711,4 +7711,4 @@ La venida de Cristo est
18 Yo testifico a todo aquel que oye las palabras de la profecía de este libro: Si alguno añadiere a estas cosas, Dios traerá sobre él las plagas que están escritas en este libro.
19 Y si alguno quitare de las palabras del libro de esta profecía, Dios quitará su parte del libro de la vida, y de la santa ciudad y de las cosas que están escritas en este libro.
20 El que da testimonio de estas cosas dice: Ciertamente vengo en breve. Amén; sí, ven, Señor Jesús.
21 La gracia de nuestro Señor Jesucristo sea con todos vosotros. Amén.
21 La gracia de nuestro Señor Jesucristo sea con todos vosotros. Amén.

+ 17
- 6
Makefile View File

@@ -3,23 +3,34 @@ BINDIR=out

DEBUG_FLAGS=-Wall -Werror

CFLAGS=$(DEBUG_FLAGS) -ggdb $(shell pkg-config libxml-2.0 --cflags) -ansi
CFLAGS=$(DEBUG_FLAGS) -ggdb -Wconversion -std=c11 -D_DEFAULT_SOURCE $(shell pkg-config libxml-2.0 --cflags) -ansi
LDFLAGS=$(shell pkg-config libxml-2.0 --libs)

#CPPFLAGS
# DEBUG: show debug information during runtime
# DEBUG_ENCODING: show debug information related to encoding at runtime

CPPFLAGS=-DDEBUG
BINARY=generarxml

OBJS = \
main.o \
readconfig.o
readconfig.o \
readfile.o \
encoding.o \
string.o

all: bin

bin: $(OBJS)
$(CC) $(xml2-config --cflags --libs) $(CFLAGS) $(OBJS) $(LDFLAGS) \
-o $(BINDIR)/$(BINARY)
$(CC)$(xml2-config --cflags --libs) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(BINDIR)/$(BINARY)
cp *.conf $(BINDIR)/
cp *.dtd $(BINDIR)/
cp Biblia.txt $(BINDIR)/

clean:
rm -fR $(BINDIR)/$(BINARY) *.o
rm -fR $(BINDIR)/* *.o

%.o: %.c
$(CC) $(xml2-config --cflags --libs) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
$(CC)$(xml2-config --cflags --libs) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -c $< -o $@

+ 4
- 0
README View File

@@ -0,0 +1,4 @@
This program requires the following:
libxml2
glibc compiled with iconv support
gcc with C99 support

out/chris.conf → chris.conf View File

@@ -6,6 +6,6 @@
<book>SAN MATEO</book>
<chapter>
<name>MATEO</name>
<number>16-20</number>
<number>1-2</number>
</chapter>
</config>

out/config.dtd → config.dtd View File


+ 102
- 0
encoding.c View File

@@ -0,0 +1,102 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>

/*
 * Converts a NUL-terminated Latin-1 (ISO 8859-1) string to UTF-8.
 *
 * input: the string to convert. It is only read, never modified.
 *
 * Returns a newly malloc'd, NUL-terminated UTF-8 string that the caller must
 * free(). Returns NULL when input is NULL or empty, when memory cannot be
 * allocated, or when iconv cannot be opened or fails to convert the text.
 */
char *latin2utf8(char *input) {
    iconv_t cd;
    char *in_buf;
    char *out_buf;
    char *output;
    char *shrunk;
    size_t in_left;
    size_t out_left;
    size_t out_size;

    if (input == NULL || input[0] == '\0') {
        return NULL;
    }

    in_left = strlen(input);

    /* Every Latin-1 byte expands to at most two UTF-8 bytes, so twice the
     * input length (plus the terminator) is always enough room.
     */
    if (in_left > (((size_t) -1) - 1) / 2) {
        return NULL;
    }
    out_size = in_left * 2;

    /* Heap storage instead of a VLA: the input length is not bounded. */
    output = malloc(out_size + 1);
    if (output == NULL) {
        return NULL;
    }

    cd = iconv_open("UTF-8", "ISO_8859-1");
    if (cd == (iconv_t) -1) {
#ifdef DEBUG
        perror("iconv_open");
#endif
        free(output);
        return NULL;
    }

    /* iconv only advances the pointers it is given; the input bytes are left
     * untouched, so no private copy of the input is required.
     */
    in_buf = input;
    out_buf = output;
    out_left = out_size;

    if (iconv(cd, &in_buf, &in_left, &out_buf, &out_left) == (size_t) -1) {
#ifdef DEBUG
        perror("iconv");
        printf("%s\n", input);
#endif
        iconv_close(cd);
        free(output);
        return NULL;
    }
    iconv_close(cd);

    /* out_buf points just past the last converted byte; the extra byte that
     * was allocated guarantees that the terminator fits.
     */
    *out_buf = '\0';

#if defined(DEBUG) && defined(DEBUG_ENCODING)
    printf("latin2utf8: input: %s\n", input);
    printf("latin2utf8: output: %s\n", output);
#endif

    /* Give back the unused tail; if shrinking fails the block is still valid. */
    shrunk = realloc(output, (size_t) (out_buf - output) + 1);
    return shrunk != NULL ? shrunk : output;
}

/*
 * Converts a NUL-terminated UTF-8 string to Latin-1 (ISO 8859-1).
 *
 * input: the string to convert. It is only read, never modified.
 *
 * Returns a newly malloc'd, NUL-terminated Latin-1 string that the caller
 * must free(). Returns NULL when input is NULL or empty, when memory cannot
 * be allocated, or when iconv cannot be opened or fails to convert the text
 * (for example because the input is not valid UTF-8).
 */
char *utf82latin(char *input) {
    iconv_t cd;
    char *in_buf;
    char *out_buf;
    char *output;
    char *shrunk;
    size_t in_left;
    size_t out_left;
    size_t out_size;

    if (input == NULL || input[0] == '\0') {
        return NULL;
    }

    in_left = strlen(input);

    /* Each Latin-1 output byte consumes at least one UTF-8 input byte, so the
     * result is never longer than the input.
     */
    out_size = in_left;

    /* Heap storage instead of a VLA: the input length is not bounded. */
    output = malloc(out_size + 1);
    if (output == NULL) {
        return NULL;
    }

    cd = iconv_open("ISO_8859-1", "UTF-8");
    if (cd == (iconv_t) -1) {
#ifdef DEBUG
        perror("iconv_open");
#endif
        free(output);
        return NULL;
    }

    /* iconv only advances the pointers it is given; the input bytes are left
     * untouched, so no private copy of the input is required.
     */
    in_buf = input;
    out_buf = output;
    out_left = out_size;

    if (iconv(cd, &in_buf, &in_left, &out_buf, &out_left) == (size_t) -1) {
#ifdef DEBUG
        perror("iconv");
        printf("%s\n", input);
#endif
        iconv_close(cd);
        free(output);
        return NULL;
    }
    iconv_close(cd);

    /* out_buf points just past the last converted byte; the extra byte that
     * was allocated guarantees that the terminator fits.
     */
    *out_buf = '\0';

#if defined(DEBUG) && defined(DEBUG_ENCODING)
    printf("utf82latin: input: %s\n", input);
    printf("utf82latin: output: %s\n", output);
#endif

    /* Give back the unused tail; if shrinking fails the block is still valid. */
    shrunk = realloc(output, (size_t) (out_buf - output) + 1);
    return shrunk != NULL ? shrunk : output;
}

+ 2
- 0
encoding.h View File

@@ -0,0 +1,2 @@
/* Converts a Latin-1 (ISO 8859-1) string to UTF-8. Returns a malloc'd string
 * that the caller must free(), or NULL for NULL/empty input or when the
 * conversion fails.
 */
char *latin2utf8(char *input);
/* Converts a UTF-8 string to Latin-1 (ISO 8859-1). Returns a malloc'd string
 * that the caller must free(), or NULL for NULL/empty input or when the
 * conversion fails.
 */
char *utf82latin(char *input);

+ 13
- 5
main.c View File

@@ -4,6 +4,7 @@
#include <libxml/tree.h>
#include "main.h"
#include "readconfig.h"
#include "readfile.h"

#ifdef LIBXML_TREE_ENABLED

@@ -11,9 +12,11 @@
* This program is designed to take a text file and convert part of it into xml.
*/
int main(int argc, char **argv) {
atexit(cleanup);

char *config_file = NULL;
int status;
int i;

atexit(cleanup);

if (argc == 1) {
/* No arguments were passed */
@@ -22,7 +25,6 @@ int main(int argc, char **argv) {
}
/* Read the command line arguments */
int i;
for (i = 1; i < argc; i++) {
if ((strcmp(argv[1], "-c") == 0 || strcmp(argv[1], "--config") == 0) && config_file == NULL) {
i++;
@@ -46,7 +48,7 @@ int main(int argc, char **argv) {
config->chapter = NULL;
config->chapter_numbers = NULL;

int status = readconfig(config_file, config);
status = readconfig(config_file, config);
free(config_file);
config_file = NULL;
if (status != 0) {
@@ -60,6 +62,12 @@ int main(int argc, char **argv) {
printf("\tNombre de capitulo: %s\n", config->chapter);
printf("\tNumeros de capitulo: %s\n", config->chapter_numbers);

status = readfile(config);
if (status != 0) {
printf("Falló leer Biblia.txt!\n");
return 1;
}

free(config->file);
free(config->bible);
free(config->book);
@@ -120,5 +128,5 @@ void printusage(int error) {
}

printf("usage: generarxml [opciones] \n");
printf(" -s, --config <archivo> archivo de configuración\n");
printf(" -c, --config <archivo> archivo de configuración\n");
}

+ 5
- 5
readconfig.c View File

@@ -11,14 +11,14 @@
* config struct.
*/
int readconfig(char *config_file, CONFIG *config) {
/* Initilize the library */
LIBXML_TEST_VERSION
xmlParserCtxtPtr context;
xmlDocPtr config_xml = NULL;
xmlNodePtr root = NULL;
xmlNodePtr node = NULL;
xmlNodePtr subnode = NULL;

/* Initilize the library */
LIBXML_TEST_VERSION
context = xmlNewParserCtxt();
if (context == NULL) {
@@ -26,7 +26,7 @@ int readconfig(char *config_file, CONFIG *config) {
return 1;
}

config_xml = xmlCtxtReadFile(context, config_file, NULL, XML_PARSE_DTDVALID);
config_xml = xmlCtxtReadFile(context, config_file, "UTF-8", XML_PARSE_DTDVALID);
if (config_xml == NULL) {
fprintf(stderr, "Falló analizar %s\n", config_file);
xmlFreeParserCtxt(context);
@@ -57,7 +57,7 @@ int readconfig(char *config_file, CONFIG *config) {
xmlFree(file);
}
else if ((!xmlStrcmp(node->name, (const xmlChar *) "bible"))){
xmlChar *bible = xmlNodeListGetString(config_xml, node->xmlChildrenNode, 1);
xmlChar *bible = xmlNodeListGetRawString(config_xml, node->xmlChildrenNode, 1);
config->bible = (char *) malloc(strlen((char *) bible) + 1 * sizeof(char));
strcpy(config->bible, (char *) bible);
xmlFree(bible);


+ 110
- 0
readfile.c View File

@@ -0,0 +1,110 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "main.h"
#include "readfile.h"
#include "encoding.h"
#include "string.h"

/*
 * Loads Biblia.txt into memory and prints which of its first lines match the
 * bible and book names from the configuration.
 *
 * config: parsed configuration. chapter_numbers is expected to look like
 *         "start-end"; it is tokenized in place with strtok, so every '-' in
 *         it is overwritten with a NUL byte. bible and book are compared
 *         against the UTF-8 form of each scanned line.
 *
 * Returns 0 on success, 1 if config is NULL, the file cannot be opened, or
 * memory runs out. Everything acquired here is released on every path.
 */
int readfile(CONFIG *config) {
    /* Only this many leading lines are checked for bible/book matches. */
    enum { SCAN_LINES = 20 };

    FILE *file = NULL;
    int start = 0;
    int end = 0;
    int have_start = 0;
    int status = 1;
    char *line = NULL;
    char **array = NULL;
    size_t n = 0;
    ssize_t nchr = 0;
    size_t idx = 0;
    size_t it = 0;
    size_t limit = 0;
    size_t new_max = MAX_LINES;

    if (config == NULL) {
        return 1;
    }

    /* Parse "start-end". A single number means a one-chapter range, and a
     * missing value leaves both bounds at 0 instead of uninitialized.
     */
    if (config->chapter_numbers != NULL) {
        char *ch = strtok(config->chapter_numbers, "-");
        while (ch != NULL) {
            if (!have_start) {
                start = atoi(ch);
                end = start;
                have_start = 1;
            }
            else {
                end = atoi(ch);
            }
            ch = strtok(NULL, "-");
        }
    }

#ifdef DEBUG
    printf("Start chapter: %d\nEnd chapter: %d\n", start, end);
#endif
    /* The range is only reported in debug builds for now. */
    (void) start;
    (void) end;

    file = fopen("Biblia.txt", "r");
    if (file == NULL) {
        perror("fopen");
        goto cleanup;
    }

    array = calloc(new_max, sizeof(*array));
    if (array == NULL) {
        fprintf(stderr, "allocación de memoria falló.");
        goto cleanup;
    }

    while ((nchr = getline(&line, &n, file)) != -1) {
        size_t len = (size_t) nchr;
        char *copy;

        /* remove whitespace from end of the line */
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r' || line[len - 1] == ' ' || line[len - 1] == '\t')) {
            len--;
        }
        line[len] = '\0';

        copy = strdup(line);
        if (copy == NULL) {
            fprintf(stderr, "allocación de memoria falló.");
            goto cleanup;
        }
        array[idx++] = copy;

        /* not enough memory for more lines, time to allocate more memory */
        if (idx == new_max) {
            char **tmp;

            new_max = new_max * 2;
            tmp = realloc(array, new_max * sizeof(*array));
            if (tmp == NULL) {
                fprintf(stderr, "reallocación de memoria falló.");
                goto cleanup;
            }
            array = tmp;
        }
    }

    printf ("\nLines in file:\n\n");

    /* Never look past the lines that were actually read. */
    limit = idx < (size_t) SCAN_LINES ? idx : (size_t) SCAN_LINES;
    for (it = 0; it < limit; it++) {
        /* The file is Latin-1 while the configuration strings are UTF-8. */
        char *utf8 = latin2utf8(array[it]);

        /* NULL means an empty line or a failed conversion: nothing to match. */
        if (utf8 == NULL) {
            continue;
        }
        if (config->bible != NULL && strcmp(utf8, config->bible) == 0) {
            printf("Bible match: %d -> %s\n", (int) it + 1, utf8);
        }
        if (config->book != NULL && strcmp(utf8, config->book) == 0) {
            printf("Book match: %d -> %s\n", (int) it + 1, utf8);
        }
        free(utf8);
    }

    status = 0;

cleanup:
    free(line);
    if (array != NULL) {
        for (it = 0; it < idx; it++) {
            free(array[it]);
        }
        free(array);
    }
    if (file != NULL) {
        fclose(file);
    }

    return status;
}

+ 2
- 0
readfile.h View File

@@ -0,0 +1,2 @@
/* Initial capacity, in lines, of the array readfile() stores the file in; the
 * capacity is doubled each time the array fills up.
 */
#define MAX_LINES 100
/* Reads Biblia.txt and reports lines matching the configured bible and book.
 * Returns 0 on success and 1 on failure.
 * NOTE(review): CONFIG is not declared in this header; readfile.c includes
 * main.h before it, so presumably the type comes from there -- confirm.
 */
int readfile(CONFIG *config);

+ 27
- 0
string.c View File

@@ -0,0 +1,27 @@
#include <string.h>
#include <ctype.h>

/*
 * Strips leading and trailing whitespace from str in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer; trailing
 * whitespace is cut off by writing a new terminator into the buffer. The
 * result therefore points inside str and must not be freed on its own.
 */
char *trim(char *str) {
    char *head = str;
    size_t len;

    /* Step over the leading whitespace. */
    for (; isspace((unsigned char) *head); head++) {
    }

    /* Nothing but whitespace: hand back the empty tail. */
    if (*head == '\0') {
        return head;
    }

    /* Shorten the string while its last character is whitespace. The first
     * character is known to be non-space, so the length never drops below 1.
     */
    len = strlen(head);
    while (len > 1) {
        unsigned char last = (unsigned char) head[len - 1];

        if (!isspace(last) && last != '\r' && last != '\n') {
            break;
        }
        len--;
    }

    /* Terminate right after the last character that is kept. */
    head[len] = '\0';

    return head;
}

+ 1
- 0
string.h View File

@@ -0,0 +1 @@
/* Strips leading and trailing whitespace from str in place and returns a
 * pointer to the first non-space character inside the same buffer.
 */
char *trim(char *str);

Loading…
Cancel
Save