Program to generate an XML file based on a provided text file.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

103 lines
3.0KB

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include <iconv.h>
  /*
   * Converts a NUL-terminated Latin-1 (ISO_8859-1) string to UTF-8.
   *
   * input: the string to convert; may be NULL.
   *
   * Returns a newly malloc()ed, NUL-terminated UTF-8 string that the caller
   * must free(). Returns NULL when input is NULL or empty, when the converter
   * cannot be opened, when memory cannot be allocated, or when iconv() reports
   * an error.
   */
  char *latin_to_utf8(char *input) {
      if (!input || input[0] == '\0') {
          return NULL;
      }

      size_t in_left = strlen(input);

      /* A Latin-1 byte expands to at most two UTF-8 bytes, so twice the input
       * length plus a terminator is always enough. Reject lengths for which
       * that size would overflow size_t. */
      if (in_left > ((size_t) -1 - 1) / 2) {
          return NULL;
      }
      size_t out_size = in_left * 2 + 1;

      iconv_t cd = iconv_open("UTF-8", "ISO_8859-1");
      if (cd == (iconv_t) -1) {
  #ifdef DEBUG
          perror("iconv_open");
  #endif
          return NULL;
      }

      /* Heap allocation: an input-sized array on the stack could exhaust it
       * for long strings. */
      char *output = malloc(out_size);
      if (!output) {
          iconv_close(cd);
          return NULL;
      }

      /* iconv() advances these cursors; it does not write to the input bytes,
       * so no scratch copy of the input is required. */
      char *in_buffer = input;
      char *out_buffer = output;
      /* Hold one byte back so the terminator always fits. */
      size_t out_left = out_size - 1;

      if (iconv(cd, &in_buffer, &in_left, &out_buffer, &out_left) == (size_t) -1) {
  #ifdef DEBUG
          /* perror() first: printf() is allowed to overwrite errno. */
          perror("iconv");
          printf("%s\n", input);
  #endif
          iconv_close(cd);
          free(output);
          return NULL;
      }
      iconv_close(cd);
      *out_buffer = '\0';

  #if defined(DEBUG) && defined(DEBUG_ENCODING)
      printf("latin to UTF-8: input: %s\n", input);
      printf("latin to UTF-8: output: %s\n", output);
  #endif

      /* Give back the unused tail; if shrinking fails the larger buffer is
       * still valid and correctly terminated. */
      char *result = realloc(output, (size_t) (out_buffer - output) + 1);
      return result ? result : output;
  }
  /*
   * Converts a NUL-terminated UTF-8 string to Latin-1 (ISO_8859-1).
   *
   * input: the string to convert; may be NULL.
   *
   * Returns a newly malloc()ed, NUL-terminated Latin-1 string that the caller
   * must free(). Returns NULL when input is NULL or empty, when the converter
   * cannot be opened, when memory cannot be allocated, or when iconv() reports
   * an error (for example on malformed UTF-8).
   */
  char *utf8_to_latin(char *input) {
      if (!input || input[0] == '\0') {
          return NULL;
      }

      size_t in_left = strlen(input);

      /* The working buffer keeps the historical capacity of twice the input
       * length (plus a terminator), so no conversion that used to fit is now
       * rejected for lack of space. Reject lengths for which that size would
       * overflow size_t. */
      if (in_left > ((size_t) -1 - 1) / 2) {
          return NULL;
      }
      size_t out_size = in_left * 2 + 1;

      iconv_t cd = iconv_open("ISO_8859-1", "UTF-8");
      if (cd == (iconv_t) -1) {
  #ifdef DEBUG
          perror("iconv_open");
  #endif
          return NULL;
      }

      /* Heap allocation: an input-sized array on the stack could exhaust it
       * for long strings. */
      char *output = malloc(out_size);
      if (!output) {
          iconv_close(cd);
          return NULL;
      }

      /* iconv() advances these cursors; it does not write to the input bytes,
       * so no scratch copy of the input is required. */
      char *in_buffer = input;
      char *out_buffer = output;
      /* Hold one byte back so the terminator always fits. */
      size_t out_left = out_size - 1;

      if (iconv(cd, &in_buffer, &in_left, &out_buffer, &out_left) == (size_t) -1) {
  #ifdef DEBUG
          /* perror() first: printf() is allowed to overwrite errno. */
          perror("iconv");
          printf("%s\n", input);
  #endif
          iconv_close(cd);
          free(output);
          return NULL;
      }
      iconv_close(cd);
      *out_buffer = '\0';

  #if defined(DEBUG) && defined(DEBUG_ENCODING)
      printf("UTF-8 to latin: input: %s\n", input);
      printf("UTF-8 to latin: output: %s\n", output);
  #endif

      /* Give back the unused tail; if shrinking fails the larger buffer is
       * still valid and correctly terminated. */
      char *result = realloc(output, (size_t) (out_buffer - output) + 1);
      return result ? result : output;
  }