From b20c6ad3ed930977990f3812b25b80d2ce282d79 Mon Sep 17 00:00:00 2001 From: yzrh Date: Thu, 31 Dec 2020 18:58:52 +0000 Subject: [PATCH] Handle binary data in dictionary. --- src/cnki_pdf.c | 14 +++++++------- src/pdf.c | 8 ++++---- src/pdf_get.c | 28 ++++++++++++++++++++-------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index d8eabff..8c9fbb8 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -110,7 +110,7 @@ cnki_pdf(cnki_t **param) strcat(dictionary, " "); } snprintf(buf, 64, - "]\n/Count %d\n>>\n", + "]\n/Count %d\n>>", pdf_get_kid_count(&pdf, parent[i])); strcat(dictionary, buf); @@ -198,7 +198,7 @@ cnki_pdf(cnki_t **param) snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page); strcat(dictionary, buf); - strcat(dictionary, ">>\n"); + strcat(dictionary, ">>"); pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL); @@ -238,10 +238,10 @@ cnki_pdf(cnki_t **param) if (catalog != 0) { if ((*param)->stat > 0) - printf("catalog object is %d.\n", catalog); + printf("Catalog object is %d.\n", catalog); } else { if ((*param)->stat > 0) - printf("catalog object is missing\n"); + printf("Catalog object is missing\n"); if ((*param)->stat > 1) printf("Generating catalog object\n"); @@ -258,7 +258,7 @@ cnki_pdf(cnki_t **param) strcat(dictionary, buf); } - strcat(dictionary, ">>\n"); + strcat(dictionary, ">>"); pdf_obj_append(&pdf, 0, NULL, dictionary, NULL); @@ -273,7 +273,7 @@ cnki_pdf(cnki_t **param) if (xref != 0) { if ((*param)->stat > 0) - printf("xref object is %d.\n", xref); + printf("Xref object is %d.\n", xref); if ((*param)->stat > 1) printf("Deleting xref object\n"); @@ -284,7 +284,7 @@ cnki_pdf(cnki_t **param) printf("Deleted xref object\n"); } else { if ((*param)->stat > 0) - printf("xref object is missing\n"); + printf("Xref object is missing\n"); } free(dictionary); diff --git a/src/pdf.c b/src/pdf.c index efcf730..202b6d2 100644 --- a/src/pdf.c +++ b/src/pdf.c @@ -84,24 +84,24 @@ pdf_obj_add(pdf_object_t **pdf, int id, (*pdf)->id = id; if (dictionary != NULL) { - (*pdf)->dictionary_size = strlen(dictionary) + 1; + (*pdf)->dictionary_size = strlen(dictionary); (*pdf)->dictionary = malloc((*pdf)->dictionary_size); if ((*pdf)->dictionary == NULL) return 1; - strncpy((*pdf)->dictionary, dictionary, (*pdf)->dictionary_size); + memcpy((*pdf)->dictionary, dictionary, (*pdf)->dictionary_size); (*pdf)->object_size = 0; (*pdf)->object = NULL; } else if (object != NULL) { - (*pdf)->object_size = strlen(object) + 1; + (*pdf)->object_size = strlen(object); (*pdf)->object = malloc((*pdf)->object_size); if ((*pdf)->object == NULL) return 1; - strncpy((*pdf)->object, object, (*pdf)->object_size); + memcpy((*pdf)->object, object, (*pdf)->object_size); (*pdf)->dictionary_size = 0; (*pdf)->dictionary = NULL; diff --git a/src/pdf_get.c b/src/pdf_get.c index 95d5b66..c5ab788 100644 --- a/src/pdf_get.c +++ b/src/pdf_get.c @@ -4,6 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +#ifdef __linux__ + +#define _GNU_SOURCE + +#endif /* __linux__ */ + #include #include @@ -152,7 +158,8 @@ pdf_get_catalog_id(pdf_object_t **pdf) while (ptr != NULL) { if (ptr->dictionary != NULL && - strstr(ptr->dictionary, "/Catalog") != NULL) + memmem(ptr->dictionary, ptr->dictionary_size, + "/Catalog", 8) != NULL) catalog_id = ptr->id; ptr = ptr->next; @@ -173,7 +180,8 @@ pdf_get_xref_id(pdf_object_t **pdf) while (ptr != NULL) { if (ptr->dictionary != NULL && - strstr(ptr->dictionary, "/XRef") != NULL) + memmem(ptr->dictionary, ptr->dictionary_size, + "/XRef", 5) != NULL) xref_id = ptr->id; ptr = ptr->next; @@ -208,10 +216,11 @@ pdf_get_parent_id(pdf_object_t **pdf, int **id) while (ptr != NULL) { if (ptr->dictionary != NULL && - (head = strstr(ptr->dictionary, "/Parent ")) != NULL && + (head = memmem(ptr->dictionary, ptr->dictionary_size, + "/Parent ", 8)) != NULL && (tail = strchr(head + 8, ' ')) != NULL) { memset(str, 0, 8); - strncpy(str, head + 8, (tail - head) - 8); + memcpy(str, head + 8, (tail - head) - 8); str_val = atoi(str); if (!_id_in(str_val, *id)) { @@ -258,7 +267,8 @@ pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid) } if (ptr->dictionary != NULL && - strstr(ptr->dictionary, str) != NULL) { + memmem(ptr->dictionary, ptr->dictionary_size, + str, strlen(str)) != NULL) { ret = realloc(*kid, ++kid_size * sizeof(int)); if (ret == NULL) @@ -297,13 +307,15 @@ pdf_get_kid_count(pdf_object_t **pdf, int id) while (ptr != NULL) { if (ptr->dictionary != NULL && - strstr(ptr->dictionary, id_str) != NULL && - (pos = strstr(ptr->dictionary, "/Count ")) != NULL) { + memmem(ptr->dictionary, ptr->dictionary_size, + id_str, strlen(id_str)) != NULL && + (pos = memmem(ptr->dictionary, ptr->dictionary_size, + "/Count ", 7)) != NULL) { for (int i = 8; i >= 0; i--) { if (i + 7 <= ptr->dictionary_size - (pos - ptr->dictionary) && pos[i + 7] >= '0' && pos[i + 7] <= '9') { memset(str, 0, 8); - strncpy(str, pos + 7, i + 1); + memcpy(str, pos + 7, i + 1); str_val = atoi(str); count += str_val; break;