Handle invalid result from PDF parser.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-03 15:39:53 +00:00
parent e0fe937e1a
commit 7ac0971a17
2 changed files with 35 additions and 23 deletions

View file

@@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
pdf_object_t *ptr; pdf_object_t *ptr;
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Deleting duplicated object\n\t%8s\n", "id"); printf("Deleting duplicated object\n");
ptr = *pdf; ptr = *pdf;
while (ptr->next != NULL) { while (ptr != NULL && ptr->next != NULL) {
if (ptr->id == ptr->next->id) { if (ptr->id == ptr->next->id) {
pdf_get_obj(&ptr, ptr->id, &tmp); pdf_get_obj(&ptr, ptr->id, &tmp);
pdf_obj_del(&ptr, ptr->id); pdf_obj_del(&ptr, ptr->id);
@@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
ret++; ret++;
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("\t%8d\n", ptr->id); printf("Deleted duplicated object %d.\n", ptr->id);
} }
ptr = ptr->next; ptr = ptr->next;
@@ -247,18 +247,18 @@ cnki_pdf(cnki_t **param)
int *parent = NULL; int *parent = NULL;
pdf_get_parent_id(&pdf, &parent); pdf_get_parent_id(&pdf, &parent);
if (parent[0] == 0)
return 1;
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]); printf("Discovered %d parent object(s)\n", parent[0]);
int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t)); int8_t *parent_missing;
int *kid;
if (parent[0] > 0) {
parent_missing = malloc(parent[0] * sizeof(int8_t));
if (parent_missing == NULL) if (parent_missing == NULL)
return 1; return 1;
}
int *kid;
for (int i = 1; i <= parent[0]; i++) { for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1) if ((*param)->stat > 1)
@@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Searching for root object\n"); printf("Searching for root object\n");
dictionary_size = 128; dictionary_size = 128 + 12 * parent[0];
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
@@ -400,9 +400,11 @@ cnki_pdf(cnki_t **param)
root); root);
} }
free(parent); if (parent[0] > 0)
free(parent_missing); free(parent_missing);
free(parent);
int outline = _pdf_cnki_outline(param, &pdf); int outline = _pdf_cnki_outline(param, &pdf);
if ((*param)->stat > 1) if ((*param)->stat > 1)
@@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary); free(dictionary);
dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
/* Add /Parent to page object */ /* Add /Parent to page object */
for (int i = 0; i < (*param)->file_stat->page; i++) { for (int i = 0; i < (*param)->file_stat->page; i++) {
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) { if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
@@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param)
return 1; return 1;
} }
memset(dictionary, 0, dictionary_size); dictionary_size = tmp->dictionary_size + 24;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size); memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
memset(dictionary + tmp->dictionary_size, 0, 24);
snprintf(buf, 64, "/Parent %d 0 R\n>>", root); snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
strcat(dictionary, buf); strcat(dictionary, buf);
@@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param)
free(root_kid); free(root_kid);
return 1; return 1;
} }
free(dictionary);
} }
free(root_kid); free(root_kid);
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memset(dictionary, 0, dictionary_size); memset(dictionary, 0, dictionary_size);
if ((*param)->stat > 0) if ((*param)->stat > 0)

View file

@@ -119,6 +119,7 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
fseek(*fp, tail + 7, SEEK_SET); fseek(*fp, tail + 7, SEEK_SET);
head = tail = 0; head = tail = 0;
} else if (head > 0 && tail > 0) { } else if (head > 0 && tail > 0) {
if (cur + size_buf < end)
fseek(*fp, head, SEEK_SET); fseek(*fp, head, SEEK_SET);
tail = 0; tail = 0;
} else { } else {