Handle invalid result from PDF parser.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-03 15:39:53 +00:00
parent e0fe937e1a
commit 7ac0971a17
2 changed files with 35 additions and 23 deletions

View file

@@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
pdf_object_t *ptr;
if ((*param)->stat > 1)
printf("Deleting duplicated object\n\t%8s\n", "id");
printf("Deleting duplicated object\n");
ptr = *pdf;
while (ptr->next != NULL) {
while (ptr != NULL && ptr->next != NULL) {
if (ptr->id == ptr->next->id) {
pdf_get_obj(&ptr, ptr->id, &tmp);
pdf_obj_del(&ptr, ptr->id);
@@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
ret++;
if ((*param)->stat > 1)
printf("\t%8d\n", ptr->id);
printf("Deleted duplicated object %d.\n", ptr->id);
}
ptr = ptr->next;
@@ -247,19 +247,19 @@ cnki_pdf(cnki_t **param)
int *parent = NULL;
pdf_get_parent_id(&pdf, &parent);
if (parent[0] == 0)
return 1;
if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]);
int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t));
if (parent_missing == NULL)
return 1;
int8_t *parent_missing;
int *kid;
if (parent[0] > 0) {
parent_missing = malloc(parent[0] * sizeof(int8_t));
if (parent_missing == NULL)
return 1;
}
for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1)
printf("Searching for object %d\n", parent[i]);
@@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Searching for root object\n");
dictionary_size = 128;
dictionary_size = 128 + 12 * parent[0];
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -400,8 +400,10 @@ cnki_pdf(cnki_t **param)
root);
}
if (parent[0] > 0)
free(parent_missing);
free(parent);
free(parent_missing);
int outline = _pdf_cnki_outline(param, &pdf);
@@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary);
dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
/* Add /Parent to page object */
for (int i = 0; i < (*param)->file_stat->page; i++) {
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
@@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param)
return 1;
}
memset(dictionary, 0, dictionary_size);
dictionary_size = tmp->dictionary_size + 24;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
memset(dictionary + tmp->dictionary_size, 0, 24);
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
strcat(dictionary, buf);
@@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param)
free(root_kid);
return 1;
}
free(dictionary);
}
free(root_kid);
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memset(dictionary, 0, dictionary_size);
if ((*param)->stat > 0)

View file

@@ -119,7 +119,8 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
fseek(*fp, tail + 7, SEEK_SET);
head = tail = 0;
} else if (head > 0 && tail > 0) {
fseek(*fp, head, SEEK_SET);
if (cur + size_buf < end)
fseek(*fp, head, SEEK_SET);
tail = 0;
} else {
fseek(*fp, -7, SEEK_CUR);