Handle invalid result from PDF parser.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
e0fe937e1a
commit
7ac0971a17
2 changed files with 35 additions and 23 deletions
|
@@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
|||
pdf_object_t *ptr;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Deleting duplicated object\n\t%8s\n", "id");
|
||||
printf("Deleting duplicated object\n");
|
||||
|
||||
ptr = *pdf;
|
||||
while (ptr->next != NULL) {
|
||||
while (ptr != NULL && ptr->next != NULL) {
|
||||
if (ptr->id == ptr->next->id) {
|
||||
pdf_get_obj(&ptr, ptr->id, &tmp);
|
||||
pdf_obj_del(&ptr, ptr->id);
|
||||
|
@@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
|||
ret++;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%8d\n", ptr->id);
|
||||
printf("Deleted duplicated object %d.\n", ptr->id);
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
|
@@ -247,18 +247,18 @@ cnki_pdf(cnki_t **param)
|
|||
int *parent = NULL;
|
||||
pdf_get_parent_id(&pdf, &parent);
|
||||
|
||||
if (parent[0] == 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Discovered %d parent object(s)\n", parent[0]);
|
||||
|
||||
int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t));
|
||||
int8_t *parent_missing;
|
||||
int *kid;
|
||||
|
||||
if (parent[0] > 0) {
|
||||
parent_missing = malloc(parent[0] * sizeof(int8_t));
|
||||
|
||||
if (parent_missing == NULL)
|
||||
return 1;
|
||||
|
||||
int *kid;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= parent[0]; i++) {
|
||||
if ((*param)->stat > 1)
|
||||
|
@@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param)
|
|||
if ((*param)->stat > 1)
|
||||
printf("Searching for root object\n");
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary_size = 128 + 12 * parent[0];
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
|
@@ -400,9 +400,11 @@ cnki_pdf(cnki_t **param)
|
|||
root);
|
||||
}
|
||||
|
||||
free(parent);
|
||||
if (parent[0] > 0)
|
||||
free(parent_missing);
|
||||
|
||||
free(parent);
|
||||
|
||||
int outline = _pdf_cnki_outline(param, &pdf);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
|
@@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
free(dictionary);
|
||||
|
||||
dictionary_size = 256;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Add /Parent to page object */
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
||||
|
@@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param)
|
|||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
dictionary_size = tmp->dictionary_size + 24;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
|
||||
memset(dictionary + tmp->dictionary_size, 0, 24);
|
||||
|
||||
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
||||
strcat(dictionary, buf);
|
||||
|
@@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param)
|
|||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
free(dictionary);
|
||||
}
|
||||
|
||||
free(root_kid);
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
|
|
|
@@ -119,6 +119,7 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
fseek(*fp, tail + 7, SEEK_SET);
|
||||
head = tail = 0;
|
||||
} else if (head > 0 && tail > 0) {
|
||||
if (cur + size_buf < end)
|
||||
fseek(*fp, head, SEEK_SET);
|
||||
tail = 0;
|
||||
} else {
|
||||
|
|
Loading…
Reference in a new issue