Handle invalid result from PDF parser.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
e0fe937e1a
commit
7ac0971a17
2 changed files with 35 additions and 23 deletions
|
@@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
||||||
pdf_object_t *ptr;
|
pdf_object_t *ptr;
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Deleting duplicated object\n\t%8s\n", "id");
|
printf("Deleting duplicated object\n");
|
||||||
|
|
||||||
ptr = *pdf;
|
ptr = *pdf;
|
||||||
while (ptr->next != NULL) {
|
while (ptr != NULL && ptr->next != NULL) {
|
||||||
if (ptr->id == ptr->next->id) {
|
if (ptr->id == ptr->next->id) {
|
||||||
pdf_get_obj(&ptr, ptr->id, &tmp);
|
pdf_get_obj(&ptr, ptr->id, &tmp);
|
||||||
pdf_obj_del(&ptr, ptr->id);
|
pdf_obj_del(&ptr, ptr->id);
|
||||||
|
@@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
||||||
ret++;
|
ret++;
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("\t%8d\n", ptr->id);
|
printf("Deleted duplicated object %d.\n", ptr->id);
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
|
@@ -247,19 +247,19 @@ cnki_pdf(cnki_t **param)
|
||||||
int *parent = NULL;
|
int *parent = NULL;
|
||||||
pdf_get_parent_id(&pdf, &parent);
|
pdf_get_parent_id(&pdf, &parent);
|
||||||
|
|
||||||
if (parent[0] == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Discovered %d parent object(s)\n", parent[0]);
|
printf("Discovered %d parent object(s)\n", parent[0]);
|
||||||
|
|
||||||
int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t));
|
int8_t *parent_missing;
|
||||||
|
|
||||||
if (parent_missing == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
int *kid;
|
int *kid;
|
||||||
|
|
||||||
|
if (parent[0] > 0) {
|
||||||
|
parent_missing = malloc(parent[0] * sizeof(int8_t));
|
||||||
|
|
||||||
|
if (parent_missing == NULL)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 1; i <= parent[0]; i++) {
|
for (int i = 1; i <= parent[0]; i++) {
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Searching for object %d\n", parent[i]);
|
printf("Searching for object %d\n", parent[i]);
|
||||||
|
@@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param)
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Searching for root object\n");
|
printf("Searching for root object\n");
|
||||||
|
|
||||||
dictionary_size = 128;
|
dictionary_size = 128 + 12 * parent[0];
|
||||||
dictionary = malloc(dictionary_size);
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
if (dictionary == NULL) {
|
||||||
|
@@ -400,8 +400,10 @@ cnki_pdf(cnki_t **param)
|
||||||
root);
|
root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (parent[0] > 0)
|
||||||
|
free(parent_missing);
|
||||||
|
|
||||||
free(parent);
|
free(parent);
|
||||||
free(parent_missing);
|
|
||||||
|
|
||||||
int outline = _pdf_cnki_outline(param, &pdf);
|
int outline = _pdf_cnki_outline(param, &pdf);
|
||||||
|
|
||||||
|
@@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
free(dictionary);
|
free(dictionary);
|
||||||
|
|
||||||
dictionary_size = 256;
|
|
||||||
dictionary = malloc(dictionary_size);
|
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
|
||||||
free(root_kid);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add /Parent to page object */
|
/* Add /Parent to page object */
|
||||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||||
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
||||||
|
@@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
dictionary_size = tmp->dictionary_size + 24;
|
||||||
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
|
if (dictionary == NULL) {
|
||||||
|
free(root_kid);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
|
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
|
||||||
|
memset(dictionary + tmp->dictionary_size, 0, 24);
|
||||||
|
|
||||||
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
@@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(dictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
|
|
||||||
|
dictionary_size = 128;
|
||||||
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
|
if (dictionary == NULL) {
|
||||||
|
free(root_kid);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
|
|
|
@@ -119,7 +119,8 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
fseek(*fp, tail + 7, SEEK_SET);
|
fseek(*fp, tail + 7, SEEK_SET);
|
||||||
head = tail = 0;
|
head = tail = 0;
|
||||||
} else if (head > 0 && tail > 0) {
|
} else if (head > 0 && tail > 0) {
|
||||||
fseek(*fp, head, SEEK_SET);
|
if (cur + size_buf < end)
|
||||||
|
fseek(*fp, head, SEEK_SET);
|
||||||
tail = 0;
|
tail = 0;
|
||||||
} else {
|
} else {
|
||||||
fseek(*fp, -7, SEEK_CUR);
|
fseek(*fp, -7, SEEK_CUR);
|
||||||
|
|
Loading…
Reference in a new issue