Handle duplicated object in CAJ.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
cde014cffb
commit
1a1fee1034
1 changed file with 47 additions and 26 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -145,11 +145,46 @@ _pdf_obj_sort(cnki_t **param, pdf_object_t **pdf)
|
|||
|
||||
ret = pdf_obj_sort(pdf);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Sorted object(s)\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
pdf_object_t *tmp;
|
||||
pdf_object_t *ptr;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Deleting duplicated object\n\t%8s\n", "id");
|
||||
|
||||
ptr = *pdf;
|
||||
while (ptr->next != NULL) {
|
||||
if (ptr->id == ptr->next->id) {
|
||||
pdf_get_obj(&ptr, ptr->id, &tmp);
|
||||
pdf_obj_del(&ptr, ptr->id);
|
||||
|
||||
tmp->next = NULL;
|
||||
pdf_obj_destroy(&tmp);
|
||||
|
||||
ret++;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%8d\n", ptr->id);
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if (ret == 0)
|
||||
printf("Sorted object(s)\n");
|
||||
printf("No duplicated object\n");
|
||||
else
|
||||
printf("Object(s) not sorted\n");
|
||||
printf("Deleted %d duplicated object(s)\n", ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -338,12 +373,9 @@ cnki_pdf(cnki_t **param)
|
|||
printf("Generating root object\n");
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Pages\n/Kids ");
|
||||
"<<\n/Type /Pages\n/Kids [");
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (parent[0] > 1)
|
||||
strcat(dictionary, "[");
|
||||
|
||||
for (int i = 0, j = 0; i < parent[0]; i++) {
|
||||
if (parent_missing[i] == 1) {
|
||||
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
|
||||
|
@ -354,12 +386,7 @@ cnki_pdf(cnki_t **param)
|
|||
}
|
||||
}
|
||||
|
||||
if (parent[0] > 1)
|
||||
strcat(dictionary, "]");
|
||||
|
||||
strcat(dictionary, "\n");
|
||||
|
||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
||||
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
@ -442,6 +469,8 @@ cnki_pdf(cnki_t **param)
|
|||
|
||||
_pdf_obj_sort(param, &pdf);
|
||||
|
||||
_pdf_obj_dedup(param, &pdf);
|
||||
|
||||
_pdf_dump(param, &pdf);
|
||||
|
||||
pdf_obj_destroy(&pdf);
|
||||
|
@ -721,12 +750,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if ((*param)->stat > 2)
|
||||
printf("Not extracted.\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
} else {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Unsupported format.\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1028,7 +1057,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
free(stream);
|
||||
} else {
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
|
||||
"null", NULL, NULL, 0);
|
||||
NULL, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
@ -1094,12 +1123,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
int root = pdf_get_free_id(&pdf);
|
||||
|
||||
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids ");
|
||||
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if ((*param)->file_stat->page > 1)
|
||||
strcat(dictionary, "[");
|
||||
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
snprintf(buf, 64, "%d 0 R", root_kid[i]);
|
||||
strcat(dictionary, buf);
|
||||
|
@ -1107,12 +1133,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
if ((*param)->file_stat->page > 1)
|
||||
strcat(dictionary, "]");
|
||||
|
||||
strcat(dictionary, "\n");
|
||||
|
||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
||||
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
|
Loading…
Reference in a new issue