Handle duplicated objects in CAJ.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-01 19:31:33 +00:00
parent cde014cffb
commit 1a1fee1034

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -145,11 +145,46 @@ _pdf_obj_sort(cnki_t **param, pdf_object_t **pdf)
ret = pdf_obj_sort(pdf); ret = pdf_obj_sort(pdf);
if ((*param)->stat > 0)
printf("Sorted object(s)\n");
return ret;
}
static int
_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
{
int ret = 0;
pdf_object_t *tmp;
pdf_object_t *ptr;
if ((*param)->stat > 1)
printf("Deleting duplicated object\n\t%8s\n", "id");
ptr = *pdf;
while (ptr->next != NULL) {
if (ptr->id == ptr->next->id) {
pdf_get_obj(&ptr, ptr->id, &tmp);
pdf_obj_del(&ptr, ptr->id);
tmp->next = NULL;
pdf_obj_destroy(&tmp);
ret++;
if ((*param)->stat > 1)
printf("\t%8d\n", ptr->id);
}
ptr = ptr->next;
}
if ((*param)->stat > 0) { if ((*param)->stat > 0) {
if (ret == 0) if (ret == 0)
printf("Sorted object(s)\n"); printf("No duplicated object\n");
else else
printf("Object(s) not sorted\n"); printf("Deleted %d duplicated object(s)\n", ret);
} }
return ret; return ret;
@ -338,12 +373,9 @@ cnki_pdf(cnki_t **param)
printf("Generating root object\n"); printf("Generating root object\n");
snprintf(buf, 64, snprintf(buf, 64,
"<<\n/Type /Pages\n/Kids "); "<<\n/Type /Pages\n/Kids [");
strcat(dictionary, buf); strcat(dictionary, buf);
if (parent[0] > 1)
strcat(dictionary, "[");
for (int i = 0, j = 0; i < parent[0]; i++) { for (int i = 0, j = 0; i < parent[0]; i++) {
if (parent_missing[i] == 1) { if (parent_missing[i] == 1) {
snprintf(buf, 64, "%d 0 R", parent[i + 1]); snprintf(buf, 64, "%d 0 R", parent[i + 1]);
@ -354,12 +386,7 @@ cnki_pdf(cnki_t **param)
} }
} }
if (parent[0] > 1) snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, "]");
strcat(dictionary, "\n");
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, buf); strcat(dictionary, buf);
strcat(dictionary, ">>"); strcat(dictionary, ">>");
@ -442,6 +469,8 @@ cnki_pdf(cnki_t **param)
_pdf_obj_sort(param, &pdf); _pdf_obj_sort(param, &pdf);
_pdf_obj_dedup(param, &pdf);
_pdf_dump(param, &pdf); _pdf_dump(param, &pdf);
pdf_obj_destroy(&pdf); pdf_obj_destroy(&pdf);
@ -721,12 +750,12 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("Not extracted.\n"); printf("Not extracted.\n");
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} else { } else {
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("Unsupported format.\n"); printf("Unsupported format.\n");
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} }
} }
@ -1028,7 +1057,7 @@ cnki_pdf_hn(cnki_t **param)
free(stream); free(stream);
} else { } else {
pdf_obj_append(&pdf, ids[ptr->image_length + 1], pdf_obj_append(&pdf, ids[ptr->image_length + 1],
"null", NULL, NULL, 0); NULL, NULL, NULL, 0);
} }
memset(dictionary, 0, dictionary_size); memset(dictionary, 0, dictionary_size);
@ -1094,12 +1123,9 @@ cnki_pdf_hn(cnki_t **param)
int root = pdf_get_free_id(&pdf); int root = pdf_get_free_id(&pdf);
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids "); snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
strcat(dictionary, buf); strcat(dictionary, buf);
if ((*param)->file_stat->page > 1)
strcat(dictionary, "[");
for (int i = 0; i < (*param)->file_stat->page; i++) { for (int i = 0; i < (*param)->file_stat->page; i++) {
snprintf(buf, 64, "%d 0 R", root_kid[i]); snprintf(buf, 64, "%d 0 R", root_kid[i]);
strcat(dictionary, buf); strcat(dictionary, buf);
@ -1107,12 +1133,7 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, " "); strcat(dictionary, " ");
} }
if ((*param)->file_stat->page > 1) snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, "]");
strcat(dictionary, "\n");
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, buf); strcat(dictionary, buf);
strcat(dictionary, ">>"); strcat(dictionary, ">>");