Handle duplicated image in HN.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
000405693e
commit
7d9d658461
1 changed file with 24 additions and 2 deletions
|
@@ -500,6 +500,8 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
char buf[64];
|
char buf[64];
|
||||||
|
|
||||||
|
pdf_object_t *tmp;
|
||||||
|
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
|
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
|
||||||
|
|
||||||
|
@@ -971,6 +973,28 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
margin_y = (3507.874 - dim[1]) / 2;
|
margin_y = (3507.874 - dim[1]) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Remove duplicated image, ptr->image_length is sometimes squared */
|
||||||
|
for (int i = 1; i < ptr->image_length; i++) {
|
||||||
|
if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
|
||||||
|
dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (int j = i; j < ptr->image_length; j++) {
|
||||||
|
pdf_get_obj(&pdf, ids[j], &tmp);
|
||||||
|
pdf_obj_del(&pdf, ids[j]);
|
||||||
|
|
||||||
|
tmp->next = NULL;
|
||||||
|
pdf_obj_destroy(&tmp);
|
||||||
|
|
||||||
|
dim[j * 2] = -1;
|
||||||
|
dim[j * 2 + 1] = -1;
|
||||||
|
|
||||||
|
pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < ptr->image_length; i++) {
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
||||||
continue;
|
continue;
|
||||||
|
@@ -1150,8 +1174,6 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
pdf_object_t *tmp;
|
|
||||||
|
|
||||||
/* Add /Parent to page object */
|
/* Add /Parent to page object */
|
||||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||||
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
||||||
|
|
Loading…
Reference in a new issue