Handle combination of text and image in page content.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2022-12-29 21:10:03 +00:00
parent 5a1afb0056
commit 1ce3f89574
2 changed files with 63 additions and 64 deletions

View file

@ -460,6 +460,9 @@ cnki_pdf_hn(cnki_t **param)
if (pdf_obj_create(&pdf) != 0)
return 1;
int font = pdf_get_free_id(&pdf);
pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);
if ((*param)->stat > 1)
printf("Generating PDF object(s)\n");
@ -480,15 +483,12 @@ cnki_pdf_hn(cnki_t **param)
while (ptr != NULL) {
/*
* External object (ptr->image_length) +
* content object +
* resource object +
* content object +
* page object
*/
int *ids = NULL;
if (ptr->image_length > 0)
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
else
pdf_get_free_ids(&pdf, &ids, 2);
int bitmap_size;
char *bitmap;
@ -721,19 +721,39 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 2)
printf("Not extracted.\n");
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
} else {
if ((*param)->stat > 2)
printf("Unsupported format.\n");
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
}
}
if (ptr->image_length > 0)
free(dictionary);
dictionary_size = 128 + 2 * ptr->text_size + 128 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
}
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n");
if (ptr->text_size > 0) {
snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
strcat(dictionary, buf);
}
if (ptr->image_length > 0) {
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/XObject <<");
strcat(dictionary, "/XObject <<");
for (int i = 0; i < ptr->image_length; i++) {
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
@ -743,12 +763,14 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, " ");
}
strcat(dictionary, ">>\n>>");
strcat(dictionary, ">>\n");
}
strcat(dictionary, ">>");
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
free(dictionary);
}
memset(dictionary, 0, dictionary_size);
int conv_size;
char *conv_dst;
@ -766,20 +788,10 @@ cnki_pdf_hn(cnki_t **param)
ptr->text = stream;
}
dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
}
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "BT\n");
strcat(dictionary, "/F0 10 Tf\n");
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
case 0x8001:
@ -801,6 +813,9 @@ cnki_pdf_hn(cnki_t **param)
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
//snprintf(buf, 64, "%f %f Td\n");
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
@ -831,6 +846,9 @@ cnki_pdf_hn(cnki_t **param)
conv_src[0] = ptr->text[i + 7];
conv_src[1] = ptr->text[i + 6];
//snprintf(buf, 64, "%f %f Td\n");
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
@ -885,23 +903,11 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "ET");
/* FIXME: Use the text somehow? */
free(dictionary);
}
dictionary_size = 128 + 128 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
if (ptr->image_length > 0)
strcat(dictionary, "\n");
}
if (ptr->image_length > 0) {
memset(dictionary, 0, dictionary_size);
char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
double resize_x = 1;
double resize_y = 1;
@ -970,10 +976,13 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "\n");
}
free(dim);
}
if (strlen(dictionary) > 0) {
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
free(root_kid);
free(ids);
free(dim);
free(dictionary);
return 1;
}
@ -993,6 +1002,9 @@ cnki_pdf_hn(cnki_t **param)
NULL, dictionary, stream, stream_size);
free(stream);
} else {
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
"null", NULL, NULL, 0);
}
memset(dictionary, 0, dictionary_size);
@ -1002,9 +1014,6 @@ cnki_pdf_hn(cnki_t **param)
/* A4 paper */
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
if (ptr->image_length > 0) {
free(dim);
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
strcat(dictionary, buf);
@ -1015,19 +1024,9 @@ cnki_pdf_hn(cnki_t **param)
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
root_kid[cnt++] = ids[ptr->image_length + 2];
} else {
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
strcat(dictionary, buf);
/* Add /Parent when we know root */
pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
root_kid[cnt++] = ids[ptr->image_length + 1];
}
free(dictionary);
free(ids);
free(dictionary);
ptr = ptr->next;
}

View file

@ -6,5 +6,5 @@
#define VERSION "0"
#define RELEASE "2"
#define PATCH "2"
#define PATCH "3"
#define EXTRA ""