Handle combination of text and image in page content.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
5a1afb0056
commit
1ce3f89574
2 changed files with 63 additions and 64 deletions
125
src/cnki_pdf.c
125
src/cnki_pdf.c
|
@ -460,6 +460,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if (pdf_obj_create(&pdf) != 0)
|
||||
return 1;
|
||||
|
||||
int font = pdf_get_free_id(&pdf);
|
||||
pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating PDF object(s)\n");
|
||||
|
||||
|
@ -480,15 +483,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
while (ptr != NULL) {
|
||||
/*
|
||||
* External object (ptr->image_length) +
|
||||
* content object +
|
||||
* resource object +
|
||||
* content object +
|
||||
* page object
|
||||
*/
|
||||
int *ids = NULL;
|
||||
if (ptr->image_length > 0)
|
||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||
else
|
||||
pdf_get_free_ids(&pdf, &ids, 2);
|
||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||
|
||||
int bitmap_size;
|
||||
char *bitmap;
|
||||
|
@ -721,19 +721,39 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if ((*param)->stat > 2)
|
||||
printf("Not extracted.\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
|
||||
} else {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Unsupported format.\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
if (ptr->image_length > 0)
|
||||
free(dictionary);
|
||||
|
||||
strcat(dictionary, "<<\n/XObject <<");
|
||||
dictionary_size = 128 + 2 * ptr->text_size + 128 * ptr->image_length;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n");
|
||||
|
||||
if (ptr->text_size > 0) {
|
||||
snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
strcat(dictionary, "/XObject <<");
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
|
||||
|
@ -743,13 +763,15 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>\n>>");
|
||||
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
strcat(dictionary, ">>\n");
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
int conv_size;
|
||||
char *conv_dst;
|
||||
char conv_src[2];
|
||||
|
@ -766,20 +788,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
ptr->text = stream;
|
||||
}
|
||||
|
||||
dictionary_size = 64 + 2 * ptr->text_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "BT\n");
|
||||
|
||||
strcat(dictionary, "/F0 10 Tf\n");
|
||||
|
||||
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
|
||||
switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
|
||||
case 0x8001:
|
||||
|
@ -801,6 +813,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
conv_src[0] = ptr->text[i + 3];
|
||||
conv_src[1] = ptr->text[i + 2];
|
||||
|
||||
//snprintf(buf, 64, "%f %f Td\n");
|
||||
//strcat(dictionary, buf);
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
|
@ -831,6 +846,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
conv_src[0] = ptr->text[i + 7];
|
||||
conv_src[1] = ptr->text[i + 6];
|
||||
|
||||
//snprintf(buf, 64, "%f %f Td\n");
|
||||
//strcat(dictionary, buf);
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
|
@ -885,23 +903,11 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
strcat(dictionary, "ET");
|
||||
|
||||
/* FIXME: Use the text somehow? */
|
||||
free(dictionary);
|
||||
}
|
||||
|
||||
dictionary_size = 128 + 128 * ptr->image_length;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
if (ptr->image_length > 0)
|
||||
strcat(dictionary, "\n");
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
|
||||
double resize_x = 1;
|
||||
double resize_y = 1;
|
||||
|
@ -970,10 +976,13 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, "\n");
|
||||
}
|
||||
|
||||
free(dim);
|
||||
}
|
||||
|
||||
if (strlen(dictionary) > 0) {
|
||||
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
free(dictionary);
|
||||
return 1;
|
||||
}
|
||||
|
@ -993,6 +1002,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
NULL, dictionary, stream, stream_size);
|
||||
|
||||
free(stream);
|
||||
} else {
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
|
||||
"null", NULL, NULL, 0);
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
@ -1002,32 +1014,19 @@ cnki_pdf_hn(cnki_t **param)
|
|||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
free(dim);
|
||||
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
||||
strcat(dictionary, buf);
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||
} else {
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 1];
|
||||
}
|
||||
|
||||
free(dictionary);
|
||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||
|
||||
free(ids);
|
||||
free(dictionary);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
|
|
@ -6,5 +6,5 @@
|
|||
|
||||
#define VERSION "0"
|
||||
#define RELEASE "2"
|
||||
#define PATCH "2"
|
||||
#define PATCH "3"
|
||||
#define EXTRA ""
|
||||
|
|
Loading…
Reference in a new issue