Add preliminary support for HN figure placement.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
224a09a015
commit
abce2fd2e4
6 changed files with 156 additions and 53 deletions
|
@ -39,9 +39,9 @@ Specify output file
|
|||
Set buffer size (default 512k)
|
||||
|
||||
-v, --verbose
|
||||
Print more information (twice for even more, three times for HN image decoding information as well)
|
||||
Print more information (twice for even more, three times for HN image processing information as well)
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
||||
This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
|
||||
This project is inspired by [https://github.com/caj2pdf/caj2pdf](https://github.com/caj2pdf/caj2pdf)
|
||||
|
|
|
@ -138,7 +138,7 @@ cnki_info(cnki_t **param)
|
|||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading outline(s)\n");
|
||||
printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
printf("\t%19s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
"title",
|
||||
"hierarchy",
|
||||
"page",
|
||||
|
|
|
@ -58,6 +58,10 @@ typedef struct _hn_image_t {
|
|||
int32_t format; /* hn_code */
|
||||
int32_t address;
|
||||
int32_t size;
|
||||
int16_t x;
|
||||
int16_t y;
|
||||
int16_t w;
|
||||
int16_t h;
|
||||
char *image;
|
||||
} hn_image_t;
|
||||
|
||||
|
|
|
@ -93,6 +93,10 @@ cnki_hn(cnki_t **param)
|
|||
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||
ptr->image_data[i].x = 0;
|
||||
ptr->image_data[i].y = 0;
|
||||
ptr->image_data[i].w = 0;
|
||||
ptr->image_data[i].h = 0;
|
||||
fseek((*param)->fp_i,
|
||||
ptr->image_data[i].address + ptr->image_data[i].size,
|
||||
SEEK_SET);
|
||||
|
|
193
src/cnki_pdf.c
|
@ -524,7 +524,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
"/Subtype /Image\n");
|
||||
|
||||
if ((*param)->stat > 2)
|
||||
printf("\tDecoding data, page %04d item %02d format %d... ",
|
||||
printf("\tProcessing image, page %04d item %d format %d... ",
|
||||
ptr->page, i, ptr->image_data[i].format);
|
||||
|
||||
switch (ptr->image_data[i].format) {
|
||||
|
@ -700,7 +700,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (i + 1 < ptr->image_length)
|
||||
if (i < ptr->image_length - 1)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
|
@ -739,43 +739,112 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<feff");
|
||||
strcat(dictionary, "BT\n");
|
||||
|
||||
for (int i = 0; i < ptr->text_size; i += 6) {
|
||||
if (i + 5 >= ptr->text_size)
|
||||
break;
|
||||
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
|
||||
switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
|
||||
case 0x8001:
|
||||
if (ptr->address_next <= ptr->address) {
|
||||
i += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
conv_src[0] = ptr->text[i + 5];
|
||||
conv_src[1] = ptr->text[i + 4];
|
||||
strcat(dictionary, "T*\n");
|
||||
case 0x8070:
|
||||
if (ptr->address_next > ptr->address) {
|
||||
i += 4;
|
||||
|
||||
if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) {
|
||||
strcat(dictionary, "a389");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) {
|
||||
strcat(dictionary, "a38a");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) {
|
||||
strcat(dictionary, "a38d");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) {
|
||||
strcat(dictionary, "a3a0");
|
||||
continue;
|
||||
}
|
||||
for (;;) {
|
||||
if (i + 3 >= ptr->text_size ||
|
||||
(unsigned char) ptr->text[i + 1] == 0x80)
|
||||
break;
|
||||
|
||||
conv_size = 6;
|
||||
conv_src[0] = ptr->text[i + 3];
|
||||
conv_src[1] = ptr->text[i + 2];
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
if (conv_size - 2 > 0) {
|
||||
strcat(dictionary, "<feff");
|
||||
for (int k = 0; k < conv_size - 2; k++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[k]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
strcat(dictionary, "> Tj\n");
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
|
||||
i += 4;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (i + 7 >= ptr->text_size) {
|
||||
i += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
conv_src[0] = ptr->text[i + 7];
|
||||
conv_src[1] = ptr->text[i + 6];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
if (conv_size - 2 > 0) {
|
||||
strcat(dictionary, "<feff");
|
||||
for (int k = 0; k < conv_size - 2; k++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[k]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
strcat(dictionary, "> Tj\n");
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
|
||||
i += 8;
|
||||
break;
|
||||
case 0x800a:
|
||||
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
|
||||
i += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
ptr->image_data[j].x =
|
||||
ptr->text[i + 5] << 8 | ptr->text[i + 4];
|
||||
ptr->image_data[j].y =
|
||||
ptr->text[i + 7] << 8 | ptr->text[i + 6];
|
||||
ptr->image_data[j].w =
|
||||
ptr->text[i + 9] << 8 | ptr->text[i + 8];
|
||||
ptr->image_data[j].h =
|
||||
ptr->text[i + 11] << 8 | ptr->text[i + 10];
|
||||
|
||||
if ((*param)->stat > 2)
|
||||
printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
|
||||
j,
|
||||
ptr->image_data[j].x,
|
||||
ptr->image_data[j].y,
|
||||
ptr->image_data[j].w,
|
||||
ptr->image_data[j].h);
|
||||
}
|
||||
|
||||
i += 28;
|
||||
j++;
|
||||
break;
|
||||
default:
|
||||
i += 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
strcat(dictionary, ">");
|
||||
strcat(dictionary, "ET");
|
||||
|
||||
/* FIXME: Use the text somehow? */
|
||||
free(dictionary);
|
||||
|
@ -794,20 +863,14 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if (ptr->image_length > 0) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "q\n");
|
||||
|
||||
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
|
||||
|
||||
double resize_x;
|
||||
double resize_y;
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
||||
continue;
|
||||
char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
|
||||
double resize_x = 1;
|
||||
double resize_y = 1;
|
||||
|
||||
if (dim[0] > 0 && dim[1] > 0) {
|
||||
/* Scale within bound of A4 paper */
|
||||
resize_x = 595.276 * 4 / dim[i * 2];
|
||||
resize_y = 841.89 * 4 / dim[i * 2 + 1];
|
||||
resize_x = 4 * 595.2756 / dim[0];
|
||||
resize_y = 4 * 841.8898 / dim[1];
|
||||
|
||||
if (resize_y < resize_x)
|
||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
||||
|
@ -815,9 +878,18 @@ cnki_pdf_hn(cnki_t **param)
|
|||
else
|
||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
||||
resize_x, resize_x);
|
||||
strcat(dictionary, buf);
|
||||
strcat(resize_str, buf);
|
||||
}
|
||||
|
||||
/* Apply transformation matrix */
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
||||
continue;
|
||||
|
||||
strcat(dictionary, "q\n");
|
||||
|
||||
strcat(dictionary, resize_str);
|
||||
|
||||
/* Rotate image */
|
||||
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
|
||||
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
|
||||
dim[i * 2 + 1]);
|
||||
|
@ -826,15 +898,38 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, "1 0 0 -1 0 0 cm\n");
|
||||
}
|
||||
|
||||
/* Translate figure */
|
||||
if (i > 0) {
|
||||
double origin_x = 0.4043745 * ptr->image_data[i].x;
|
||||
double origin_y = 0.4043561 * ptr->image_data[i].y;
|
||||
|
||||
if (origin_x < 0)
|
||||
origin_x += (2381.102 - dim[i * 2]) / 2;
|
||||
|
||||
if (origin_y < 0)
|
||||
origin_y += (3367.559 + dim[i * 2 + 1]) / 2;
|
||||
|
||||
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
|
||||
origin_y = -3367.559 + origin_y + dim[i * 2 + 1];
|
||||
else
|
||||
origin_y = 3367.559 - origin_y - dim[i * 2 + 1];
|
||||
|
||||
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
|
||||
dim[i * 2], dim[i * 2 + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Im%d Do\n", i);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, "Q");
|
||||
strcat(dictionary, "Q");
|
||||
|
||||
if (i < ptr->image_length - 1)
|
||||
strcat(dictionary, "\n");
|
||||
}
|
||||
|
||||
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
||||
free(root_kid);
|
||||
|
@ -866,7 +961,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, "<<\n/Type /Page\n");
|
||||
|
||||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
|
||||
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
free(dim);
|
||||
|
@ -946,7 +1041,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
snprintf(buf, 64, "%d 0 R", root_kid[i]);
|
||||
strcat(dictionary, buf);
|
||||
if (i + 1 < (*param)->file_stat->page)
|
||||
if (i < (*param)->file_stat->page - 1)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
|
|
|
@ -6,5 +6,5 @@
|
|||
|
||||
#define VERSION "0"
|
||||
#define RELEASE "2"
|
||||
#define PATCH "1"
|
||||
#define PATCH "2"
|
||||
#define EXTRA ""
|
||||
|
|
Loading…
Reference in a new issue