Compare commits

...

3 commits

Author SHA1 Message Date
2fa2b760ae Fix HN text parsing.
Signed-off-by: yzrh <yzrh@noema.org>
2023-01-15 15:34:46 +00:00
dd5854678c Fix JBIG2 allocation.
Signed-off-by: yzrh <yzrh@noema.org>
2023-01-06 12:02:43 +00:00
123d62141c Add document information dictionary to output.
Signed-off-by: yzrh <yzrh@noema.org>
2023-01-05 19:15:01 +00:00
4 changed files with 78 additions and 62 deletions

View file

@ -850,45 +850,7 @@ cnki_pdf_hn(cnki_t **param)
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
switch (((unsigned char) ptr->text[i + 1] << 8) + (unsigned char) ptr->text[i]) {
case 0x8001:
if (ptr->address_next > ptr->address)
strcat(dictionary, "T*\n");
case 0x8070:
if (ptr->address_next > ptr->address) {
i += 4;
for (;;) {
if (i + 3 >= ptr->text_size ||
(unsigned char) ptr->text[i + 1] == 0x80)
break;
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
//snprintf(buf, 64, "%f %f Td\n");
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
if (conv_size - 2 > 0) {
strcat(dictionary, "<feff");
for (int k = 0; k < conv_size - 2; k++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[k]);
strcat(dictionary, conv_hex);
}
strcat(dictionary, "> Tj\n");
}
free(conv_dst);
}
i += 4;
}
break;
}
if (ptr->address_next <= ptr->address) {
if (i + 7 >= ptr->text_size) {
i += 2;
break;
@ -897,7 +859,7 @@ cnki_pdf_hn(cnki_t **param)
conv_src[0] = ptr->text[i + 7];
conv_src[1] = ptr->text[i + 6];
//snprintf(buf, 64, "%f %f Td\n");
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
//strcat(dictionary, buf);
conv_size = 6;
@ -905,7 +867,7 @@ cnki_pdf_hn(cnki_t **param)
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
if (conv_size - 2 > 0) {
strcat(dictionary, "<feff");
strcat(dictionary, "<");
for (int k = 0; k < conv_size - 2; k++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[k]);
@ -917,6 +879,46 @@ cnki_pdf_hn(cnki_t **param)
}
i += 8;
break;
}
strcat(dictionary, "T*\n");
case 0x8070:
i += 4;
if (ptr->address_next <= ptr->address)
break;
for (;;) {
if (i + 3 >= ptr->text_size ||
(unsigned char) ptr->text[i + 1] == 0x80)
break;
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
if (conv_size - 2 > 0) {
strcat(dictionary, "<");
for (int k = 0; k < conv_size - 2; k++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[k]);
strcat(dictionary, conv_hex);
}
strcat(dictionary, "> Tj\n");
}
free(conv_dst);
}
i += 4;
}
break;
case 0x800a:
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
* Copyright (c) 2022-2023, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -31,5 +31,6 @@ strdec_jbig2(char **bitmap,
}
jbig2_release_page(ctx, image);
jbig2_ctx_free(ctx);
return 0;
}

View file

@ -1,19 +1,39 @@
/*
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "version.h"
#include "md5.h"
#include "pdf.h"
/*
 * Append the document information dictionary to the object list.
 *
 * The dictionary carries two entries:
 *   /Producer      "Melon " followed by the version assembled from the
 *                  VERSION, RELEASE, PATCH and EXTRA macros
 *   /CreationDate  the current time in UTC, written as
 *                  D:YYYYMMDDHHmmSS+00'00'
 *
 * Returns the value of pdf_obj_append() on the normal path, or 1 when
 * the timestamp cannot be produced or the dictionary would not fit in
 * the local buffer.
 */
static int
_info_obj(pdf_object_t **pdf)
{
	char dictionary[128];
	char buf[64];

	time_t timestamp = time(NULL);
	struct tm *utc = gmtime(&timestamp);

	/* gmtime() yields NULL when the time cannot be broken down */
	if (utc == NULL)
		return 1;

	/* On a 0 return the contents of buf are unspecified, do not use them */
	if (strftime(buf, sizeof(buf), "%Y%m%d%H%M%S", utc) == 0)
		return 1;

	/*
	 * Build the dictionary in one bounded write; the version string is
	 * passed as an argument so that it is never parsed as a format.
	 */
	int length = snprintf(dictionary, sizeof(dictionary),
	    "<<\n"
	    "/Producer (Melon %s)\n"
	    "/CreationDate (D:%s+00'00')\n>>",
	    VERSION "." RELEASE "." PATCH EXTRA,
	    buf);

	/* Refuse to emit a truncated (and therefore malformed) dictionary */
	if (length < 0 || (size_t) length >= sizeof(dictionary))
		return 1;

	return pdf_obj_append(pdf, 0, NULL, dictionary, NULL, 0);
}
int
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
{
if (*pdf == NULL || *fp == NULL)
if (*pdf == NULL || *fp == NULL || _info_obj(pdf) != 0)
return 1;
long cur;
@ -152,18 +172,11 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
while (ptr->next != NULL)
ptr = ptr->next;
/*
* TODO: Document information dictionary
* `"/Producer (Melon)"'
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
*
* Trailer dictionary
* `"/Info %d 0 R"'
*/
fprintf(*fp,
"/Size %d\n/Root %d 0 R\n",
"/Size %d\n/Root %d 0 R\n/Info %d 0 R\n",
ptr->id + 1,
pdf_get_catalog_id(pdf));
pdf_get_catalog_id(pdf),
ptr->id);
fputs("/ID [", *fp);

View file

@ -5,6 +5,6 @@
*/
#define VERSION "0"
#define RELEASE "2"
#define PATCH "5"
#define RELEASE "3"
#define PATCH "0"
#define EXTRA ""