Compare commits

..

No commits in common. "master" and "0.2.1" have entirely different histories.

21 changed files with 316 additions and 857 deletions

View file

@ -1,38 +1,9 @@
0.3.0 (2023-XX-XX)
==================
* Support HN text overlay.
* Support HN page with text.
* Handle inaccurate page count in CAJ and KDH.
0.2.5 (2023-01-05)
==================
* Improve PDF parser.
* Handle duplicated object in CAJ.
* Handle duplicated image in HN.
* Handle incomplete PDF object in CAJ and KDH.
* Handle invalid PDF object token in CAJ and KDH.
* Fix JBIG decoder.
0.2.4 (2022-12-31)
==================
* Fix HN image compositing.
* Fix PDF object check.
0.2.3 (2022-12-30)
==================
* Support HN figure placement.
0.2.2 (2022-12-29)
==================
* Support JPEG 2000 for HN.
* Handle missing but referenced root object.
* Handle HN with more than one image per page.
* Fix buffer overflow.
* Support HN figure placement.
* Support HN text overlay.
0.2.1 (2022-12-26)
==================

View file

@ -9,15 +9,16 @@ Development
Currently, CAJ, KDH, and HN can be converted. Please report
any failures with a sample that can reproduce the behaviour.
JPEG 2000 images in HN files are not supported yet.
Dependency
----------
1. libcrypto (OpenSSL)
2. zlib
3. jbig2dec
4. libjpeg-turbo
5. openjpeg
6. pkgconf
1. OpenSSL
2. libiconv
3. zlib
4. jbig2dec
5. libjpeg-turbo
Usage
=====
@ -35,12 +36,12 @@ Options
Specify output file
-b, --buffer
Set input buffer size (default 512k)
Set buffer size (default 512k)
-v, --verbose
Print more information (twice for even more, three times for HN image processing information as well)
Print more information (twice for even more, three times for HN image decoding information as well)
Thanks
======
This project is inspired by [https://github.com/caj2pdf/caj2pdf](https://github.com/caj2pdf/caj2pdf)
This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)

View file

@ -4,28 +4,23 @@
# SPDX-License-Identifier: Apache-2.0
#
src = melon.c iconv.c zlib.c jbig2.c jpeg.c jp2.c md5.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c cnki_pdf.c \
cnki_zlib.c cnki_jbig.c cnki_jbig_dec.c cnki_jbig2.c cnki.c \
src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
inc = extern.h version.h iconv.h zlib.h jbig2.h jpeg.h jp2.h md5.h \
cnki.h pdf_cnki.h cnki_jbig.h cnki_jbig_dec.h pdf.h
inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
obj = ${src:.c=.o}
PREFIX = /usr/local
CFLAGS = -O2 -pipe -flto -Wall -Wextra
LDFLAGS = -Wl,-O2 -lcrypto -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
CFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib
OPENJPEG_CFLAGS != pkgconf --cflags libopenjp2
CFLAGS += ${OPENJPEG_CFLAGS}
CFLAGS += -DLIBICONV_PLUG
all: ${obj} ${inc}
${CC} ${LDFLAGS} -o melon ${obj}

View file

@ -138,7 +138,7 @@ cnki_info(cnki_t **param)
if ((*param)->file_stat->outline > 0) {
if ((*param)->stat > 1) {
printf("Loading outline(s)\n");
printf("\t%19s\t%-24s\t%12s\t%12s\t%5s\n",
printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
"title",
"hierarchy",
"page",

View file

@ -58,10 +58,6 @@ typedef struct _hn_image_t {
int32_t format; /* hn_code */
int32_t address;
int32_t size;
uint16_t x;
uint16_t y;
uint16_t w;
uint16_t h;
char *image;
} hn_image_t;

View file

@ -93,10 +93,6 @@ cnki_hn(cnki_t **param)
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
ptr->image_data[i].x = 0;
ptr->image_data[i].y = 0;
ptr->image_data[i].w = 0;
ptr->image_data[i].h = 0;
fseek((*param)->fp_i,
ptr->image_data[i].address + ptr->image_data[i].size,
SEEK_SET);

View file

@ -8,7 +8,7 @@
#include <string.h>
#include "cnki_jbig.h"
#include "cnki_jbig_dec.h"
#include "jbig.h"
int
cnki_jbig(char **bitmap, int *bitmap_size,

View file

@ -27,8 +27,8 @@ typedef struct _dib_t {
uint16_t depth;
uint32_t compression; /* dib_compression_code */
uint32_t size;
int32_t resolution_h;
int32_t resolution_v;
uint32_t resolution_h;
uint32_t resolution_v;
uint32_t colour;
uint32_t colour_used;
} dib_t;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -15,18 +15,16 @@ cnki_kdh(cnki_t **param)
if ((*param)->stat > 0)
printf("Begin 'KDH' decryption\n");
long cur = ADDRESS_KDH_BODY;
long end;
fseek((*param)->fp_i, 0, SEEK_END);
end = ftell((*param)->fp_i);
fseek((*param)->fp_i, cur, SEEK_SET);
long size = ftell((*param)->fp_i);
fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET);
const char key[] = KEY_KDH;
const int key_len = KEY_KDH_LENGTH;
long key_cur = 0;
int buf_size;
char buf[(*param)->size_buf];
FILE *tmp = tmpfile();
@ -35,32 +33,32 @@ cnki_kdh(cnki_t **param)
return 1;
for (;;) {
if (cur + (*param)->size_buf < end)
buf_size = (*param)->size_buf;
else
buf_size = end - cur;
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
fread(buf, buf_size, 1, (*param)->fp_i);
for (int i = 0; i < (*param)->size_buf; i++) {
buf[i] ^= key[key_cur % key_len];
key_cur++;
}
for (int i = 0; i < buf_size; i++)
buf[i] ^= key[key_cur++ % key_len];
fwrite(buf, (*param)->size_buf, 1, tmp);
fwrite(buf, buf_size, 1, tmp);
if ((cur = ftell((*param)->fp_i)) >= end)
if (ftell((*param)->fp_i) == size)
break;
}
if ((*param)->stat > 0)
printf("Decrypted %ld byte(s)\n", ftell(tmp));
fclose((*param)->fp_i);
fseek(tmp, 0, SEEK_SET);
FILE *orig = (*param)->fp_i;
(*param)->fp_i = tmp;
cnki_pdf(param);
(*param)->fp_i = orig;
fclose(tmp);
if ((*param)->stat > 0)
printf("Conversion ended\n");

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -11,7 +11,6 @@
#include "iconv.h"
#include "zlib.h"
#include "jpeg.h"
#include "jp2.h"
#include "pdf.h"
#include "pdf_cnki.h"
@ -145,54 +144,11 @@ _pdf_obj_sort(cnki_t **param, pdf_object_t **pdf)
ret = pdf_obj_sort(pdf);
if ((*param)->stat > 0)
printf("Sorted object(s)\n");
return ret;
}
static int
_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
{
int ret = 0;
pdf_object_t *tmp;
pdf_object_t *ptr;
if ((*param)->stat > 1)
printf("Deleting duplicated object\n");
ptr = *pdf;
while (ptr->next != NULL && ptr->next->next != NULL) {
if (ptr->next->id == ptr->next->next->id) {
/* Keep the bigger one, the smaller one is usually incomplete */
if (ptr->next->size < ptr->next->next->size) {
pdf_get_obj(&ptr, ptr->next->id, &tmp);
pdf_obj_del(&ptr, ptr->next->id);
} else {
pdf_get_obj(&ptr->next, ptr->next->id, &tmp);
pdf_obj_del(&ptr->next, ptr->next->id);
}
tmp->next = NULL;
pdf_obj_destroy(&tmp);
ret++;
if ((*param)->stat > 1)
printf("Deleted duplicated object %d.\n", ptr->next->id);
continue;
}
ptr = ptr->next;
}
if ((*param)->stat > 0) {
if (ret == 0)
printf("No duplicated object\n");
printf("Sorted object(s)\n");
else
printf("Deleted %d duplicated object(s)\n", ret);
printf("Object(s) not sorted\n");
}
return ret;
@ -255,22 +211,18 @@ cnki_pdf(cnki_t **param)
int *parent = NULL;
pdf_get_parent_id(&pdf, &parent);
if (parent[0] == 0)
return 1;
if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]);
pdf_obj_sort(&pdf);
_pdf_obj_dedup(param, &pdf);
int8_t *parent_missing;
int *kid;
if (parent[0] > 0) {
parent_missing = malloc(parent[0] * sizeof(int8_t));
int *parent_missing = malloc(parent[0] * sizeof(int));
if (parent_missing == NULL)
return 1;
}
int *kid;
for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1)
@ -313,7 +265,7 @@ cnki_pdf(cnki_t **param)
snprintf(buf, 64,
"]\n/Count %d\n>>",
pdf_get_kid_count(&pdf, parent[i]) > 0 ? pdf_get_kid_count(&pdf, parent[i]) : kid[0]);
pdf_get_kid_count(&pdf, parent[i]));
strcat(dictionary, buf);
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
@ -338,7 +290,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Searching for root object\n");
dictionary_size = 128 + 12 * parent[0];
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@ -356,9 +308,7 @@ cnki_pdf(cnki_t **param)
if (parent_missing[i] == 1)
root_kid++;
if (root_kid > 1) {
root = pdf_get_free_id(&pdf);
} else {
if (root_kid <= 1) {
if (root_kid == 0) {
for (int i = 1; i <= parent[0]; i++)
if (root == 0 || root < parent[i])
@ -366,32 +316,30 @@ cnki_pdf(cnki_t **param)
} else {
for (int i = 0; i < parent[0]; i++)
if (parent_missing[i] == 1)
root = parent[i + 1];
root = i;
}
if (root == 0)
root = pdf_get_free_id(&pdf);
else if ((*param)->stat > 0)
printf("Root object is %d.\n", root);
}
int root_gen;
pdf_object_t *tmp;
if ((root_gen = pdf_get_obj(&pdf, root, &tmp)) != 0) {
if ((*param)->stat > 0)
printf("Root object is %d.\n",
root);
} else {
if ((*param)->stat > 0)
printf("Root object is missing\n");
if ((*param)->stat > 1)
printf("Generating root object\n");
root = pdf_get_free_id(&pdf);
snprintf(buf, 64,
"<<\n/Type /Pages\n/Kids [");
"<<\n/Type /Pages\n/Kids ");
strcat(dictionary, buf);
if (parent[0] > 1)
strcat(dictionary, "[");
for (int i = 0, j = 0; i < parent[0]; i++) {
if (parent_missing[i] == 1) {
if (parent_missing[i]) {
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
strcat(dictionary, buf);
@ -400,7 +348,12 @@ cnki_pdf(cnki_t **param)
}
}
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
if (parent[0] > 1)
strcat(dictionary, "]");
strcat(dictionary, "\n");
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, buf);
strcat(dictionary, ">>");
@ -414,13 +367,26 @@ cnki_pdf(cnki_t **param)
root);
}
if (parent[0] > 0)
free(parent);
free(parent_missing);
free(parent);
int outline = _pdf_cnki_outline(param, &pdf);
if ((*param)->stat > 1)
printf("Searching for catalog object\n");
int catalog = pdf_get_catalog_id(&pdf);
if (catalog != 0) {
if ((*param)->stat > 0)
printf("Catalog object is %d.\n", catalog);
} else {
if ((*param)->stat > 0)
printf("Catalog object is missing\n");
if ((*param)->stat > 1)
printf("Generating catalog object\n");
snprintf(buf, 64,
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
root);
@ -435,31 +401,6 @@ cnki_pdf(cnki_t **param)
strcat(dictionary, ">>");
if ((*param)->stat > 1)
printf("Searching for catalog object\n");
int catalog = pdf_get_catalog_id(&pdf);
if (catalog != 0) {
if ((*param)->stat > 0)
printf("Catalog object is %d.\n", catalog);
if (root_gen != 0) {
if ((*param)->stat > 1)
printf("Replacing catalog object\n");
pdf_obj_replace(&pdf, catalog, NULL, dictionary, NULL, 0);
if ((*param)->stat > 0)
printf("Replaced catalog object\n");
}
} else {
if ((*param)->stat > 0)
printf("Catalog object is missing\n");
if ((*param)->stat > 1)
printf("Generating catalog object\n");
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
if ((*param)->stat > 0)
@ -478,6 +419,8 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Deleting xref object\n");
pdf_object_t *tmp;
pdf_get_obj(&pdf, xref, &tmp);
pdf_obj_del(&pdf, xref);
@ -513,9 +456,6 @@ cnki_pdf_hn(cnki_t **param)
if (pdf_obj_create(&pdf) != 0)
return 1;
int font = pdf_get_free_id(&pdf);
pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);
if ((*param)->stat > 1)
printf("Generating PDF object(s)\n");
@ -524,26 +464,27 @@ cnki_pdf_hn(cnki_t **param)
char buf[64];
pdf_object_t *tmp;
int cnt = 0;
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
if (root_kid == NULL)
return 1;
memset(root_kid, 0, (*param)->file_stat->page * sizeof(int));
memset(root_kid, 0, (*param)->file_stat->page);
object_hn_t *ptr = (*param)->object_hn;
while (ptr != NULL) {
/*
* External object (ptr->image_length) +
* resource object +
* content object +
* resource object +
* page object
*/
int *ids = NULL;
if (ptr->image_length > 0)
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
else
pdf_get_free_ids(&pdf, &ids, 2);
int bitmap_size;
char *bitmap;
@ -551,10 +492,10 @@ cnki_pdf_hn(cnki_t **param)
int stream_size;
char *stream;
double *dim;
int *dim;
if (ptr->image_length > 0) {
dim = malloc(2 * ptr->image_length * sizeof(double));
dim = malloc(2 * ptr->image_length * sizeof(int));
if (dim == NULL) {
free(root_kid);
@ -583,7 +524,7 @@ cnki_pdf_hn(cnki_t **param)
"/Subtype /Image\n");
if ((*param)->stat > 2)
printf("\tProcessing image, page %04d item %d format %d... ",
printf("\tDecoding data, page %04d item %02d format %d... ",
ptr->page, i, ptr->image_data[i].format);
switch (ptr->image_data[i].format) {
@ -719,41 +660,6 @@ cnki_pdf_hn(cnki_t **param)
dim[i * 2 + 1] = info[1];
break;
case JPX:
ret = strinfo_jp2_dim(&info[0],
&info[1],
ptr->image_data[i].image,
ptr->image_data[i].size);
if (ret != 0) {
dim[i * 2] = 0;
dim[i * 2 + 1] = 0;
break;
}
stream_size = ptr->image_data[i].size;
stream = malloc(stream_size);
if (stream == NULL) {
free(root_kid);
free(ids);
free(dim);
free(dictionary);
return 1;
}
memcpy(stream, ptr->image_data[i].image, stream_size);
snprintf(buf, 64, "/Width %d\n/Height %d\n",
info[0], info[1]);
strcat(dictionary, buf);
snprintf(buf, 64, "/Length %d\n",
stream_size);
strcat(dictionary, buf);
strcat(dictionary, "/Filter /JPXDecode\n");
dim[i * 2] = info[0];
dim[i * 2 + 1] = info[1];
break;
default:
ret = -1;
dim[i * 2] = -1;
@ -785,47 +691,25 @@ cnki_pdf_hn(cnki_t **param)
}
}
if (ptr->image_length > 0)
free(dictionary);
dictionary_size = 128 + 2 * ptr->text_size + 128 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
}
if (ptr->image_length > 0) {
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n");
if (ptr->text_size > 0) {
snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
strcat(dictionary, buf);
}
if (ptr->image_length > 0) {
strcat(dictionary, "/XObject <<");
strcat(dictionary, "<<\n/XObject <<");
for (int i = 0; i < ptr->image_length; i++) {
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
strcat(dictionary, buf);
if (i < ptr->image_length - 1)
if (i + 1 < ptr->image_length)
strcat(dictionary, " ");
}
strcat(dictionary, ">>\n");
}
strcat(dictionary, ">>");
strcat(dictionary, ">>\n>>");
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
memset(dictionary, 0, dictionary_size);
free(dictionary);
}
int conv_size;
char *conv_dst;
@ -843,249 +727,119 @@ cnki_pdf_hn(cnki_t **param)
ptr->text = stream;
}
strcat(dictionary, "BT\n");
dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size);
strcat(dictionary, "/F0 10 Tf\n");
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
switch (((unsigned char) ptr->text[i + 1] << 8) + (unsigned char) ptr->text[i]) {
case 0x8001:
if (ptr->address_next <= ptr->address) {
if (i + 7 >= ptr->text_size) {
i += 2;
break;
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
}
conv_src[0] = ptr->text[i + 7];
conv_src[1] = ptr->text[i + 6];
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
if (conv_size - 2 > 0) {
strcat(dictionary, "<");
for (int k = 0; k < conv_size - 2; k++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[k]);
strcat(dictionary, conv_hex);
}
strcat(dictionary, "> Tj\n");
}
free(conv_dst);
}
i += 8;
break;
}
strcat(dictionary, "T*\n");
case 0x8070:
i += 4;
if (ptr->address_next <= ptr->address)
break;
for (;;) {
if (i + 3 >= ptr->text_size ||
(unsigned char) ptr->text[i + 1] == 0x80)
break;
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
//strcat(dictionary, buf);
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
if (conv_size - 2 > 0) {
strcat(dictionary, "<");
for (int k = 0; k < conv_size - 2; k++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[k]);
strcat(dictionary, conv_hex);
}
strcat(dictionary, "> Tj\n");
}
free(conv_dst);
}
i += 4;
}
break;
case 0x800a:
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
i += 2;
if (j >= ptr->image_length)
i += 26;
break;
}
if (ptr->image_length > 0) {
ptr->image_data[j].x += (unsigned char) ptr->text[i + 5] << 8;
ptr->image_data[j].x += (unsigned char) ptr->text[i + 4];
ptr->image_data[j].y += (unsigned char) ptr->text[i + 7] << 8;
ptr->image_data[j].y += (unsigned char) ptr->text[i + 6];
ptr->image_data[j].w += (unsigned char) ptr->text[i + 9] << 8;
ptr->image_data[j].w += (unsigned char) ptr->text[i + 8];
ptr->image_data[j].h += (unsigned char) ptr->text[i + 11] << 8;
ptr->image_data[j].h += (unsigned char) ptr->text[i + 10];
if ((*param)->stat > 2)
printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
j,
ptr->image_data[j].x,
ptr->image_data[j].y,
ptr->image_data[j].w,
ptr->image_data[j].h);
}
i += 28;
if (j == 0 || ptr->image_data[j].x > 0 || ptr->image_data[j].y > 0)
j++;
break;
default:
i += 4;
break;
}
}
strcat(dictionary, "ET");
if (ptr->image_length > 0)
strcat(dictionary, "\n");
}
/* FIXME: Use the text somehow? */
memset(dictionary, 0, dictionary_size);
if (ptr->image_length > 0) {
double resize_x = 1;
double resize_y = 1;
double margin_x = 0;
double margin_y = 0;
if (ptr->image_data[0].x == 0 && ptr->image_data[0].y == 0 && dim[0] > 0 && dim[1] > 0) {
/* Scale within bound of A4 paper */
resize_x = 2480.315 / dim[0];
resize_y = 3507.874 / dim[1];
if (resize_y < resize_x) {
for (int i = 0; i < ptr->image_length; i++) {
dim[i * 2] *= resize_y;
dim[i * 2 + 1] *= resize_y;
}
} else {
for (int i = 0; i < ptr->image_length; i++) {
dim[i * 2] *= resize_x;
dim[i * 2 + 1] *= resize_x;
}
}
margin_x = (2480.315 - dim[0]) / 2;
margin_y = (3507.874 - dim[1]) / 2;
}
/* Remove duplicated image, ptr->image_length is sometimes squared */
for (int i = 1; i < ptr->image_length; i++) {
if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
continue;
for (int j = i; j < ptr->image_length; j++) {
pdf_get_obj(&pdf, ids[j], &tmp);
pdf_obj_del(&pdf, ids[j]);
tmp->next = NULL;
pdf_obj_destroy(&tmp);
dim[j * 2] = -1;
dim[j * 2 + 1] = -1;
pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
}
strcat(dictionary, "<feff");
for (int i = 0; i < ptr->text_size; i += 6) {
if (i + 5 >= ptr->text_size)
break;
conv_src[0] = ptr->text[i + 5];
conv_src[1] = ptr->text[i + 4];
if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) {
strcat(dictionary, "a389");
continue;
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) {
strcat(dictionary, "a38a");
continue;
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) {
strcat(dictionary, "a38d");
continue;
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) {
strcat(dictionary, "a3a0");
continue;
}
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
for (int j = 0; j < conv_size - 2; j++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[j]);
strcat(dictionary, conv_hex);
}
free(conv_dst);
}
}
strcat(dictionary, ">");
/* FIXME: Use the text somehow? */
free(dictionary);
}
dictionary_size = 64 + 128 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1;
}
if (ptr->image_length > 0) {
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "q\n");
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
double resize_x;
double resize_y;
for (int i = 0; i < ptr->image_length; i++) {
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
continue;
strcat(dictionary, "q\n");
/* Scale within bound of A4 paper */
resize_x = 595.276 * 4 / dim[i * 2];
resize_y = 841.89 * 4 / dim[i * 2 + 1];
strcat(dictionary, "0.24 0 0 0.24 0 0 cm\n");
/* Rotate image */
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 -1 0 %f cm\n", dim[i * 2 + 1]);
strcat(dictionary, buf);
}
/* Translate figure */
if (ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) {
double origin_x = ptr->image_data[i].x * 0.40433;
double origin_y = ptr->image_data[i].y * 0.40433;
if (resize_y < resize_x) {
origin_x *= resize_y;
origin_y *= resize_y;
} else {
origin_x *= resize_x;
origin_y *= resize_x;
}
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
origin_y = -3507.874 + origin_y + dim[i * 2 + 1];
if (resize_y < resize_x)
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
resize_y, resize_y);
else
origin_y = 3507.874 - origin_y - dim[i * 2 + 1];
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
resize_x, resize_x);
strcat(dictionary, buf);
}
if (margin_x > 0 || margin_y > 0) {
/* Apply transformation matrix */
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, -margin_y);
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
dim[i * 2 + 1]);
strcat(dictionary, buf);
} else {
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, margin_y);
strcat(dictionary, buf);
}
strcat(dictionary, "1 0 0 -1 0 0 cm\n");
}
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", dim[i * 2], dim[i * 2 + 1]);
snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
dim[i * 2], dim[i * 2 + 1]);
strcat(dictionary, buf);
snprintf(buf, 64, "/Im%d Do\n", i);
strcat(dictionary, buf);
}
strcat(dictionary, "Q");
if (i < ptr->image_length - 1)
strcat(dictionary, "\n");
}
free(dim);
}
if (strlen(dictionary) > 0) {
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
free(root_kid);
free(ids);
free(dim);
free(dictionary);
return 1;
}
@ -1105,9 +859,6 @@ cnki_pdf_hn(cnki_t **param)
NULL, dictionary, stream, stream_size);
free(stream);
} else {
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
NULL, NULL, NULL, 0);
}
memset(dictionary, 0, dictionary_size);
@ -1115,7 +866,10 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "<<\n/Type /Page\n");
/* A4 paper */
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
if (ptr->image_length > 0) {
free(dim);
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
strcat(dictionary, buf);
@ -1127,9 +881,19 @@ cnki_pdf_hn(cnki_t **param)
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
root_kid[cnt++] = ids[ptr->image_length + 2];
} else {
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
strcat(dictionary, buf);
/* Add /Parent when we know root */
pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
root_kid[cnt++] = ids[ptr->image_length + 1];
}
free(dictionary);
free(ids);
free(dictionary);
ptr = ptr->next;
}
@ -1173,17 +937,25 @@ cnki_pdf_hn(cnki_t **param)
int root = pdf_get_free_id(&pdf);
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids ");
strcat(dictionary, buf);
if ((*param)->file_stat->page > 1)
strcat(dictionary, "[");
for (int i = 0; i < (*param)->file_stat->page; i++) {
snprintf(buf, 64, "%d 0 R", root_kid[i]);
strcat(dictionary, buf);
if (i < (*param)->file_stat->page - 1)
if (i + 1 < (*param)->file_stat->page)
strcat(dictionary, " ");
}
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
if ((*param)->file_stat->page > 1)
strcat(dictionary, "]");
strcat(dictionary, "\n");
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
strcat(dictionary, buf);
strcat(dictionary, ">>");
@ -1192,6 +964,16 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary);
dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
pdf_object_t *tmp = NULL;
/* Add /Parent to page object */
for (int i = 0; i < (*param)->file_stat->page; i++) {
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
@ -1200,16 +982,9 @@ cnki_pdf_hn(cnki_t **param)
return 1;
}
dictionary_size = tmp->dictionary_size + 24;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memset(dictionary, 0, dictionary_size);
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
memset(dictionary + tmp->dictionary_size, 0, 24);
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
strcat(dictionary, buf);
@ -1219,20 +994,10 @@ cnki_pdf_hn(cnki_t **param)
free(root_kid);
return 1;
}
free(dictionary);
}
free(root_kid);
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
return 1;
}
memset(dictionary, 0, dictionary_size);
if ((*param)->stat > 0)

View file

@ -9,7 +9,7 @@
#include <stdlib.h>
#include <string.h>
static const uint16_t _LSZ[0x71] = {
static const uint16_t _LSZ[256] = {
0x5a1d,
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
@ -28,7 +28,7 @@ static const uint16_t _LSZ[0x71] = {
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
};
static const uint8_t _NLPS[0x71] = {
static const uint8_t _NLPS[256] = {
1,
14, 16, 18, 20, 23, 25, 28, 30,
33, 35, 9, 10, 12, 15, 36, 38,
@ -47,7 +47,7 @@ static const uint8_t _NLPS[0x71] = {
105, 108, 109, 110, 111, 110, 112, 112
};
static const uint8_t _NMPS[0x71] = {
static const uint8_t _NMPS[256] = {
1,
2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 13, 15, 16, 17,
@ -66,7 +66,7 @@ static const uint8_t _NMPS[0x71] = {
106, 107, 103, 109, 107, 111, 109, 111
};
static const bool _SWTCH[0x71] = {
static const bool _SWTCH[256] = {
1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0,
@ -99,7 +99,7 @@ static int _height;
static int _width_padded;
static int _ret_pos;
static unsigned char *_ret;
static char *_ret;
static int _scd_size;
static unsigned char *_scd;
@ -108,7 +108,7 @@ static void
_bytein(void)
{
if (_ret_pos < _scd_size)
_reg_c += _scd[_ret_pos++] << 8;
_reg_c += *(_scd + _ret_pos++) << 8;
_ct = 8;
}
@ -209,36 +209,25 @@ _procline(int line, char *a, char *b, char *c)
/* The encoder must be erroneous */
uint16_t cx = 0;
if (line > 0) {
cx += (_ret[_width_padded * (_height - line)] & 0x20) << 2;
cx += _ret[_width_padded * (_height - line)] & 0x40;
cx += (_ret[_width_padded * (_height - line)] & 0x80) >> 2;
}
if (line > 1) {
cx += (_ret[_width_padded * (_height - line + 1)] & 0x40) >> 4;
cx += (_ret[_width_padded * (_height - line + 1)] & 0x80) >> 6;
}
for (int i = 0; i < _width; i++) {
_decode(cx);
cx >>= 1;
if (_pix == 1) {
_ret[_width_padded * (_height - line - 1) + i / 8] |= _pix << (7 - (i & 0x07));
c[i] = 1;
*(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07));
*(c + i) = 1;
cx |= 0x0200;
} else {
cx &= 0xfdff;
}
if (i + 2 < _width && a[i + 2] == 1)
if (i + 2 < _width && *(a + i + 2) == 1)
cx |= 0x0004;
else
cx &= 0xfffb;
if (i + 3 < _width && b[i + 3] == 1)
if (i + 3 < _width && *(b + i + 3) == 1)
cx |= 0x0080;
else
cx &= 0xff7f;
@ -304,7 +293,7 @@ strdec_jbig(char **bitmap, int width, int height,
memset(*bitmap, 0, _height * _width_padded);
_ret_pos = 0;
_ret = (unsigned char *) *bitmap;
_ret = *bitmap;
_scd_size = jbig_size;
_scd = (unsigned char *) jbig;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -31,6 +31,5 @@ strdec_jbig2(char **bitmap,
}
jbig2_release_page(ctx, image);
jbig2_ctx_free(ctx);
return 0;
}

115
src/jp2.c
View file

@ -1,115 +0,0 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <openjpeg.h>
/*
 * Read cursor over an in-memory buffer, passed to the OpenJPEG stream
 * callbacks in this file as their user data pointer.
 */
typedef struct _stream_user_data {
OPJ_SIZE_T pos; /* current offset into data, advanced by read/skip and set by seek */
OPJ_SIZE_T size; /* total number of bytes available in data */
const unsigned char *data; /* start of the buffer being read */
} stream_user_data;
/*
 * OpenJPEG read callback: copy up to p_nb_bytes from the in-memory
 * source at its current offset into p_buffer and advance the offset.
 *
 * Returns the number of bytes copied, or (OPJ_SIZE_T) -1 when the
 * offset is already at or beyond the end of the source.
 */
static OPJ_SIZE_T
_opj_stream_read(void *p_buffer, OPJ_SIZE_T p_nb_bytes, void *p_user_data)
{
	stream_user_data *src = (stream_user_data *) p_user_data;

	/* Nothing left to hand out */
	if (src->pos >= src->size)
		return (OPJ_SIZE_T) - 1;

	/* Clamp the request to what remains in the buffer */
	const OPJ_SIZE_T count = (src->pos + p_nb_bytes > src->size) ?
	    src->size - src->pos : p_nb_bytes;

	memcpy(p_buffer, src->data + src->pos, count);
	src->pos += count;

	return count;
}
/*
 * OpenJPEG skip callback: advance the in-memory source by up to
 * p_nb_bytes without copying anything.
 *
 * Returns the number of bytes actually skipped (which may be fewer
 * than requested when the end of the buffer is reached), or -1 when
 * the request is negative or nothing can be skipped because the
 * source is already exhausted. The caller subtracts the return value
 * from the amount still to skip, so it must be a count and not the
 * new absolute position.
 */
static OPJ_OFF_T
_opj_stream_skip(OPJ_OFF_T p_nb_bytes, void *p_user_data)
{
	stream_user_data *d = (stream_user_data *) p_user_data;

	/* Avoid signed/unsigned wrap-around on a negative request */
	if (p_nb_bytes < 0)
		return (OPJ_OFF_T) - 1;

	if (p_nb_bytes == 0)
		return 0;

	/* End of data: report it instead of claiming progress */
	if (d->pos >= d->size)
		return (OPJ_OFF_T) - 1;

	OPJ_SIZE_T remaining = d->size - d->pos;
	OPJ_SIZE_T skipped = (OPJ_SIZE_T) p_nb_bytes;

	if (skipped > remaining)
		skipped = remaining;

	d->pos += skipped;

	return (OPJ_OFF_T) skipped;
}
/*
 * OpenJPEG seek callback: move the in-memory source to the absolute
 * offset p_nb_bytes.
 *
 * Returns OPJ_TRUE when the offset lies within [0, size], otherwise
 * OPJ_FALSE and the current offset is left unchanged.
 */
static OPJ_BOOL
_opj_stream_seek(OPJ_OFF_T p_nb_bytes, void *p_user_data)
{
	stream_user_data *d = (stream_user_data *) p_user_data;

	/*
	 * A negative offset would satisfy the upper-bound check and then
	 * be stored into the unsigned position as a huge value.
	 */
	if (p_nb_bytes < 0 || p_nb_bytes > (OPJ_OFF_T) d->size)
		return OPJ_FALSE;

	d->pos = (OPJ_SIZE_T) p_nb_bytes;

	return OPJ_TRUE;
}
/*
 * Read the pixel dimensions of a JPEG 2000 image held in memory.
 *
 * jp2_width, jp2_height: receive x1 - x0 and y1 - y0 of the decoded
 *                        image header; written only on success
 * data, data_size:       the encoded image; a buffer starting with
 *                        0xff 0x4f is treated as a raw codestream
 *                        (J2K), anything else as a JP2 file
 *
 * Only the header is read; no pixel data is decoded.
 *
 * Returns 0 on success, 1 when the input is missing or too short, a
 * decoder or stream could not be created, or the header could not be
 * read.
 */
int
strinfo_jp2_dim(int *jp2_width, int *jp2_height,
	const char * restrict data, int data_size)
{
	opj_codec_t *codec;
	opj_dparameters_t param;
	opj_stream_t *stream;
	opj_image_t *image = NULL;
	stream_user_data d;

	if (data == NULL || data_size < 2)
		return 1;

	opj_set_default_decoder_parameters(&param);

	/* Pick the decoder from the first two bytes of the input */
	if ((unsigned char) data[0] == 0xff && (unsigned char) data[1] == 0x4f)
		codec = opj_create_decompress(OPJ_CODEC_J2K);
	else
		codec = opj_create_decompress(OPJ_CODEC_JP2);

	if (codec == NULL)
		return 1;

	if (!opj_setup_decoder(codec, &param)) {
		opj_destroy_codec(codec);
		return 1;
	}

	stream = opj_stream_default_create(OPJ_TRUE);

	if (stream == NULL) {
		opj_destroy_codec(codec);
		return 1;
	}

	d.pos = 0;
	d.size = data_size;
	d.data = (const unsigned char *) data;

	opj_stream_set_read_function(stream, _opj_stream_read);
	opj_stream_set_skip_function(stream, _opj_stream_skip);
	opj_stream_set_seek_function(stream, _opj_stream_seek);
	/* d lives on this stack frame, so no free callback is registered */
	opj_stream_set_user_data(stream, &d, NULL);
	opj_stream_set_user_data_length(stream, data_size);

	if (!opj_read_header(stream, codec, &image) || image == NULL) {
		opj_destroy_codec(codec);
		opj_stream_destroy(stream);
		return 1;
	}

	opj_destroy_codec(codec);
	opj_stream_destroy(stream);

	*jp2_width = image->x1 - image->x0;
	*jp2_height = image->y1 - image->y0;

	opj_image_destroy(image);

	return 0;
}

View file

@ -1,8 +0,0 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Read the pixel width and height of the JPEG 2000 image stored in
 * data (data_size bytes) into *jp2_width and *jp2_height.
 * Returns 0 on success and 1 on failure.
 */
int strinfo_jp2_dim(int *jp2_width, int *jp2_height,
const char * restrict data, int data_size);

View file

@ -1,24 +0,0 @@
/*
* Copyright (c) 2023, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include <openssl/md5.h>
/*
 * Compute the MD5 digest of src_size bytes at src.
 *
 * *dst_size is set to MD5_DIGEST_LENGTH and *dst to a freshly
 * malloc'ed buffer of that size holding the digest; this function
 * never frees it.
 *
 * Returns 0 on success, 1 if the allocation fails (in which case
 * *dst is NULL).
 */
int
strmd5(unsigned char **dst, int *dst_size,
	const unsigned char * restrict src, int src_size)
{
	unsigned char *digest = malloc(MD5_DIGEST_LENGTH);

	*dst_size = MD5_DIGEST_LENGTH;
	*dst = digest;

	if (digest == NULL)
		return 1;

	MD5(src, src_size, digest);

	return 0;
}

View file

@ -1,9 +0,0 @@
/*
* Copyright (c) 2023, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Compute the MD5 digest of src (src_size bytes). On success *dst
 * points to a malloc'ed buffer of *dst_size bytes holding the digest.
 * Returns 0 on success and 1 if the buffer cannot be allocated.
 */
int
strmd5(unsigned char **dst, int *dst_size,
const unsigned char * restrict src, int src_size);

View file

@ -89,7 +89,7 @@ pdf_get_free_id(pdf_object_t **pdf)
int id = 0;
for (int i = 1; i < 100000000; i++) {
for (int i = 1; i < 99999999; i++) {
ptr = (*pdf)->next;
while (ptr != NULL) {
if (ptr->id == i) {
@ -123,7 +123,7 @@ pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
int id = 0;
pdf_object_t *ptr;
for (int i = 1; i < 100000000; i++) {
for (int i = 1; i < 99999999; i++) {
ptr = (*pdf)->next;
while (ptr != NULL) {
if (ptr->id == i) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -19,37 +19,28 @@ static void *
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
{
const char whitespace[6] = {
0x00,
0x09,
0x0a,
0x0c,
0x0d,
0x20
'\r',
'\n',
'\f',
'\t',
'\0',
' '
};
char *ret = NULL;
char tmp[s1 + 1];
memcpy(tmp, p1, s1);
char str[s1 + 1];
memcpy(str, p1, s1);
size_t tmp_size = 0;
char *tmp;
char *ret;
for (int i = 0; i < 6; i++) {
str[s1] = whitespace[i];
if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
continue;
if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
tmp_size = tmp - (char *) p0;
ret = tmp;
}
}
tmp[s1] = whitespace[i];
if((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
return ret;
}
return NULL;
}
static int
_locate(pdf_object_t **pdf, FILE **fp, int size_buf)
{
@ -66,45 +57,23 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
end = ftell(*fp);
fseek(*fp, cur, SEEK_SET);
long head = 0;
long tail = 0;
int head = 0;
int tail = 0;
char *pos;
char *tmp;
for (;;) {
if (cur + size_buf < end) {
fread(buf, size_buf, 1, *fp);
} else {
fread(buf, end - cur, 1, *fp);
memset(buf + end - cur, 0, size_buf - end + cur);
}
if (head == 0) {
/* Hack needed for invalid object */
pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
tmp = memmem(buf, size_buf, " 0 obj", 6);
while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
if (pos != NULL && tmp != NULL) {
if (pos - buf < tmp - buf)
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
head = cur + (pos - buf) + 7;
else
head = cur + (tmp - buf) + 6;
} else if (pos != NULL) {
head = cur + (pos - buf) + 7;
} else if (tmp != NULL) {
head = cur + (tmp - buf) + 6;
}
}
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
/* We need to check if it is the object stored in stream */
while (memcmp(pos + 7,
"\r\nendstream", 11) == 0 &&
(tmp = _memmem_whitespace(pos + 7,
size_buf - (pos - buf) - 7,
(tmp = _memmem_whitespace(pos + 6,
size_buf - (pos - buf) - 6,
"endobj", 6)) != NULL)
pos = tmp;
@ -133,17 +102,13 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
ptr->address = head;
ptr->size = tail - head;
fseek(*fp, tail + 7, SEEK_SET);
fseek(*fp, tail + 6, SEEK_SET);
head = tail = 0;
} else if (head > 0 && tail > 0) {
if (cur + size_buf < end)
fseek(*fp, head, SEEK_SET);
tail = 0;
} else {
fseek(*fp, -7, SEEK_CUR);
fseek(*fp, -6, SEEK_CUR);
}
if ((cur = ftell(*fp)) + 7 >= end)
if ((cur = ftell(*fp)) + 6 >= end)
break;
}
@ -161,7 +126,6 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
pdf_object_t *ptr = (*pdf)->next;
char str[8];
char *buf;
char *head;
char *tail;
@ -173,86 +137,34 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if (buf == NULL)
return 1;
fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
memset(buf, 0, ptr->size);
/* Handle incomplete object */
head = buf;
while ((tmp = _memmem_whitespace(head,
ptr->size - (head - buf),
" 0 obj", 6)) != NULL)
head = tmp + 7;
/* Hack needed for invalid object */
while ((tmp = memmem(head,
ptr->size - (head - buf),
" 0 obj", 6)) != NULL)
head = tmp + 6;
if (head - buf > 0) {
ptr->address += head - buf;
ptr->size -= head - buf;
tmp = realloc(buf, ptr->size);
if (tmp == NULL)
return 1;
buf = tmp;
fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
}
/* Hack needed for invalid object */
fseek(*fp, ptr->address - 14, SEEK_SET);
fread(str, 8, 1, *fp);
if (str[7] < '0' || str[7] > '9') {
fseek(*fp, ptr->address - 15, SEEK_SET);
fread(str, 8, 1, *fp);
}
for (int i = 7; i >= 0; i--) {
if (str[i] < '0' || str[i] > '9') {
if (i < 7)
ptr->id = atoi(str + i + 1);
else
ptr->id = 0;
fseek(*fp, ptr->address - 12, SEEK_SET);
fread(buf, 8, 1, *fp);
for (int i = 0; i < 8; i++) {
if (buf[i] >= '0' && buf[i] <= '9') {
ptr->id = atoi(buf + i);
break;
}
}
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
/* Hack needed for invalid object */
(tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
if (memmem(buf, tail - buf, "stream\r\n", 8) != NULL) {
tail = memmem(buf, ptr->size, ">>", 2);
fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
while (ptr->size - (tail - buf) > 2 &&
(tmp = memmem(tail + 2,
ptr->size - (tail - buf) - 2,
">>", 2)) != NULL &&
memmem(tail + 2,
(tmp - tail) - 2,
"stream\r\n", 8) == NULL)
tail = tmp;
} else {
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
/*
* A dictionary object may have nested dictionary,
* but it should not be in a stream
*/
while (ptr->size - (tail - buf) > 3 &&
(tmp = _memmem_whitespace(tail + 3,
ptr->size - (tail - buf) - 3,
while ((tmp = _memmem_whitespace(tail + 2,
ptr->size - (tail - buf) - 2,
">>", 2)) != NULL &&
memmem(tail + 3,
(tmp - tail) - 3,
memmem(tail + 2,
ptr->size - (tail - buf) - 2,
"stream\r\n", 8) == NULL)
tail = tmp;
}
ptr->dictionary_size = tail - head + 2;
ptr->dictionary = malloc(ptr->dictionary_size + 1);
@ -260,8 +172,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if (ptr->dictionary == NULL)
return 1;
memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
memcpy(ptr->dictionary, head, ptr->dictionary_size);
memset(ptr->dictionary + ptr->dictionary_size, 0, 1);
if ((head = memmem(tail,
ptr->size - (tail - buf),
@ -274,11 +186,11 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
* contains another object that
* contains another stream
*/
while (_memmem_whitespace(tail + 10,
ptr->size - (tail - buf) - 10,
while (_memmem_whitespace(tail,
ptr->size - (tail - buf),
"endobj", 6) != NULL &&
(tmp = _memmem_whitespace(tail + 10,
ptr->size - (tail - buf) - 10,
(tmp = _memmem_whitespace(tail + 9,
ptr->size - (tail - buf) - 9,
"endstream", 9)) != NULL)
tail = tmp;
@ -290,13 +202,19 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
memcpy(ptr->stream, head + 8, ptr->stream_size);
}
free(buf);
} else {
ptr->object_size = ptr->size;
ptr->object = buf;
ptr->object = malloc(ptr->object_size + 1);
if (ptr->object == NULL)
return 1;
memset(ptr->object, 0, ptr->object_size + 1);
memcpy(ptr->object, buf, ptr->object_size);
}
free(buf);
ptr = ptr->next;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -8,32 +8,14 @@
#include <string.h>
#include <time.h>
#include "version.h"
#include "md5.h"
#include <openssl/md5.h>
#include "pdf.h"
/*
 * Append the document information dictionary to the object list.
 *
 * The dictionary carries the producer string (built from the version
 * macros) and the creation date, expressed in UTC.
 *
 * Returns 1 if the date or the dictionary could not be formatted,
 * otherwise whatever pdf_obj_append() returns.
 */
static int
_info_obj(pdf_object_t **pdf)
{
	char date[32];
	char dictionary[128];

	time_t timestamp = time(NULL);
	struct tm *utc = gmtime(&timestamp);

	/* gmtime() may fail; strftime() returns 0 if the result does not fit */
	if (utc == NULL ||
	    strftime(date, sizeof(date), "%Y%m%d%H%M%S", utc) == 0)
		return 1;

	/*
	 * The version string is passed as an argument rather than pasted
	 * into the format, so its content is never interpreted as a
	 * conversion specification.
	 */
	int length = snprintf(dictionary, sizeof(dictionary),
	    "<<\n"
	    "/Producer (Melon %s)\n"
	    "/CreationDate (D:%s+00'00')\n>>",
	    VERSION "." RELEASE "." PATCH EXTRA, date);

	/* Refuse to emit a truncated dictionary */
	if (length < 0 || (size_t) length >= sizeof(dictionary))
		return 1;

	return pdf_obj_append(pdf, 0, NULL, dictionary, NULL, 0);
}
int
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
{
if (*pdf == NULL || *fp == NULL || _info_obj(pdf) != 0)
if (*pdf == NULL || *fp == NULL)
return 1;
long cur;
@ -162,28 +144,35 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
#endif
int fid_size;
unsigned char *fid;
unsigned char str[64];
memcpy(str, buf, 64);
if (strmd5(&fid, &fid_size, (unsigned char *) buf, buf_size) != 0)
return 1;
unsigned char fid[MD5_DIGEST_LENGTH];
MD5(str, buf_size, fid);
pdf_object_t *ptr = *pdf;
while (ptr->next != NULL)
ptr = ptr->next;
/*
* TODO: Document information dictionary
* `"/Producer (Melon)"'
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
*
* Trailer dictionary
* `"/Info %d 0 R"'
*/
fprintf(*fp,
"/Size %d\n/Root %d 0 R\n/Info %d 0 R\n",
"/Size %d\n/Root %d 0 R\n",
ptr->id + 1,
pdf_get_catalog_id(pdf),
ptr->id);
pdf_get_catalog_id(pdf));
fputs("/ID [", *fp);
for (int i = 0; i < 2; i++) {
fputs("<", *fp);
for (int j = 0; j < fid_size; j++)
for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
fprintf(*fp, "%02x", fid[j]);
fputs(">", *fp);
@ -202,7 +191,5 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
fputs("%%EOF\n", *fp);
free(fid);
return 0;
}

View file

@ -1,10 +1,10 @@
/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#define VERSION "0"
#define RELEASE "3"
#define PATCH "0"
#define RELEASE "2"
#define PATCH "1"
#define EXTRA ""