Compare commits
31 commits
Author | SHA1 | Date | |
---|---|---|---|
2fa2b760ae | |||
dd5854678c | |||
123d62141c | |||
283446dba5 | |||
13cb0a1b8d | |||
a7ecc15614 | |||
56ffe14d5a | |||
c2afbb3cbc | |||
8cd8a8fbba | |||
8276423eb8 | |||
7ac0971a17 | |||
e0fe937e1a | |||
4a02b8bfc7 | |||
7d9d658461 | |||
000405693e | |||
d6fa934b5f | |||
1a1fee1034 | |||
cde014cffb | |||
9019a18449 | |||
a18de8f2ef | |||
70e1e7ea97 | |||
bffb8ce8a4 | |||
3ac51d66b9 | |||
0bbf8e65dd | |||
220a81c2ad | |||
1d899d934d | |||
226f16ddf4 | |||
9646ee61c3 | |||
5466a441df | |||
1ce3f89574 | |||
5a1afb0056 |
18 changed files with 613 additions and 348 deletions
24
CHANGE.md
24
CHANGE.md
|
@ -1,8 +1,30 @@
|
||||||
0.3.0 (2023-XX-XX)
|
0.3.0 (2023-XX-XX)
|
||||||
==================
|
==================
|
||||||
|
|
||||||
* Support HN figure placement.
|
|
||||||
* Support HN text overlay.
|
* Support HN text overlay.
|
||||||
|
* Support HN page with text.
|
||||||
|
* Handle inaccurate page count in CAJ and KDH.
|
||||||
|
|
||||||
|
0.2.5 (2023-01-05)
|
||||||
|
==================
|
||||||
|
|
||||||
|
* Improve PDF parser.
|
||||||
|
* Handle duplicated object in CAJ.
|
||||||
|
* Handle duplicated image in HN.
|
||||||
|
* Handle incomplete PDF object in CAJ and KDH.
|
||||||
|
* Handle invalid PDF object token in CAJ and KDH.
|
||||||
|
* Fix JBIG decoder.
|
||||||
|
|
||||||
|
0.2.4 (2022-12-31)
|
||||||
|
==================
|
||||||
|
|
||||||
|
* Fix HN image compositing.
|
||||||
|
* Fix PDF object check.
|
||||||
|
|
||||||
|
0.2.3 (2022-12-30)
|
||||||
|
==================
|
||||||
|
|
||||||
|
* Support HN figure placement.
|
||||||
|
|
||||||
0.2.2 (2022-12-29)
|
0.2.2 (2022-12-29)
|
||||||
==================
|
==================
|
||||||
|
|
12
README.md
12
README.md
|
@ -12,12 +12,12 @@ any failures with a sample that can reproduce the behaviour.
|
||||||
Dependency
|
Dependency
|
||||||
----------
|
----------
|
||||||
|
|
||||||
1. OpenSSL
|
1. libcrypto (OpenSSL)
|
||||||
2. libiconv
|
2. zlib
|
||||||
3. zlib
|
3. jbig2dec
|
||||||
4. jbig2dec
|
4. libjpeg-turbo
|
||||||
5. libjpeg-turbo
|
5. openjpeg
|
||||||
6. openjpeg
|
6. pkgconf
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
|
|
17
src/Makefile
17
src/Makefile
|
@ -4,23 +4,28 @@
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
|
||||||
src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c jp2.c \
|
src = melon.c iconv.c zlib.c jbig2.c jpeg.c jp2.c md5.c \
|
||||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
|
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c cnki_pdf.c \
|
||||||
cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
|
cnki_zlib.c cnki_jbig.c cnki_jbig_dec.c cnki_jbig2.c cnki.c \
|
||||||
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
||||||
inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h jp2.h \
|
inc = extern.h version.h iconv.h zlib.h jbig2.h jpeg.h jp2.h md5.h \
|
||||||
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
|
cnki.h pdf_cnki.h cnki_jbig.h cnki_jbig_dec.h pdf.h
|
||||||
|
|
||||||
obj = ${src:.c=.o}
|
obj = ${src:.c=.o}
|
||||||
|
|
||||||
PREFIX = /usr/local
|
PREFIX = /usr/local
|
||||||
|
|
||||||
CFLAGS = -O2 -pipe -flto -Wall -Wextra
|
CFLAGS = -O2 -pipe -flto -Wall -Wextra
|
||||||
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
|
LDFLAGS = -Wl,-O2 -lcrypto -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
|
||||||
|
|
||||||
CFLAGS += -I/usr/local/include
|
CFLAGS += -I/usr/local/include
|
||||||
LDFLAGS += -L/usr/local/lib
|
LDFLAGS += -L/usr/local/lib
|
||||||
|
|
||||||
|
OPENJPEG_CFLAGS != pkgconf --cflags libopenjp2
|
||||||
|
|
||||||
|
CFLAGS += ${OPENJPEG_CFLAGS}
|
||||||
|
CFLAGS += -DLIBICONV_PLUG
|
||||||
|
|
||||||
all: ${obj} ${inc}
|
all: ${obj} ${inc}
|
||||||
${CC} ${LDFLAGS} -o melon ${obj}
|
${CC} ${LDFLAGS} -o melon ${obj}
|
||||||
|
|
||||||
|
|
|
@ -58,10 +58,10 @@ typedef struct _hn_image_t {
|
||||||
int32_t format; /* hn_code */
|
int32_t format; /* hn_code */
|
||||||
int32_t address;
|
int32_t address;
|
||||||
int32_t size;
|
int32_t size;
|
||||||
int16_t x;
|
uint16_t x;
|
||||||
int16_t y;
|
uint16_t y;
|
||||||
int16_t w;
|
uint16_t w;
|
||||||
int16_t h;
|
uint16_t h;
|
||||||
char *image;
|
char *image;
|
||||||
} hn_image_t;
|
} hn_image_t;
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "cnki_jbig.h"
|
#include "cnki_jbig.h"
|
||||||
#include "jbig.h"
|
#include "cnki_jbig_dec.h"
|
||||||
|
|
||||||
int
|
int
|
||||||
cnki_jbig(char **bitmap, int *bitmap_size,
|
cnki_jbig(char **bitmap, int *bitmap_size,
|
||||||
|
|
|
@ -27,8 +27,8 @@ typedef struct _dib_t {
|
||||||
uint16_t depth;
|
uint16_t depth;
|
||||||
uint32_t compression; /* dib_compression_code */
|
uint32_t compression; /* dib_compression_code */
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
uint32_t resolution_h;
|
int32_t resolution_h;
|
||||||
uint32_t resolution_v;
|
int32_t resolution_v;
|
||||||
uint32_t colour;
|
uint32_t colour;
|
||||||
uint32_t colour_used;
|
uint32_t colour_used;
|
||||||
} dib_t;
|
} dib_t;
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static const uint16_t _LSZ[256] = {
|
static const uint16_t _LSZ[0x71] = {
|
||||||
0x5a1d,
|
0x5a1d,
|
||||||
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
|
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
|
||||||
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
|
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
|
||||||
|
@ -28,7 +28,7 @@ static const uint16_t _LSZ[256] = {
|
||||||
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
|
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint8_t _NLPS[256] = {
|
static const uint8_t _NLPS[0x71] = {
|
||||||
1,
|
1,
|
||||||
14, 16, 18, 20, 23, 25, 28, 30,
|
14, 16, 18, 20, 23, 25, 28, 30,
|
||||||
33, 35, 9, 10, 12, 15, 36, 38,
|
33, 35, 9, 10, 12, 15, 36, 38,
|
||||||
|
@ -47,7 +47,7 @@ static const uint8_t _NLPS[256] = {
|
||||||
105, 108, 109, 110, 111, 110, 112, 112
|
105, 108, 109, 110, 111, 110, 112, 112
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint8_t _NMPS[256] = {
|
static const uint8_t _NMPS[0x71] = {
|
||||||
1,
|
1,
|
||||||
2, 3, 4, 5, 6, 7, 8, 9,
|
2, 3, 4, 5, 6, 7, 8, 9,
|
||||||
10, 11, 12, 13, 13, 15, 16, 17,
|
10, 11, 12, 13, 13, 15, 16, 17,
|
||||||
|
@ -66,7 +66,7 @@ static const uint8_t _NMPS[256] = {
|
||||||
106, 107, 103, 109, 107, 111, 109, 111
|
106, 107, 103, 109, 107, 111, 109, 111
|
||||||
};
|
};
|
||||||
|
|
||||||
static const bool _SWTCH[256] = {
|
static const bool _SWTCH[0x71] = {
|
||||||
1,
|
1,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 1, 0, 0,
|
0, 0, 0, 0, 0, 1, 0, 0,
|
||||||
|
@ -99,7 +99,7 @@ static int _height;
|
||||||
static int _width_padded;
|
static int _width_padded;
|
||||||
|
|
||||||
static int _ret_pos;
|
static int _ret_pos;
|
||||||
static char *_ret;
|
static unsigned char *_ret;
|
||||||
|
|
||||||
static int _scd_size;
|
static int _scd_size;
|
||||||
static unsigned char *_scd;
|
static unsigned char *_scd;
|
||||||
|
@ -108,7 +108,7 @@ static void
|
||||||
_bytein(void)
|
_bytein(void)
|
||||||
{
|
{
|
||||||
if (_ret_pos < _scd_size)
|
if (_ret_pos < _scd_size)
|
||||||
_reg_c += *(_scd + _ret_pos++) << 8;
|
_reg_c += _scd[_ret_pos++] << 8;
|
||||||
|
|
||||||
_ct = 8;
|
_ct = 8;
|
||||||
}
|
}
|
||||||
|
@ -209,25 +209,36 @@ _procline(int line, char *a, char *b, char *c)
|
||||||
/* The encoder must be erroneous */
|
/* The encoder must be erroneous */
|
||||||
uint16_t cx = 0;
|
uint16_t cx = 0;
|
||||||
|
|
||||||
|
if (line > 0) {
|
||||||
|
cx += (_ret[_width_padded * (_height - line)] & 0x20) << 2;
|
||||||
|
cx += _ret[_width_padded * (_height - line)] & 0x40;
|
||||||
|
cx += (_ret[_width_padded * (_height - line)] & 0x80) >> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (line > 1) {
|
||||||
|
cx += (_ret[_width_padded * (_height - line + 1)] & 0x40) >> 4;
|
||||||
|
cx += (_ret[_width_padded * (_height - line + 1)] & 0x80) >> 6;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < _width; i++) {
|
for (int i = 0; i < _width; i++) {
|
||||||
_decode(cx);
|
_decode(cx);
|
||||||
|
|
||||||
cx >>= 1;
|
cx >>= 1;
|
||||||
|
|
||||||
if (_pix == 1) {
|
if (_pix == 1) {
|
||||||
*(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07));
|
_ret[_width_padded * (_height - line - 1) + i / 8] |= _pix << (7 - (i & 0x07));
|
||||||
*(c + i) = 1;
|
c[i] = 1;
|
||||||
cx |= 0x0200;
|
cx |= 0x0200;
|
||||||
} else {
|
} else {
|
||||||
cx &= 0xfdff;
|
cx &= 0xfdff;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i + 2 < _width && *(a + i + 2) == 1)
|
if (i + 2 < _width && a[i + 2] == 1)
|
||||||
cx |= 0x0004;
|
cx |= 0x0004;
|
||||||
else
|
else
|
||||||
cx &= 0xfffb;
|
cx &= 0xfffb;
|
||||||
|
|
||||||
if (i + 3 < _width && *(b + i + 3) == 1)
|
if (i + 3 < _width && b[i + 3] == 1)
|
||||||
cx |= 0x0080;
|
cx |= 0x0080;
|
||||||
else
|
else
|
||||||
cx &= 0xff7f;
|
cx &= 0xff7f;
|
||||||
|
@ -293,7 +304,7 @@ strdec_jbig(char **bitmap, int width, int height,
|
||||||
memset(*bitmap, 0, _height * _width_padded);
|
memset(*bitmap, 0, _height * _width_padded);
|
||||||
|
|
||||||
_ret_pos = 0;
|
_ret_pos = 0;
|
||||||
_ret = *bitmap;
|
_ret = (unsigned char *) *bitmap;
|
||||||
|
|
||||||
_scd_size = jbig_size;
|
_scd_size = jbig_size;
|
||||||
_scd = (unsigned char *) jbig;
|
_scd = (unsigned char *) jbig;
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
@ -15,16 +15,18 @@ cnki_kdh(cnki_t **param)
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Begin 'KDH' decryption\n");
|
printf("Begin 'KDH' decryption\n");
|
||||||
|
|
||||||
|
long cur = ADDRESS_KDH_BODY;
|
||||||
|
long end;
|
||||||
|
|
||||||
fseek((*param)->fp_i, 0, SEEK_END);
|
fseek((*param)->fp_i, 0, SEEK_END);
|
||||||
|
end = ftell((*param)->fp_i);
|
||||||
long size = ftell((*param)->fp_i);
|
fseek((*param)->fp_i, cur, SEEK_SET);
|
||||||
|
|
||||||
fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET);
|
|
||||||
|
|
||||||
const char key[] = KEY_KDH;
|
const char key[] = KEY_KDH;
|
||||||
const int key_len = KEY_KDH_LENGTH;
|
const int key_len = KEY_KDH_LENGTH;
|
||||||
long key_cur = 0;
|
long key_cur = 0;
|
||||||
|
|
||||||
|
int buf_size;
|
||||||
char buf[(*param)->size_buf];
|
char buf[(*param)->size_buf];
|
||||||
|
|
||||||
FILE *tmp = tmpfile();
|
FILE *tmp = tmpfile();
|
||||||
|
@ -33,32 +35,32 @@ cnki_kdh(cnki_t **param)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
if (cur + (*param)->size_buf < end)
|
||||||
|
buf_size = (*param)->size_buf;
|
||||||
|
else
|
||||||
|
buf_size = end - cur;
|
||||||
|
|
||||||
for (int i = 0; i < (*param)->size_buf; i++) {
|
fread(buf, buf_size, 1, (*param)->fp_i);
|
||||||
buf[i] ^= key[key_cur % key_len];
|
|
||||||
key_cur++;
|
|
||||||
}
|
|
||||||
|
|
||||||
fwrite(buf, (*param)->size_buf, 1, tmp);
|
for (int i = 0; i < buf_size; i++)
|
||||||
|
buf[i] ^= key[key_cur++ % key_len];
|
||||||
|
|
||||||
if (ftell((*param)->fp_i) == size)
|
fwrite(buf, buf_size, 1, tmp);
|
||||||
|
|
||||||
|
if ((cur = ftell((*param)->fp_i)) >= end)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
||||||
|
|
||||||
fseek(tmp, 0, SEEK_SET);
|
fclose((*param)->fp_i);
|
||||||
|
|
||||||
FILE *orig = (*param)->fp_i;
|
fseek(tmp, 0, SEEK_SET);
|
||||||
(*param)->fp_i = tmp;
|
(*param)->fp_i = tmp;
|
||||||
|
|
||||||
cnki_pdf(param);
|
cnki_pdf(param);
|
||||||
|
|
||||||
(*param)->fp_i = orig;
|
|
||||||
fclose(tmp);
|
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Conversion ended\n");
|
printf("Conversion ended\n");
|
||||||
|
|
||||||
|
|
527
src/cnki_pdf.c
527
src/cnki_pdf.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
@ -145,11 +145,54 @@ _pdf_obj_sort(cnki_t **param, pdf_object_t **pdf)
|
||||||
|
|
||||||
ret = pdf_obj_sort(pdf);
|
ret = pdf_obj_sort(pdf);
|
||||||
|
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("Sorted object(s)\n");
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
pdf_object_t *tmp;
|
||||||
|
pdf_object_t *ptr;
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Deleting duplicated object\n");
|
||||||
|
|
||||||
|
ptr = *pdf;
|
||||||
|
while (ptr->next != NULL && ptr->next->next != NULL) {
|
||||||
|
if (ptr->next->id == ptr->next->next->id) {
|
||||||
|
/* Keep the bigger one, the smaller one is usually incomplete */
|
||||||
|
if (ptr->next->size < ptr->next->next->size) {
|
||||||
|
pdf_get_obj(&ptr, ptr->next->id, &tmp);
|
||||||
|
pdf_obj_del(&ptr, ptr->next->id);
|
||||||
|
} else {
|
||||||
|
pdf_get_obj(&ptr->next, ptr->next->id, &tmp);
|
||||||
|
pdf_obj_del(&ptr->next, ptr->next->id);
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp->next = NULL;
|
||||||
|
pdf_obj_destroy(&tmp);
|
||||||
|
|
||||||
|
ret++;
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Deleted duplicated object %d.\n", ptr->next->id);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = ptr->next;
|
||||||
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 0) {
|
if ((*param)->stat > 0) {
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
printf("Sorted object(s)\n");
|
printf("No duplicated object\n");
|
||||||
else
|
else
|
||||||
printf("Object(s) not sorted\n");
|
printf("Deleted %d duplicated object(s)\n", ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -212,19 +255,23 @@ cnki_pdf(cnki_t **param)
|
||||||
int *parent = NULL;
|
int *parent = NULL;
|
||||||
pdf_get_parent_id(&pdf, &parent);
|
pdf_get_parent_id(&pdf, &parent);
|
||||||
|
|
||||||
if (parent[0] == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Discovered %d parent object(s)\n", parent[0]);
|
printf("Discovered %d parent object(s)\n", parent[0]);
|
||||||
|
|
||||||
int *parent_missing = malloc(parent[0] * sizeof(int));
|
pdf_obj_sort(&pdf);
|
||||||
|
|
||||||
if (parent_missing == NULL)
|
_pdf_obj_dedup(param, &pdf);
|
||||||
return 1;
|
|
||||||
|
|
||||||
|
int8_t *parent_missing;
|
||||||
int *kid;
|
int *kid;
|
||||||
|
|
||||||
|
if (parent[0] > 0) {
|
||||||
|
parent_missing = malloc(parent[0] * sizeof(int8_t));
|
||||||
|
|
||||||
|
if (parent_missing == NULL)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 1; i <= parent[0]; i++) {
|
for (int i = 1; i <= parent[0]; i++) {
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Searching for object %d\n", parent[i]);
|
printf("Searching for object %d\n", parent[i]);
|
||||||
|
@ -266,7 +313,7 @@ cnki_pdf(cnki_t **param)
|
||||||
|
|
||||||
snprintf(buf, 64,
|
snprintf(buf, 64,
|
||||||
"]\n/Count %d\n>>",
|
"]\n/Count %d\n>>",
|
||||||
pdf_get_kid_count(&pdf, parent[i]));
|
pdf_get_kid_count(&pdf, parent[i]) > 0 ? pdf_get_kid_count(&pdf, parent[i]) : kid[0]);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
|
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
|
||||||
|
@ -291,7 +338,7 @@ cnki_pdf(cnki_t **param)
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Searching for root object\n");
|
printf("Searching for root object\n");
|
||||||
|
|
||||||
dictionary_size = 128;
|
dictionary_size = 128 + 12 * parent[0];
|
||||||
dictionary = malloc(dictionary_size);
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
if (dictionary == NULL) {
|
||||||
|
@ -319,7 +366,7 @@ cnki_pdf(cnki_t **param)
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < parent[0]; i++)
|
for (int i = 0; i < parent[0]; i++)
|
||||||
if (parent_missing[i] == 1)
|
if (parent_missing[i] == 1)
|
||||||
root = i;
|
root = parent[i + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (root == 0)
|
if (root == 0)
|
||||||
|
@ -328,7 +375,11 @@ cnki_pdf(cnki_t **param)
|
||||||
printf("Root object is %d.\n", root);
|
printf("Root object is %d.\n", root);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pdf_get_obj(&pdf, root, NULL) != 0) {
|
int root_gen;
|
||||||
|
|
||||||
|
pdf_object_t *tmp;
|
||||||
|
|
||||||
|
if ((root_gen = pdf_get_obj(&pdf, root, &tmp)) != 0) {
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Root object is missing\n");
|
printf("Root object is missing\n");
|
||||||
|
|
||||||
|
@ -336,14 +387,11 @@ cnki_pdf(cnki_t **param)
|
||||||
printf("Generating root object\n");
|
printf("Generating root object\n");
|
||||||
|
|
||||||
snprintf(buf, 64,
|
snprintf(buf, 64,
|
||||||
"<<\n/Type /Pages\n/Kids ");
|
"<<\n/Type /Pages\n/Kids [");
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
if (parent[0] > 1)
|
|
||||||
strcat(dictionary, "[");
|
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < parent[0]; i++) {
|
for (int i = 0, j = 0; i < parent[0]; i++) {
|
||||||
if (parent_missing[i]) {
|
if (parent_missing[i] == 1) {
|
||||||
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
|
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
|
@ -352,12 +400,7 @@ cnki_pdf(cnki_t **param)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parent[0] > 1)
|
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
|
||||||
strcat(dictionary, "]");
|
|
||||||
|
|
||||||
strcat(dictionary, "\n");
|
|
||||||
|
|
||||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
strcat(dictionary, ">>");
|
strcat(dictionary, ">>");
|
||||||
|
@ -371,11 +414,27 @@ cnki_pdf(cnki_t **param)
|
||||||
root);
|
root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (parent[0] > 0)
|
||||||
|
free(parent_missing);
|
||||||
|
|
||||||
free(parent);
|
free(parent);
|
||||||
free(parent_missing);
|
|
||||||
|
|
||||||
int outline = _pdf_cnki_outline(param, &pdf);
|
int outline = _pdf_cnki_outline(param, &pdf);
|
||||||
|
|
||||||
|
snprintf(buf, 64,
|
||||||
|
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||||
|
root);
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
|
if (outline != -1) {
|
||||||
|
snprintf(buf, 64,
|
||||||
|
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
||||||
|
outline);
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
strcat(dictionary, ">>");
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Searching for catalog object\n");
|
printf("Searching for catalog object\n");
|
||||||
|
|
||||||
|
@ -384,6 +443,16 @@ cnki_pdf(cnki_t **param)
|
||||||
if (catalog != 0) {
|
if (catalog != 0) {
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Catalog object is %d.\n", catalog);
|
printf("Catalog object is %d.\n", catalog);
|
||||||
|
|
||||||
|
if (root_gen != 0) {
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Replacing catalog object\n");
|
||||||
|
|
||||||
|
pdf_obj_replace(&pdf, catalog, NULL, dictionary, NULL, 0);
|
||||||
|
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("Replaced catalog object\n");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Catalog object is missing\n");
|
printf("Catalog object is missing\n");
|
||||||
|
@ -391,20 +460,6 @@ cnki_pdf(cnki_t **param)
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Generating catalog object\n");
|
printf("Generating catalog object\n");
|
||||||
|
|
||||||
snprintf(buf, 64,
|
|
||||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
|
||||||
root);
|
|
||||||
strcat(dictionary, buf);
|
|
||||||
|
|
||||||
if (outline != -1) {
|
|
||||||
snprintf(buf, 64,
|
|
||||||
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
|
||||||
outline);
|
|
||||||
strcat(dictionary, buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
strcat(dictionary, ">>");
|
|
||||||
|
|
||||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
|
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
|
@ -423,8 +478,6 @@ cnki_pdf(cnki_t **param)
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Deleting xref object\n");
|
printf("Deleting xref object\n");
|
||||||
|
|
||||||
pdf_object_t *tmp;
|
|
||||||
|
|
||||||
pdf_get_obj(&pdf, xref, &tmp);
|
pdf_get_obj(&pdf, xref, &tmp);
|
||||||
pdf_obj_del(&pdf, xref);
|
pdf_obj_del(&pdf, xref);
|
||||||
|
|
||||||
|
@ -460,6 +513,9 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
if (pdf_obj_create(&pdf) != 0)
|
if (pdf_obj_create(&pdf) != 0)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
int font = pdf_get_free_id(&pdf);
|
||||||
|
pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Generating PDF object(s)\n");
|
printf("Generating PDF object(s)\n");
|
||||||
|
|
||||||
|
@ -468,27 +524,26 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
char buf[64];
|
char buf[64];
|
||||||
|
|
||||||
|
pdf_object_t *tmp;
|
||||||
|
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
|
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
|
||||||
|
|
||||||
if (root_kid == NULL)
|
if (root_kid == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
memset(root_kid, 0, (*param)->file_stat->page);
|
memset(root_kid, 0, (*param)->file_stat->page * sizeof(int));
|
||||||
|
|
||||||
object_hn_t *ptr = (*param)->object_hn;
|
object_hn_t *ptr = (*param)->object_hn;
|
||||||
while (ptr != NULL) {
|
while (ptr != NULL) {
|
||||||
/*
|
/*
|
||||||
* External object (ptr->image_length) +
|
* External object (ptr->image_length) +
|
||||||
* content object +
|
|
||||||
* resource object +
|
* resource object +
|
||||||
|
* content object +
|
||||||
* page object
|
* page object
|
||||||
*/
|
*/
|
||||||
int *ids = NULL;
|
int *ids = NULL;
|
||||||
if (ptr->image_length > 0)
|
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
|
||||||
else
|
|
||||||
pdf_get_free_ids(&pdf, &ids, 2);
|
|
||||||
|
|
||||||
int bitmap_size;
|
int bitmap_size;
|
||||||
char *bitmap;
|
char *bitmap;
|
||||||
|
@ -496,10 +551,10 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
int stream_size;
|
int stream_size;
|
||||||
char *stream;
|
char *stream;
|
||||||
|
|
||||||
int *dim;
|
double *dim;
|
||||||
|
|
||||||
if (ptr->image_length > 0) {
|
if (ptr->image_length > 0) {
|
||||||
dim = malloc(2 * ptr->image_length * sizeof(int));
|
dim = malloc(2 * ptr->image_length * sizeof(double));
|
||||||
|
|
||||||
if (dim == NULL) {
|
if (dim == NULL) {
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
|
@ -730,10 +785,30 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr->image_length > 0) {
|
if (ptr->image_length > 0)
|
||||||
memset(dictionary, 0, dictionary_size);
|
free(dictionary);
|
||||||
|
|
||||||
strcat(dictionary, "<<\n/XObject <<");
|
dictionary_size = 128 + 2 * ptr->text_size + 128 * ptr->image_length;
|
||||||
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
|
if (dictionary == NULL) {
|
||||||
|
free(root_kid);
|
||||||
|
free(ids);
|
||||||
|
free(dim);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
|
||||||
|
strcat(dictionary, "<<\n");
|
||||||
|
|
||||||
|
if (ptr->text_size > 0) {
|
||||||
|
snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ptr->image_length > 0) {
|
||||||
|
strcat(dictionary, "/XObject <<");
|
||||||
|
|
||||||
for (int i = 0; i < ptr->image_length; i++) {
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
|
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
|
||||||
|
@ -743,13 +818,15 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
strcat(dictionary, " ");
|
strcat(dictionary, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
strcat(dictionary, ">>\n>>");
|
strcat(dictionary, ">>\n");
|
||||||
|
|
||||||
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
|
||||||
|
|
||||||
free(dictionary);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
strcat(dictionary, ">>");
|
||||||
|
|
||||||
|
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
||||||
|
|
||||||
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
|
||||||
int conv_size;
|
int conv_size;
|
||||||
char *conv_dst;
|
char *conv_dst;
|
||||||
char conv_src[2];
|
char conv_src[2];
|
||||||
|
@ -766,104 +843,104 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
ptr->text = stream;
|
ptr->text = stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
dictionary_size = 64 + 2 * ptr->text_size;
|
|
||||||
dictionary = malloc(dictionary_size);
|
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
|
||||||
free(root_kid);
|
|
||||||
free(ids);
|
|
||||||
free(dim);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
|
||||||
|
|
||||||
strcat(dictionary, "BT\n");
|
strcat(dictionary, "BT\n");
|
||||||
|
|
||||||
|
strcat(dictionary, "/F0 10 Tf\n");
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
|
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
|
||||||
switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
|
switch (((unsigned char) ptr->text[i + 1] << 8) + (unsigned char) ptr->text[i]) {
|
||||||
case 0x8001:
|
case 0x8001:
|
||||||
if (ptr->address_next <= ptr->address) {
|
if (ptr->address_next <= ptr->address) {
|
||||||
i += 2;
|
if (i + 7 >= ptr->text_size) {
|
||||||
|
i += 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
conv_src[0] = ptr->text[i + 7];
|
||||||
|
conv_src[1] = ptr->text[i + 6];
|
||||||
|
|
||||||
|
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
|
||||||
|
//strcat(dictionary, buf);
|
||||||
|
|
||||||
|
conv_size = 6;
|
||||||
|
|
||||||
|
if (strconv(&conv_dst, "UTF-16BE",
|
||||||
|
conv_src, "GB18030", &conv_size) == 0) {
|
||||||
|
if (conv_size - 2 > 0) {
|
||||||
|
strcat(dictionary, "<");
|
||||||
|
for (int k = 0; k < conv_size - 2; k++) {
|
||||||
|
snprintf(conv_hex, 3,
|
||||||
|
"%02x", (unsigned char) conv_dst[k]);
|
||||||
|
strcat(dictionary, conv_hex);
|
||||||
|
}
|
||||||
|
strcat(dictionary, "> Tj\n");
|
||||||
|
}
|
||||||
|
free(conv_dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
i += 8;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
strcat(dictionary, "T*\n");
|
strcat(dictionary, "T*\n");
|
||||||
case 0x8070:
|
case 0x8070:
|
||||||
if (ptr->address_next > ptr->address) {
|
i += 4;
|
||||||
i += 4;
|
|
||||||
|
|
||||||
for (;;) {
|
if (ptr->address_next <= ptr->address)
|
||||||
if (i + 3 >= ptr->text_size ||
|
break;
|
||||||
(unsigned char) ptr->text[i + 1] == 0x80)
|
|
||||||
break;
|
|
||||||
|
|
||||||
conv_src[0] = ptr->text[i + 3];
|
for (;;) {
|
||||||
conv_src[1] = ptr->text[i + 2];
|
if (i + 3 >= ptr->text_size ||
|
||||||
|
(unsigned char) ptr->text[i + 1] == 0x80)
|
||||||
|
break;
|
||||||
|
|
||||||
conv_size = 6;
|
conv_src[0] = ptr->text[i + 3];
|
||||||
|
conv_src[1] = ptr->text[i + 2];
|
||||||
|
|
||||||
if (strconv(&conv_dst, "UTF-16BE",
|
//snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
|
||||||
conv_src, "GB18030", &conv_size) == 0) {
|
//strcat(dictionary, buf);
|
||||||
if (conv_size - 2 > 0) {
|
|
||||||
strcat(dictionary, "<feff");
|
conv_size = 6;
|
||||||
for (int k = 0; k < conv_size - 2; k++) {
|
|
||||||
snprintf(conv_hex, 3,
|
if (strconv(&conv_dst, "UTF-16BE",
|
||||||
"%02x", (unsigned char) conv_dst[k]);
|
conv_src, "GB18030", &conv_size) == 0) {
|
||||||
strcat(dictionary, conv_hex);
|
if (conv_size - 2 > 0) {
|
||||||
}
|
strcat(dictionary, "<");
|
||||||
strcat(dictionary, "> Tj\n");
|
for (int k = 0; k < conv_size - 2; k++) {
|
||||||
|
snprintf(conv_hex, 3,
|
||||||
|
"%02x", (unsigned char) conv_dst[k]);
|
||||||
|
strcat(dictionary, conv_hex);
|
||||||
}
|
}
|
||||||
free(conv_dst);
|
strcat(dictionary, "> Tj\n");
|
||||||
}
|
}
|
||||||
|
free(conv_dst);
|
||||||
i += 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
i += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i + 7 >= ptr->text_size) {
|
|
||||||
i += 2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
conv_src[0] = ptr->text[i + 7];
|
|
||||||
conv_src[1] = ptr->text[i + 6];
|
|
||||||
|
|
||||||
conv_size = 6;
|
|
||||||
|
|
||||||
if (strconv(&conv_dst, "UTF-16BE",
|
|
||||||
conv_src, "GB18030", &conv_size) == 0) {
|
|
||||||
if (conv_size - 2 > 0) {
|
|
||||||
strcat(dictionary, "<feff");
|
|
||||||
for (int k = 0; k < conv_size - 2; k++) {
|
|
||||||
snprintf(conv_hex, 3,
|
|
||||||
"%02x", (unsigned char) conv_dst[k]);
|
|
||||||
strcat(dictionary, conv_hex);
|
|
||||||
}
|
|
||||||
strcat(dictionary, "> Tj\n");
|
|
||||||
}
|
|
||||||
free(conv_dst);
|
|
||||||
}
|
|
||||||
|
|
||||||
i += 8;
|
|
||||||
break;
|
break;
|
||||||
case 0x800a:
|
case 0x800a:
|
||||||
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
|
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
|
||||||
i += 2;
|
i += 2;
|
||||||
|
|
||||||
|
if (j >= ptr->image_length)
|
||||||
|
i += 26;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr->image_length > 0) {
|
if (ptr->image_length > 0) {
|
||||||
ptr->image_data[j].x =
|
ptr->image_data[j].x += (unsigned char) ptr->text[i + 5] << 8;
|
||||||
ptr->text[i + 5] << 8 | ptr->text[i + 4];
|
ptr->image_data[j].x += (unsigned char) ptr->text[i + 4];
|
||||||
ptr->image_data[j].y =
|
|
||||||
ptr->text[i + 7] << 8 | ptr->text[i + 6];
|
ptr->image_data[j].y += (unsigned char) ptr->text[i + 7] << 8;
|
||||||
ptr->image_data[j].w =
|
ptr->image_data[j].y += (unsigned char) ptr->text[i + 6];
|
||||||
ptr->text[i + 9] << 8 | ptr->text[i + 8];
|
|
||||||
ptr->image_data[j].h =
|
ptr->image_data[j].w += (unsigned char) ptr->text[i + 9] << 8;
|
||||||
ptr->text[i + 11] << 8 | ptr->text[i + 10];
|
ptr->image_data[j].w += (unsigned char) ptr->text[i + 8];
|
||||||
|
|
||||||
|
ptr->image_data[j].h += (unsigned char) ptr->text[i + 11] << 8;
|
||||||
|
ptr->image_data[j].h += (unsigned char) ptr->text[i + 10];
|
||||||
|
|
||||||
if ((*param)->stat > 2)
|
if ((*param)->stat > 2)
|
||||||
printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
|
printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
|
||||||
|
@ -875,7 +952,9 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 28;
|
i += 28;
|
||||||
j++;
|
|
||||||
|
if (j == 0 || ptr->image_data[j].x > 0 || ptr->image_data[j].y > 0)
|
||||||
|
j++;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
i += 4;
|
i += 4;
|
||||||
|
@ -885,39 +964,61 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
strcat(dictionary, "ET");
|
strcat(dictionary, "ET");
|
||||||
|
|
||||||
/* FIXME: Use the text somehow? */
|
if (ptr->image_length > 0)
|
||||||
free(dictionary);
|
strcat(dictionary, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
dictionary_size = 128 + 128 * ptr->image_length;
|
/* FIXME: Use the text somehow? */
|
||||||
dictionary = malloc(dictionary_size);
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
|
||||||
free(root_kid);
|
|
||||||
free(ids);
|
|
||||||
free(dim);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr->image_length > 0) {
|
if (ptr->image_length > 0) {
|
||||||
memset(dictionary, 0, dictionary_size);
|
|
||||||
|
|
||||||
char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
|
|
||||||
double resize_x = 1;
|
double resize_x = 1;
|
||||||
double resize_y = 1;
|
double resize_y = 1;
|
||||||
|
|
||||||
if (dim[0] > 0 && dim[1] > 0) {
|
double margin_x = 0;
|
||||||
/* Scale within bound of A4 paper */
|
double margin_y = 0;
|
||||||
resize_x = 4 * 595.2756 / dim[0];
|
|
||||||
resize_y = 4 * 841.8898 / dim[1];
|
|
||||||
|
|
||||||
if (resize_y < resize_x)
|
if (ptr->image_data[0].x == 0 && ptr->image_data[0].y == 0 && dim[0] > 0 && dim[1] > 0) {
|
||||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
/* Scale within bound of A4 paper */
|
||||||
resize_y, resize_y);
|
resize_x = 2480.315 / dim[0];
|
||||||
else
|
resize_y = 3507.874 / dim[1];
|
||||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
|
||||||
resize_x, resize_x);
|
if (resize_y < resize_x) {
|
||||||
strcat(resize_str, buf);
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
|
dim[i * 2] *= resize_y;
|
||||||
|
dim[i * 2 + 1] *= resize_y;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
|
dim[i * 2] *= resize_x;
|
||||||
|
dim[i * 2 + 1] *= resize_x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
margin_x = (2480.315 - dim[0]) / 2;
|
||||||
|
margin_y = (3507.874 - dim[1]) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove duplicated image, ptr->image_length is sometimes squared */
|
||||||
|
for (int i = 1; i < ptr->image_length; i++) {
|
||||||
|
if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
|
||||||
|
dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (int j = i; j < ptr->image_length; j++) {
|
||||||
|
pdf_get_obj(&pdf, ids[j], &tmp);
|
||||||
|
pdf_obj_del(&pdf, ids[j]);
|
||||||
|
|
||||||
|
tmp->next = NULL;
|
||||||
|
pdf_obj_destroy(&tmp);
|
||||||
|
|
||||||
|
dim[j * 2] = -1;
|
||||||
|
dim[j * 2 + 1] = -1;
|
||||||
|
|
||||||
|
pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < ptr->image_length; i++) {
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
|
@ -926,39 +1027,47 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
strcat(dictionary, "q\n");
|
strcat(dictionary, "q\n");
|
||||||
|
|
||||||
strcat(dictionary, resize_str);
|
strcat(dictionary, "0.24 0 0 0.24 0 0 cm\n");
|
||||||
|
|
||||||
/* Rotate image */
|
/* Rotate image */
|
||||||
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
|
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
|
||||||
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
|
snprintf(buf, 64, "1 0 0 -1 0 %f cm\n", dim[i * 2 + 1]);
|
||||||
dim[i * 2 + 1]);
|
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
strcat(dictionary, "1 0 0 -1 0 0 cm\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Translate figure */
|
/* Translate figure */
|
||||||
if (i > 0) {
|
if (ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) {
|
||||||
double origin_x = 0.4043745 * ptr->image_data[i].x;
|
double origin_x = ptr->image_data[i].x * 0.40433;
|
||||||
double origin_y = 0.4043561 * ptr->image_data[i].y;
|
double origin_y = ptr->image_data[i].y * 0.40433;
|
||||||
|
|
||||||
if (origin_x < 0)
|
if (resize_y < resize_x) {
|
||||||
origin_x += (2381.102 - dim[i * 2]) / 2;
|
origin_x *= resize_y;
|
||||||
|
origin_y *= resize_y;
|
||||||
if (origin_y < 0)
|
} else {
|
||||||
origin_y += (3367.559 + dim[i * 2 + 1]) / 2;
|
origin_x *= resize_x;
|
||||||
|
origin_y *= resize_x;
|
||||||
|
}
|
||||||
|
|
||||||
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
|
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
|
||||||
origin_y = -3367.559 + origin_y + dim[i * 2 + 1];
|
origin_y = -3507.874 + origin_y + dim[i * 2 + 1];
|
||||||
else
|
else
|
||||||
origin_y = 3367.559 - origin_y - dim[i * 2 + 1];
|
origin_y = 3507.874 - origin_y - dim[i * 2 + 1];
|
||||||
|
|
||||||
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
|
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
|
if (margin_x > 0 || margin_y > 0) {
|
||||||
dim[i * 2], dim[i * 2 + 1]);
|
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
|
||||||
|
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, -margin_y);
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
} else {
|
||||||
|
snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, margin_y);
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", dim[i * 2], dim[i * 2 + 1]);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
snprintf(buf, 64, "/Im%d Do\n", i);
|
snprintf(buf, 64, "/Im%d Do\n", i);
|
||||||
|
@ -970,10 +1079,13 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
strcat(dictionary, "\n");
|
strcat(dictionary, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strlen(dictionary) > 0) {
|
||||||
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
free(ids);
|
free(ids);
|
||||||
free(dim);
|
|
||||||
free(dictionary);
|
free(dictionary);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -993,6 +1105,9 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
NULL, dictionary, stream, stream_size);
|
NULL, dictionary, stream, stream_size);
|
||||||
|
|
||||||
free(stream);
|
free(stream);
|
||||||
|
} else {
|
||||||
|
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
|
||||||
|
NULL, NULL, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
@ -1002,32 +1117,19 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
/* A4 paper */
|
/* A4 paper */
|
||||||
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
|
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
|
||||||
|
|
||||||
if (ptr->image_length > 0) {
|
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
||||||
free(dim);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
/* Add /Parent when we know root */
|
||||||
strcat(dictionary, buf);
|
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||||
|
|
||||||
/* Add /Parent when we know root */
|
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
|
||||||
|
|
||||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
|
||||||
} else {
|
|
||||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
|
|
||||||
strcat(dictionary, buf);
|
|
||||||
|
|
||||||
/* Add /Parent when we know root */
|
|
||||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
|
|
||||||
|
|
||||||
root_kid[cnt++] = ids[ptr->image_length + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
free(dictionary);
|
|
||||||
|
|
||||||
free(ids);
|
free(ids);
|
||||||
|
free(dictionary);
|
||||||
|
|
||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
}
|
}
|
||||||
|
@ -1071,12 +1173,9 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
int root = pdf_get_free_id(&pdf);
|
int root = pdf_get_free_id(&pdf);
|
||||||
|
|
||||||
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids ");
|
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
if ((*param)->file_stat->page > 1)
|
|
||||||
strcat(dictionary, "[");
|
|
||||||
|
|
||||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||||
snprintf(buf, 64, "%d 0 R", root_kid[i]);
|
snprintf(buf, 64, "%d 0 R", root_kid[i]);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
@ -1084,12 +1183,7 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
strcat(dictionary, " ");
|
strcat(dictionary, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->file_stat->page > 1)
|
snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
|
||||||
strcat(dictionary, "]");
|
|
||||||
|
|
||||||
strcat(dictionary, "\n");
|
|
||||||
|
|
||||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
|
||||||
strcat(dictionary, ">>");
|
strcat(dictionary, ">>");
|
||||||
|
@ -1098,16 +1192,6 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
|
|
||||||
free(dictionary);
|
free(dictionary);
|
||||||
|
|
||||||
dictionary_size = 256;
|
|
||||||
dictionary = malloc(dictionary_size);
|
|
||||||
|
|
||||||
if (dictionary == NULL) {
|
|
||||||
free(root_kid);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object_t *tmp = NULL;
|
|
||||||
|
|
||||||
/* Add /Parent to page object */
|
/* Add /Parent to page object */
|
||||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||||
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
||||||
|
@ -1116,9 +1200,16 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
dictionary_size = tmp->dictionary_size + 24;
|
||||||
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
|
if (dictionary == NULL) {
|
||||||
|
free(root_kid);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
|
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
|
||||||
|
memset(dictionary + tmp->dictionary_size, 0, 24);
|
||||||
|
|
||||||
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
||||||
strcat(dictionary, buf);
|
strcat(dictionary, buf);
|
||||||
|
@ -1128,10 +1219,20 @@ cnki_pdf_hn(cnki_t **param)
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(dictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(root_kid);
|
free(root_kid);
|
||||||
|
|
||||||
|
dictionary_size = 128;
|
||||||
|
dictionary = malloc(dictionary_size);
|
||||||
|
|
||||||
|
if (dictionary == NULL) {
|
||||||
|
free(root_kid);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
memset(dictionary, 0, dictionary_size);
|
memset(dictionary, 0, dictionary_size);
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
* Copyright (c) 2022-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
@ -31,5 +31,6 @@ strdec_jbig2(char **bitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
jbig2_release_page(ctx, image);
|
jbig2_release_page(ctx, image);
|
||||||
|
jbig2_ctx_free(ctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,11 +6,7 @@
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifdef __linux__
|
|
||||||
#include <openjpeg.h>
|
#include <openjpeg.h>
|
||||||
#else
|
|
||||||
#include <openjpeg-2.5/openjpeg.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct _stream_user_data {
|
typedef struct _stream_user_data {
|
||||||
OPJ_SIZE_T pos;
|
OPJ_SIZE_T pos;
|
||||||
|
|
24
src/md5.c
Normal file
24
src/md5.c
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <openssl/md5.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
strmd5(unsigned char **dst, int *dst_size,
|
||||||
|
const unsigned char * restrict src, int src_size)
|
||||||
|
{
|
||||||
|
*dst_size = MD5_DIGEST_LENGTH;
|
||||||
|
*dst = malloc(*dst_size);
|
||||||
|
|
||||||
|
if (*dst == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
MD5(src, src_size, *dst);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
9
src/md5.h
Normal file
9
src/md5.h
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
strmd5(unsigned char **dst, int *dst_size,
|
||||||
|
const unsigned char * restrict src, int src_size);
|
|
@ -89,7 +89,7 @@ pdf_get_free_id(pdf_object_t **pdf)
|
||||||
|
|
||||||
int id = 0;
|
int id = 0;
|
||||||
|
|
||||||
for (int i = 1; i < 99999999; i++) {
|
for (int i = 1; i < 100000000; i++) {
|
||||||
ptr = (*pdf)->next;
|
ptr = (*pdf)->next;
|
||||||
while (ptr != NULL) {
|
while (ptr != NULL) {
|
||||||
if (ptr->id == i) {
|
if (ptr->id == i) {
|
||||||
|
@ -123,7 +123,7 @@ pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
|
||||||
int id = 0;
|
int id = 0;
|
||||||
|
|
||||||
pdf_object_t *ptr;
|
pdf_object_t *ptr;
|
||||||
for (int i = 1; i < 99999999; i++) {
|
for (int i = 1; i < 100000000; i++) {
|
||||||
ptr = (*pdf)->next;
|
ptr = (*pdf)->next;
|
||||||
while (ptr != NULL) {
|
while (ptr != NULL) {
|
||||||
if (ptr->id == i) {
|
if (ptr->id == i) {
|
||||||
|
|
197
src/pdf_parser.c
197
src/pdf_parser.c
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
@ -19,26 +19,35 @@ static void *
|
||||||
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
|
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
|
||||||
{
|
{
|
||||||
const char whitespace[6] = {
|
const char whitespace[6] = {
|
||||||
'\r',
|
0x00,
|
||||||
'\n',
|
0x09,
|
||||||
'\f',
|
0x0a,
|
||||||
'\t',
|
0x0c,
|
||||||
'\0',
|
0x0d,
|
||||||
' '
|
0x20
|
||||||
};
|
};
|
||||||
|
|
||||||
char tmp[s1 + 1];
|
char *ret = NULL;
|
||||||
memcpy(tmp, p1, s1);
|
|
||||||
|
|
||||||
char *ret;
|
char str[s1 + 1];
|
||||||
|
memcpy(str, p1, s1);
|
||||||
|
|
||||||
|
size_t tmp_size = 0;
|
||||||
|
char *tmp;
|
||||||
|
|
||||||
for (int i = 0; i < 6; i++) {
|
for (int i = 0; i < 6; i++) {
|
||||||
tmp[s1] = whitespace[i];
|
str[s1] = whitespace[i];
|
||||||
if((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
|
|
||||||
return ret;
|
if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
|
||||||
|
tmp_size = tmp - (char *) p0;
|
||||||
|
ret = tmp;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -57,23 +66,45 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
end = ftell(*fp);
|
end = ftell(*fp);
|
||||||
fseek(*fp, cur, SEEK_SET);
|
fseek(*fp, cur, SEEK_SET);
|
||||||
|
|
||||||
int head = 0;
|
long head = 0;
|
||||||
int tail = 0;
|
long tail = 0;
|
||||||
char *pos;
|
char *pos;
|
||||||
char *tmp;
|
char *tmp;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
fread(buf, size_buf, 1, *fp);
|
if (cur + size_buf < end) {
|
||||||
|
fread(buf, size_buf, 1, *fp);
|
||||||
|
} else {
|
||||||
|
fread(buf, end - cur, 1, *fp);
|
||||||
|
memset(buf + end - cur, 0, size_buf - end + cur);
|
||||||
|
}
|
||||||
|
|
||||||
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
|
if (head == 0) {
|
||||||
head = cur + (pos - buf) + 7;
|
/* Hack needed for invalid object */
|
||||||
|
pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
|
||||||
|
tmp = memmem(buf, size_buf, " 0 obj", 6);
|
||||||
|
|
||||||
|
while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
|
||||||
|
tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
|
||||||
|
|
||||||
|
if (pos != NULL && tmp != NULL) {
|
||||||
|
if (pos - buf < tmp - buf)
|
||||||
|
head = cur + (pos - buf) + 7;
|
||||||
|
else
|
||||||
|
head = cur + (tmp - buf) + 6;
|
||||||
|
} else if (pos != NULL) {
|
||||||
|
head = cur + (pos - buf) + 7;
|
||||||
|
} else if (tmp != NULL) {
|
||||||
|
head = cur + (tmp - buf) + 6;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
|
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
|
||||||
/* We need to check if it is the object stored in stream */
|
/* We need to check if it is the object stored in stream */
|
||||||
while (memcmp(pos + 7,
|
while (memcmp(pos + 7,
|
||||||
"\r\nendstream", 11) == 0 &&
|
"\r\nendstream", 11) == 0 &&
|
||||||
(tmp = _memmem_whitespace(pos + 6,
|
(tmp = _memmem_whitespace(pos + 7,
|
||||||
size_buf - (pos - buf) - 6,
|
size_buf - (pos - buf) - 7,
|
||||||
"endobj", 6)) != NULL)
|
"endobj", 6)) != NULL)
|
||||||
pos = tmp;
|
pos = tmp;
|
||||||
|
|
||||||
|
@ -102,13 +133,17 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
ptr->address = head;
|
ptr->address = head;
|
||||||
ptr->size = tail - head;
|
ptr->size = tail - head;
|
||||||
|
|
||||||
fseek(*fp, tail + 6, SEEK_SET);
|
fseek(*fp, tail + 7, SEEK_SET);
|
||||||
head = tail = 0;
|
head = tail = 0;
|
||||||
|
} else if (head > 0 && tail > 0) {
|
||||||
|
if (cur + size_buf < end)
|
||||||
|
fseek(*fp, head, SEEK_SET);
|
||||||
|
tail = 0;
|
||||||
} else {
|
} else {
|
||||||
fseek(*fp, -6, SEEK_CUR);
|
fseek(*fp, -7, SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cur = ftell(*fp)) + 6 >= end)
|
if ((cur = ftell(*fp)) + 7 >= end)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -138,34 +173,86 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
if (buf == NULL)
|
if (buf == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
memset(buf, 0, ptr->size);
|
fseek(*fp, ptr->address, SEEK_SET);
|
||||||
|
fread(buf, ptr->size, 1, *fp);
|
||||||
|
|
||||||
fseek(*fp, ptr->address - 12, SEEK_SET);
|
/* Handle incomplete object */
|
||||||
|
head = buf;
|
||||||
|
while ((tmp = _memmem_whitespace(head,
|
||||||
|
ptr->size - (head - buf),
|
||||||
|
" 0 obj", 6)) != NULL)
|
||||||
|
head = tmp + 7;
|
||||||
|
|
||||||
|
/* Hack needed for invalid object */
|
||||||
|
while ((tmp = memmem(head,
|
||||||
|
ptr->size - (head - buf),
|
||||||
|
" 0 obj", 6)) != NULL)
|
||||||
|
head = tmp + 6;
|
||||||
|
|
||||||
|
if (head - buf > 0) {
|
||||||
|
ptr->address += head - buf;
|
||||||
|
ptr->size -= head - buf;
|
||||||
|
|
||||||
|
tmp = realloc(buf, ptr->size);
|
||||||
|
|
||||||
|
if (tmp == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
buf = tmp;
|
||||||
|
|
||||||
|
fseek(*fp, ptr->address, SEEK_SET);
|
||||||
|
fread(buf, ptr->size, 1, *fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hack needed for invalid object */
|
||||||
|
fseek(*fp, ptr->address - 14, SEEK_SET);
|
||||||
fread(str, 8, 1, *fp);
|
fread(str, 8, 1, *fp);
|
||||||
|
|
||||||
for (int i = 0; i < 8; i++) {
|
if (str[7] < '0' || str[7] > '9') {
|
||||||
if (str[i] >= '0' && str[i] <= '9') {
|
fseek(*fp, ptr->address - 15, SEEK_SET);
|
||||||
ptr->id = atoi(str + i);
|
fread(str, 8, 1, *fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 7; i >= 0; i--) {
|
||||||
|
if (str[i] < '0' || str[i] > '9') {
|
||||||
|
if (i < 7)
|
||||||
|
ptr->id = atoi(str + i + 1);
|
||||||
|
else
|
||||||
|
ptr->id = 0;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fseek(*fp, ptr->address, SEEK_SET);
|
|
||||||
fread(buf, ptr->size, 1, *fp);
|
|
||||||
|
|
||||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
|
||||||
/*
|
/* Hack needed for invalid object */
|
||||||
* A dictionary object may have nested dictionary,
|
(tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
|
||||||
* but it should not be in a stream
|
if (memmem(buf, tail - buf, "stream\r\n", 8) != NULL) {
|
||||||
*/
|
tail = memmem(buf, ptr->size, ">>", 2);
|
||||||
while ((tmp = _memmem_whitespace(tail + 2,
|
|
||||||
ptr->size - (tail - buf) - 2,
|
while (ptr->size - (tail - buf) > 2 &&
|
||||||
">>", 2)) != NULL &&
|
(tmp = memmem(tail + 2,
|
||||||
memmem(tail + 2,
|
ptr->size - (tail - buf) - 2,
|
||||||
ptr->size - (tail - buf) - 2,
|
">>", 2)) != NULL &&
|
||||||
"stream\r\n", 8) == NULL)
|
memmem(tail + 2,
|
||||||
tail = tmp;
|
(tmp - tail) - 2,
|
||||||
|
"stream\r\n", 8) == NULL)
|
||||||
|
tail = tmp;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* A dictionary object may have nested dictionary,
|
||||||
|
* but it should not be in a stream
|
||||||
|
*/
|
||||||
|
while (ptr->size - (tail - buf) > 3 &&
|
||||||
|
(tmp = _memmem_whitespace(tail + 3,
|
||||||
|
ptr->size - (tail - buf) - 3,
|
||||||
|
">>", 2)) != NULL &&
|
||||||
|
memmem(tail + 3,
|
||||||
|
(tmp - tail) - 3,
|
||||||
|
"stream\r\n", 8) == NULL)
|
||||||
|
tail = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
ptr->dictionary_size = tail - head + 2;
|
ptr->dictionary_size = tail - head + 2;
|
||||||
ptr->dictionary = malloc(ptr->dictionary_size + 1);
|
ptr->dictionary = malloc(ptr->dictionary_size + 1);
|
||||||
|
@ -173,8 +260,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
if (ptr->dictionary == NULL)
|
if (ptr->dictionary == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
|
|
||||||
memcpy(ptr->dictionary, head, ptr->dictionary_size);
|
memcpy(ptr->dictionary, head, ptr->dictionary_size);
|
||||||
|
memset(ptr->dictionary + ptr->dictionary_size, 0, 1);
|
||||||
|
|
||||||
if ((head = memmem(tail,
|
if ((head = memmem(tail,
|
||||||
ptr->size - (tail - buf),
|
ptr->size - (tail - buf),
|
||||||
|
@ -187,11 +274,11 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
* contains another object that
|
* contains another object that
|
||||||
* contains another stream
|
* contains another stream
|
||||||
*/
|
*/
|
||||||
while (_memmem_whitespace(tail,
|
while (_memmem_whitespace(tail + 10,
|
||||||
ptr->size - (tail - buf),
|
ptr->size - (tail - buf) - 10,
|
||||||
"endobj", 6) != NULL &&
|
"endobj", 6) != NULL &&
|
||||||
(tmp = _memmem_whitespace(tail + 9,
|
(tmp = _memmem_whitespace(tail + 10,
|
||||||
ptr->size - (tail - buf) - 9,
|
ptr->size - (tail - buf) - 10,
|
||||||
"endstream", 9)) != NULL)
|
"endstream", 9)) != NULL)
|
||||||
tail = tmp;
|
tail = tmp;
|
||||||
|
|
||||||
|
@ -203,19 +290,13 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
|
|
||||||
memcpy(ptr->stream, head + 8, ptr->stream_size);
|
memcpy(ptr->stream, head + 8, ptr->stream_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(buf);
|
||||||
} else {
|
} else {
|
||||||
ptr->object_size = ptr->size;
|
ptr->object_size = ptr->size;
|
||||||
ptr->object = malloc(ptr->object_size + 1);
|
ptr->object = buf;
|
||||||
|
|
||||||
if (ptr->object == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
memset(ptr->object, 0, ptr->object_size + 1);
|
|
||||||
memcpy(ptr->object, buf, ptr->object_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(buf);
|
|
||||||
|
|
||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
@ -8,14 +8,32 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include <openssl/md5.h>
|
#include "version.h"
|
||||||
|
#include "md5.h"
|
||||||
#include "pdf.h"
|
#include "pdf.h"
|
||||||
|
|
||||||
|
static int
|
||||||
|
_info_obj(pdf_object_t **pdf)
|
||||||
|
{
|
||||||
|
char dictionary[128] = "<<\n"
|
||||||
|
"/Producer (Melon " VERSION "." RELEASE "." PATCH EXTRA ")\n"
|
||||||
|
"/CreationDate (D:";
|
||||||
|
|
||||||
|
char buf[64];
|
||||||
|
|
||||||
|
time_t timestamp = time(NULL);
|
||||||
|
|
||||||
|
strftime(buf, 64, "%Y%m%d%H%M%S", gmtime(&timestamp));
|
||||||
|
strcat(dictionary, buf);
|
||||||
|
strcat(dictionary, "+00'00')\n>>");
|
||||||
|
|
||||||
|
return pdf_obj_append(pdf, 0, NULL, dictionary, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
||||||
{
|
{
|
||||||
if (*pdf == NULL || *fp == NULL)
|
if (*pdf == NULL || *fp == NULL || _info_obj(pdf) != 0)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
long cur;
|
long cur;
|
||||||
|
@ -144,35 +162,28 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
||||||
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
|
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned char str[64];
|
int fid_size;
|
||||||
memcpy(str, buf, 64);
|
unsigned char *fid;
|
||||||
|
|
||||||
unsigned char fid[MD5_DIGEST_LENGTH];
|
if (strmd5(&fid, &fid_size, (unsigned char *) buf, buf_size) != 0)
|
||||||
MD5(str, buf_size, fid);
|
return 1;
|
||||||
|
|
||||||
pdf_object_t *ptr = *pdf;
|
pdf_object_t *ptr = *pdf;
|
||||||
while (ptr->next != NULL)
|
while (ptr->next != NULL)
|
||||||
ptr = ptr->next;
|
ptr = ptr->next;
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: Document information dictionary
|
|
||||||
* `"/Producer (Melon)"'
|
|
||||||
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
|
|
||||||
*
|
|
||||||
* Trailer dictionary
|
|
||||||
* `"/Info %d 0 R"'
|
|
||||||
*/
|
|
||||||
fprintf(*fp,
|
fprintf(*fp,
|
||||||
"/Size %d\n/Root %d 0 R\n",
|
"/Size %d\n/Root %d 0 R\n/Info %d 0 R\n",
|
||||||
ptr->id + 1,
|
ptr->id + 1,
|
||||||
pdf_get_catalog_id(pdf));
|
pdf_get_catalog_id(pdf),
|
||||||
|
ptr->id);
|
||||||
|
|
||||||
fputs("/ID [", *fp);
|
fputs("/ID [", *fp);
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
fputs("<", *fp);
|
fputs("<", *fp);
|
||||||
|
|
||||||
for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
|
for (int j = 0; j < fid_size; j++)
|
||||||
fprintf(*fp, "%02x", fid[j]);
|
fprintf(*fp, "%02x", fid[j]);
|
||||||
|
|
||||||
fputs(">", *fp);
|
fputs(">", *fp);
|
||||||
|
@ -191,5 +202,7 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
||||||
|
|
||||||
fputs("%%EOF\n", *fp);
|
fputs("%%EOF\n", *fp);
|
||||||
|
|
||||||
|
free(fid);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define VERSION "0"
|
#define VERSION "0"
|
||||||
#define RELEASE "2"
|
#define RELEASE "3"
|
||||||
#define PATCH "2"
|
#define PATCH "0"
|
||||||
#define EXTRA ""
|
#define EXTRA ""
|
||||||
|
|
Loading…
Add table
Reference in a new issue