Compare commits
41 commits
Author | SHA1 | Date | |
---|---|---|---|
2fa2b760ae | |||
dd5854678c | |||
123d62141c | |||
283446dba5 | |||
13cb0a1b8d | |||
a7ecc15614 | |||
56ffe14d5a | |||
c2afbb3cbc | |||
8cd8a8fbba | |||
8276423eb8 | |||
7ac0971a17 | |||
e0fe937e1a | |||
4a02b8bfc7 | |||
7d9d658461 | |||
000405693e | |||
d6fa934b5f | |||
1a1fee1034 | |||
cde014cffb | |||
9019a18449 | |||
a18de8f2ef | |||
70e1e7ea97 | |||
bffb8ce8a4 | |||
3ac51d66b9 | |||
0bbf8e65dd | |||
220a81c2ad | |||
1d899d934d | |||
226f16ddf4 | |||
9646ee61c3 | |||
5466a441df | |||
1ce3f89574 | |||
5a1afb0056 | |||
060bc00a0d | |||
97931e1470 | |||
cd0af5ba3c | |||
988a751c15 | |||
8083b30530 | |||
abce2fd2e4 | |||
224a09a015 | |||
c2ad6549fb | |||
d2826fa075 | |||
288b65a1fd |
25 changed files with 1075 additions and 461 deletions
39
CHANGE.md
39
CHANGE.md
|
@ -1,7 +1,44 @@
|
|||
0.3.0 (2022-XX-XX)
|
||||
0.3.0 (2023-XX-XX)
|
||||
==================
|
||||
|
||||
* Support HN text overlay.
|
||||
* Support HN page with text.
|
||||
* Handle inaccurate page count in CAJ and KDH.
|
||||
|
||||
0.2.5 (2023-01-05)
|
||||
==================
|
||||
|
||||
* Improve PDF parser.
|
||||
* Handle duplicated object in CAJ.
|
||||
* Handle duplicated image in HN.
|
||||
* Handle incomplete PDF object in CAJ and KDH.
|
||||
* Handle invalid PDF object token in CAJ and KDH.
|
||||
* Fix JBIG decoder.
|
||||
|
||||
0.2.4 (2022-12-31)
|
||||
==================
|
||||
|
||||
* Fix HN image compositing.
|
||||
* Fix PDF object check.
|
||||
|
||||
0.2.3 (2022-12-30)
|
||||
==================
|
||||
|
||||
* Support HN figure placement.
|
||||
|
||||
0.2.2 (2022-12-29)
|
||||
==================
|
||||
|
||||
* Support JPEG 2000 for HN.
|
||||
* Handle missing but referenced root object.
|
||||
* Handle HN with more than one image per page.
|
||||
* Fix buffer overflow.
|
||||
|
||||
0.2.1 (2022-12-26)
|
||||
==================
|
||||
|
||||
* Handle different JPEG colour component.
|
||||
* Handle headless HN and page with no image.
|
||||
|
||||
0.2.0 (2022-12-22)
|
||||
==================
|
||||
|
|
19
README.md
19
README.md
|
@ -9,16 +9,15 @@ Development
|
|||
Currently, CAJ, KDH, and HN can be converted. Please report
|
||||
any failures with a sample that can reproduce the behaviour.
|
||||
|
||||
HN support does not support JPEG 2000 yet.
|
||||
|
||||
Dependency
|
||||
----------
|
||||
|
||||
1. OpenSSL
|
||||
2. libiconv
|
||||
3. zlib
|
||||
4. jbig2dec
|
||||
5. libjpeg-turbo
|
||||
1. libcrypto (OpenSSL)
|
||||
2. zlib
|
||||
3. jbig2dec
|
||||
4. libjpeg-turbo
|
||||
5. openjpeg
|
||||
6. pkgconf
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
@ -36,12 +35,12 @@ Options
|
|||
Specify output file
|
||||
|
||||
-b, --buffer
|
||||
Set buffer size (default 512k)
|
||||
Set input buffer size (default 512k)
|
||||
|
||||
-v, --verbose
|
||||
Print more information (twice for even more, three times for HN image decoding information as well)
|
||||
Print more information (twice for even more, three times for HN image processing information as well)
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
||||
This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
|
||||
This project is inspired by [https://github.com/caj2pdf/caj2pdf](https://github.com/caj2pdf/caj2pdf)
|
||||
|
|
17
src/Makefile
17
src/Makefile
|
@ -4,23 +4,28 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
|
||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
|
||||
cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
|
||||
src = melon.c iconv.c zlib.c jbig2.c jpeg.c jp2.c md5.c \
|
||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c cnki_pdf.c \
|
||||
cnki_zlib.c cnki_jbig.c cnki_jbig_dec.c cnki_jbig2.c cnki.c \
|
||||
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
||||
inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
|
||||
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
|
||||
inc = extern.h version.h iconv.h zlib.h jbig2.h jpeg.h jp2.h md5.h \
|
||||
cnki.h pdf_cnki.h cnki_jbig.h cnki_jbig_dec.h pdf.h
|
||||
|
||||
obj = ${src:.c=.o}
|
||||
|
||||
PREFIX = /usr/local
|
||||
|
||||
CFLAGS = -O2 -pipe -flto -Wall -Wextra
|
||||
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
|
||||
LDFLAGS = -Wl,-O2 -lcrypto -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
|
||||
|
||||
CFLAGS += -I/usr/local/include
|
||||
LDFLAGS += -L/usr/local/lib
|
||||
|
||||
OPENJPEG_CFLAGS != pkgconf --cflags libopenjp2
|
||||
|
||||
CFLAGS += ${OPENJPEG_CFLAGS}
|
||||
CFLAGS += -DLIBICONV_PLUG
|
||||
|
||||
all: ${obj} ${inc}
|
||||
${CC} ${LDFLAGS} -o melon ${obj}
|
||||
|
||||
|
|
31
src/cnki.c
31
src/cnki.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -54,6 +54,11 @@ cnki_destroy(cnki_t **param)
|
|||
object_hn_t *ptr_hn;
|
||||
while ((ptr_hn = (*param)->object_hn) != NULL) {
|
||||
(*param)->object_hn = (*param)->object_hn->next;
|
||||
free(ptr_hn->text);
|
||||
if (ptr_hn->image_data != NULL)
|
||||
for (int i = 0; i < ptr_hn->image_length; i++)
|
||||
free(ptr_hn->image_data[i].image);
|
||||
free(ptr_hn->image_data);
|
||||
free(ptr_hn);
|
||||
}
|
||||
|
||||
|
@ -71,12 +76,19 @@ cnki_info(cnki_t **param)
|
|||
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
|
||||
|
||||
int addr[2];
|
||||
unsigned char str[2];
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_HEAD, SEEK_SET);
|
||||
fread((*param)->file_stat->type, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
fread(str, 2, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if ((unsigned char) (*param)->file_stat->type[0] > 0x7f)
|
||||
printf("File type is '%02x'\n", (unsigned char) (*param)->file_stat->type[0]);
|
||||
else
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
}
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "%PDF", 4) == 0) {
|
||||
return 0;
|
||||
|
@ -86,6 +98,9 @@ cnki_info(cnki_t **param)
|
|||
} else if (strncmp((*param)->file_stat->type, "HN", 2) == 0) {
|
||||
addr[0] = ADDRESS_HN_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
addr[0] = ADDRESS_C8_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if (strncmp((*param)->file_stat->type, "KDH ", 4) == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
|
@ -102,6 +117,14 @@ cnki_info(cnki_t **param)
|
|||
printf("Advised %d page(s)\n",
|
||||
(*param)->file_stat->page);
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "HN", 2) == 0 && str[0] == 0xc8 && str[1] == 0x00) {
|
||||
fseek((*param)->fp_i, 0xd8, SEEK_SET);
|
||||
return 0;
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
fseek((*param)->fp_i, 0x50, SEEK_SET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading outline count at 0x%x\n", addr[1]);
|
||||
|
||||
|
@ -115,7 +138,7 @@ cnki_info(cnki_t **param)
|
|||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading outline(s)\n");
|
||||
printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
printf("\t%19s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
"title",
|
||||
"hierarchy",
|
||||
"page",
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#define ADDRESS_HN_PAGE 0x0090
|
||||
#define ADDRESS_HN_OUTLINE 0x0158
|
||||
|
||||
#define ADDRESS_C8_PAGE 0x0008
|
||||
|
||||
#define ADDRESS_KDH_BODY 0x00fe
|
||||
|
||||
#define KEY_KDH "FZHMEI"
|
||||
|
@ -56,6 +58,10 @@ typedef struct _hn_image_t {
|
|||
int32_t format; /* hn_code */
|
||||
int32_t address;
|
||||
int32_t size;
|
||||
uint16_t x;
|
||||
uint16_t y;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
char *image;
|
||||
} hn_image_t;
|
||||
|
||||
|
@ -64,7 +70,8 @@ typedef struct _object_hn_t {
|
|||
int32_t text_size;
|
||||
int16_t image_length;
|
||||
int16_t page;
|
||||
int32_t unknown[2]; /* TODO: what is it? */
|
||||
int32_t unknown; /* TODO: what is it? */
|
||||
int32_t address_next;
|
||||
char *text;
|
||||
struct _hn_image_t *image_data;
|
||||
struct _object_hn_t *next;
|
||||
|
|
112
src/cnki_hn.c
112
src/cnki_hn.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -27,12 +27,13 @@ cnki_hn(cnki_t **param)
|
|||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4s\t%8s\t%8s\n",
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"length",
|
||||
"page",
|
||||
"unknown",
|
||||
"next",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
|
@ -44,7 +45,8 @@ cnki_hn(cnki_t **param)
|
|||
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->unknown, 8, 1, (*param)->fp_i);
|
||||
fread(&ptr->unknown, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->address_next, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_data = NULL;
|
||||
|
@ -62,66 +64,80 @@ cnki_hn(cnki_t **param)
|
|||
|
||||
ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
ptr->text = malloc(ptr->text_size);
|
||||
if (ptr->text_size > 0) {
|
||||
ptr->text = malloc(ptr->text_size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t{%4d, %8d}",
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t%8d\t%08x",
|
||||
ptr->address,
|
||||
ptr->text_size,
|
||||
ptr->image_length,
|
||||
ptr->page,
|
||||
ptr->unknown[0],
|
||||
ptr->unknown[1]);
|
||||
ptr->unknown,
|
||||
ptr->address_next);
|
||||
|
||||
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||
if (ptr->image_length > 0) {
|
||||
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||
|
||||
if (ptr->image_data == NULL)
|
||||
return 1;
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||
fseek((*param)->fp_i,
|
||||
ptr->image_data[i].address + ptr->image_data[i].size,
|
||||
SEEK_SET);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
ptr->image_data[i].image = malloc(ptr->image_data[i].size);
|
||||
|
||||
if (ptr->image_data[i].image == NULL)
|
||||
if (ptr->image_data == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
|
||||
fread(ptr->image_data[i].image,
|
||||
ptr->image_data[i].size, 1,
|
||||
(*param)->fp_i);
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||
ptr->image_data[i].x = 0;
|
||||
ptr->image_data[i].y = 0;
|
||||
ptr->image_data[i].w = 0;
|
||||
ptr->image_data[i].h = 0;
|
||||
fseek((*param)->fp_i,
|
||||
ptr->image_data[i].address + ptr->image_data[i].size,
|
||||
SEEK_SET);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
if (i == 0) {
|
||||
printf("\t%4d\t%08x\t%8d\n",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
} else {
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4d\t%08x\t%8d\n",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
ptr->image_data[i].image = malloc(ptr->image_data[i].size);
|
||||
|
||||
if (ptr->image_data[i].image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
|
||||
fread(ptr->image_data[i].image,
|
||||
ptr->image_data[i].size, 1,
|
||||
(*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
if (i == 0) {
|
||||
printf("\t%4d\t%08x\t%8d\n",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
} else {
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4d\t%08x\t%8d\n",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ((*param)->stat > 1) {
|
||||
printf("\t%4s\t%8s\t%8s\n",
|
||||
"",
|
||||
"",
|
||||
"");
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "cnki_jbig.h"
|
||||
#include "jbig.h"
|
||||
#include "cnki_jbig_dec.h"
|
||||
|
||||
int
|
||||
cnki_jbig(char **bitmap, int *bitmap_size,
|
||||
|
|
|
@ -27,8 +27,8 @@ typedef struct _dib_t {
|
|||
uint16_t depth;
|
||||
uint32_t compression; /* dib_compression_code */
|
||||
uint32_t size;
|
||||
uint32_t resolution_h;
|
||||
uint32_t resolution_v;
|
||||
int32_t resolution_h;
|
||||
int32_t resolution_v;
|
||||
uint32_t colour;
|
||||
uint32_t colour_used;
|
||||
} dib_t;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static const uint16_t _LSZ[256] = {
|
||||
static const uint16_t _LSZ[0x71] = {
|
||||
0x5a1d,
|
||||
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
|
||||
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
|
||||
|
@ -28,7 +28,7 @@ static const uint16_t _LSZ[256] = {
|
|||
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
|
||||
};
|
||||
|
||||
static const uint8_t _NLPS[256] = {
|
||||
static const uint8_t _NLPS[0x71] = {
|
||||
1,
|
||||
14, 16, 18, 20, 23, 25, 28, 30,
|
||||
33, 35, 9, 10, 12, 15, 36, 38,
|
||||
|
@ -47,7 +47,7 @@ static const uint8_t _NLPS[256] = {
|
|||
105, 108, 109, 110, 111, 110, 112, 112
|
||||
};
|
||||
|
||||
static const uint8_t _NMPS[256] = {
|
||||
static const uint8_t _NMPS[0x71] = {
|
||||
1,
|
||||
2, 3, 4, 5, 6, 7, 8, 9,
|
||||
10, 11, 12, 13, 13, 15, 16, 17,
|
||||
|
@ -66,7 +66,7 @@ static const uint8_t _NMPS[256] = {
|
|||
106, 107, 103, 109, 107, 111, 109, 111
|
||||
};
|
||||
|
||||
static const bool _SWTCH[256] = {
|
||||
static const bool _SWTCH[0x71] = {
|
||||
1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 0, 0,
|
||||
|
@ -99,7 +99,7 @@ static int _height;
|
|||
static int _width_padded;
|
||||
|
||||
static int _ret_pos;
|
||||
static char *_ret;
|
||||
static unsigned char *_ret;
|
||||
|
||||
static int _scd_size;
|
||||
static unsigned char *_scd;
|
||||
|
@ -108,7 +108,7 @@ static void
|
|||
_bytein(void)
|
||||
{
|
||||
if (_ret_pos < _scd_size)
|
||||
_reg_c += *(_scd + _ret_pos++) << 8;
|
||||
_reg_c += _scd[_ret_pos++] << 8;
|
||||
|
||||
_ct = 8;
|
||||
}
|
||||
|
@ -207,7 +207,18 @@ static void
|
|||
_procline(int line, char *a, char *b, char *c)
|
||||
{
|
||||
/* The encoder must be erroneous */
|
||||
uint16_t cx = (*b & 0x01) << 2;
|
||||
uint16_t cx = 0;
|
||||
|
||||
if (line > 0) {
|
||||
cx += (_ret[_width_padded * (_height - line)] & 0x20) << 2;
|
||||
cx += _ret[_width_padded * (_height - line)] & 0x40;
|
||||
cx += (_ret[_width_padded * (_height - line)] & 0x80) >> 2;
|
||||
}
|
||||
|
||||
if (line > 1) {
|
||||
cx += (_ret[_width_padded * (_height - line + 1)] & 0x40) >> 4;
|
||||
cx += (_ret[_width_padded * (_height - line + 1)] & 0x80) >> 6;
|
||||
}
|
||||
|
||||
for (int i = 0; i < _width; i++) {
|
||||
_decode(cx);
|
||||
|
@ -215,19 +226,19 @@ _procline(int line, char *a, char *b, char *c)
|
|||
cx >>= 1;
|
||||
|
||||
if (_pix == 1) {
|
||||
*(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07));
|
||||
*(c + i) = 1;
|
||||
_ret[_width_padded * (_height - line - 1) + i / 8] |= _pix << (7 - (i & 0x07));
|
||||
c[i] = 1;
|
||||
cx |= 0x0200;
|
||||
} else {
|
||||
cx &= 0xfdff;
|
||||
}
|
||||
|
||||
if (i + 2 < _width && *(a + i + 2) == 1)
|
||||
if (i + 2 < _width && a[i + 2] == 1)
|
||||
cx |= 0x0004;
|
||||
else
|
||||
cx &= 0xfffb;
|
||||
|
||||
if (i + 3 < _width && *(b + i + 3) == 1)
|
||||
if (i + 3 < _width && b[i + 3] == 1)
|
||||
cx |= 0x0080;
|
||||
else
|
||||
cx &= 0xff7f;
|
||||
|
@ -293,7 +304,7 @@ strdec_jbig(char **bitmap, int width, int height,
|
|||
memset(*bitmap, 0, _height * _width_padded);
|
||||
|
||||
_ret_pos = 0;
|
||||
_ret = *bitmap;
|
||||
_ret = (unsigned char *) *bitmap;
|
||||
|
||||
_scd_size = jbig_size;
|
||||
_scd = (unsigned char *) jbig;
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -15,16 +15,18 @@ cnki_kdh(cnki_t **param)
|
|||
if ((*param)->stat > 0)
|
||||
printf("Begin 'KDH' decryption\n");
|
||||
|
||||
long cur = ADDRESS_KDH_BODY;
|
||||
long end;
|
||||
|
||||
fseek((*param)->fp_i, 0, SEEK_END);
|
||||
|
||||
long size = ftell((*param)->fp_i);
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET);
|
||||
end = ftell((*param)->fp_i);
|
||||
fseek((*param)->fp_i, cur, SEEK_SET);
|
||||
|
||||
const char key[] = KEY_KDH;
|
||||
const int key_len = KEY_KDH_LENGTH;
|
||||
long key_cur = 0;
|
||||
|
||||
int buf_size;
|
||||
char buf[(*param)->size_buf];
|
||||
|
||||
FILE *tmp = tmpfile();
|
||||
|
@ -33,32 +35,32 @@ cnki_kdh(cnki_t **param)
|
|||
return 1;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
||||
if (cur + (*param)->size_buf < end)
|
||||
buf_size = (*param)->size_buf;
|
||||
else
|
||||
buf_size = end - cur;
|
||||
|
||||
for (int i = 0; i < (*param)->size_buf; i++) {
|
||||
buf[i] ^= key[key_cur % key_len];
|
||||
key_cur++;
|
||||
}
|
||||
fread(buf, buf_size, 1, (*param)->fp_i);
|
||||
|
||||
fwrite(buf, (*param)->size_buf, 1, tmp);
|
||||
for (int i = 0; i < buf_size; i++)
|
||||
buf[i] ^= key[key_cur++ % key_len];
|
||||
|
||||
if (ftell((*param)->fp_i) == size)
|
||||
fwrite(buf, buf_size, 1, tmp);
|
||||
|
||||
if ((cur = ftell((*param)->fp_i)) >= end)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
||||
|
||||
fseek(tmp, 0, SEEK_SET);
|
||||
fclose((*param)->fp_i);
|
||||
|
||||
FILE *orig = (*param)->fp_i;
|
||||
fseek(tmp, 0, SEEK_SET);
|
||||
(*param)->fp_i = tmp;
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
(*param)->fp_i = orig;
|
||||
fclose(tmp);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
|
|
793
src/cnki_pdf.c
793
src/cnki_pdf.c
File diff suppressed because it is too large
Load diff
|
@ -13,12 +13,17 @@ int
|
|||
cnki_zlib(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size)
|
||||
{
|
||||
uint8_t padding = 0;
|
||||
int32_t size;
|
||||
memcpy(&size, src + 20, 4);
|
||||
|
||||
if (strncmp(src + 8, "COMPRESSTEXT", 12) == 0)
|
||||
padding = 8;
|
||||
|
||||
memcpy(&size, src + 12 + padding, 4);
|
||||
|
||||
*dst_size = size;
|
||||
|
||||
if (strinflate(dst, size, src + 24, src_size - 24) != 0)
|
||||
if (strinflate(dst, size, src + 16 + padding, src_size - 16 - padding) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2022-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -31,5 +31,6 @@ strdec_jbig2(char **bitmap,
|
|||
}
|
||||
|
||||
jbig2_release_page(ctx, image);
|
||||
jbig2_ctx_free(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
|
115
src/jp2.c
Normal file
115
src/jp2.c
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <openjpeg.h>
|
||||
|
||||
typedef struct _stream_user_data {
|
||||
OPJ_SIZE_T pos;
|
||||
OPJ_SIZE_T size;
|
||||
const unsigned char *data;
|
||||
} stream_user_data;
|
||||
|
||||
static OPJ_SIZE_T
|
||||
_opj_stream_read(void *p_buffer, OPJ_SIZE_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (d->pos >= d->size)
|
||||
return (OPJ_SIZE_T) - 1;
|
||||
|
||||
OPJ_SIZE_T ret_size = p_nb_bytes;
|
||||
|
||||
if (d->pos + ret_size > d->size)
|
||||
ret_size = d->size - d->pos;
|
||||
|
||||
memcpy(p_buffer, d->data + d->pos, ret_size);
|
||||
|
||||
d->pos += ret_size;
|
||||
|
||||
return ret_size;
|
||||
}
|
||||
|
||||
static OPJ_OFF_T
|
||||
_opj_stream_skip(OPJ_OFF_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (d->pos + p_nb_bytes <= d->size)
|
||||
d->pos += p_nb_bytes;
|
||||
else
|
||||
d->pos = d->size;
|
||||
|
||||
return d->pos;
|
||||
}
|
||||
|
||||
static OPJ_BOOL
|
||||
_opj_stream_seek(OPJ_OFF_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (p_nb_bytes <= (OPJ_OFF_T) d->size) {
|
||||
d->pos = p_nb_bytes;
|
||||
return OPJ_TRUE;
|
||||
}
|
||||
|
||||
return OPJ_FALSE;
|
||||
}
|
||||
|
||||
int
|
||||
strinfo_jp2_dim(int *jp2_width, int *jp2_height,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
opj_codec_t *codec;
|
||||
opj_dparameters_t param;
|
||||
opj_stream_t *stream;
|
||||
opj_image_t *image;
|
||||
stream_user_data d;
|
||||
|
||||
if (data_size < 2)
|
||||
return 1;
|
||||
|
||||
opj_set_default_decoder_parameters(&param);
|
||||
|
||||
if ((unsigned char) data[0] == 0xff && (unsigned char) data[1] == 0x4f)
|
||||
codec = opj_create_decompress(OPJ_CODEC_J2K);
|
||||
else
|
||||
codec = opj_create_decompress(OPJ_CODEC_JP2);
|
||||
|
||||
if (!opj_setup_decoder(codec, &param)) {
|
||||
opj_destroy_codec(codec);
|
||||
return 1;
|
||||
}
|
||||
|
||||
stream = opj_stream_default_create(OPJ_TRUE);
|
||||
|
||||
d.pos = 0;
|
||||
d.size = data_size;
|
||||
d.data = (unsigned char *) data;
|
||||
|
||||
opj_stream_set_read_function(stream, _opj_stream_read);
|
||||
opj_stream_set_skip_function(stream, _opj_stream_skip);
|
||||
opj_stream_set_seek_function(stream, _opj_stream_seek);
|
||||
opj_stream_set_user_data(stream, &d, NULL);
|
||||
opj_stream_set_user_data_length(stream, data_size);
|
||||
|
||||
if (!opj_read_header(stream, codec, &image)) {
|
||||
opj_destroy_codec(codec);
|
||||
opj_stream_destroy(stream);
|
||||
return 1;
|
||||
}
|
||||
|
||||
opj_destroy_codec(codec);
|
||||
opj_stream_destroy(stream);
|
||||
|
||||
*jp2_width = image->x1 - image->x0;
|
||||
*jp2_height = image->y1 - image->y0;
|
||||
|
||||
opj_image_destroy(image);
|
||||
|
||||
return 0;
|
||||
}
|
8
src/jp2.h
Normal file
8
src/jp2.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinfo_jp2_dim(int *jp2_width, int *jp2_height,
|
||||
const char * restrict data, int data_size);
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -9,7 +9,7 @@
|
|||
#include <jpeglib.h>
|
||||
|
||||
int
|
||||
strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
|
||||
strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height, int *jpeg_components,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
struct jpeg_decompress_struct cinfo;
|
||||
|
@ -27,6 +27,7 @@ strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
|
|||
|
||||
*jpeg_width = cinfo.output_width;
|
||||
*jpeg_height = cinfo.output_height;
|
||||
*jpeg_components = cinfo.output_components;
|
||||
|
||||
jpeg_destroy((struct jpeg_common_struct *) &cinfo);
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
|
||||
int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height, int *jpeg_components,
|
||||
const char * restrict data, int data_size);
|
||||
|
|
24
src/md5.c
Normal file
24
src/md5.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <openssl/md5.h>
|
||||
|
||||
int
|
||||
strmd5(unsigned char **dst, int *dst_size,
|
||||
const unsigned char * restrict src, int src_size)
|
||||
{
|
||||
*dst_size = MD5_DIGEST_LENGTH;
|
||||
*dst = malloc(*dst_size);
|
||||
|
||||
if (*dst == NULL)
|
||||
return 1;
|
||||
|
||||
MD5(src, src_size, *dst);
|
||||
|
||||
return 0;
|
||||
}
|
9
src/md5.h
Normal file
9
src/md5.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
/*
|
||||
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int
|
||||
strmd5(unsigned char **dst, int *dst_size,
|
||||
const unsigned char * restrict src, int src_size);
|
|
@ -98,7 +98,8 @@ main(int argc, char **argv)
|
|||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strncmp(param->file_stat->type, "HN", 2) == 0) {
|
||||
} else if (strncmp(param->file_stat->type, "HN", 2) == 0 ||
|
||||
(unsigned char) param->file_stat->type[0] == 0xc8) {
|
||||
if (cnki_hn(&param) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
|
|
|
@ -89,7 +89,7 @@ pdf_get_free_id(pdf_object_t **pdf)
|
|||
|
||||
int id = 0;
|
||||
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
for (int i = 1; i < 100000000; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
|
@ -123,7 +123,7 @@ pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
|
|||
int id = 0;
|
||||
|
||||
pdf_object_t *ptr;
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
for (int i = 1; i < 100000000; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
|
|
200
src/pdf_parser.c
200
src/pdf_parser.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -19,26 +19,35 @@ static void *
|
|||
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
|
||||
{
|
||||
const char whitespace[6] = {
|
||||
'\r',
|
||||
'\n',
|
||||
'\f',
|
||||
'\t',
|
||||
'\0',
|
||||
' '
|
||||
0x00,
|
||||
0x09,
|
||||
0x0a,
|
||||
0x0c,
|
||||
0x0d,
|
||||
0x20
|
||||
};
|
||||
|
||||
char tmp[s1 + 1];
|
||||
memcpy(tmp, p1, s1);
|
||||
char *ret = NULL;
|
||||
|
||||
char *ret;
|
||||
char str[s1 + 1];
|
||||
memcpy(str, p1, s1);
|
||||
|
||||
size_t tmp_size = 0;
|
||||
char *tmp;
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
tmp[s1] = whitespace[i];
|
||||
if((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
|
||||
return ret;
|
||||
str[s1] = whitespace[i];
|
||||
|
||||
if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
|
||||
continue;
|
||||
|
||||
if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
|
||||
tmp_size = tmp - (char *) p0;
|
||||
ret = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -57,23 +66,45 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
end = ftell(*fp);
|
||||
fseek(*fp, cur, SEEK_SET);
|
||||
|
||||
int head = 0;
|
||||
int tail = 0;
|
||||
long head = 0;
|
||||
long tail = 0;
|
||||
char *pos;
|
||||
char *tmp;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, size_buf, 1, *fp);
|
||||
if (cur + size_buf < end) {
|
||||
fread(buf, size_buf, 1, *fp);
|
||||
} else {
|
||||
fread(buf, end - cur, 1, *fp);
|
||||
memset(buf + end - cur, 0, size_buf - end + cur);
|
||||
}
|
||||
|
||||
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
|
||||
head = cur + (pos - buf) + 7;
|
||||
if (head == 0) {
|
||||
/* Hack needed for invalid object */
|
||||
pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
|
||||
tmp = memmem(buf, size_buf, " 0 obj", 6);
|
||||
|
||||
while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
|
||||
tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
|
||||
|
||||
if (pos != NULL && tmp != NULL) {
|
||||
if (pos - buf < tmp - buf)
|
||||
head = cur + (pos - buf) + 7;
|
||||
else
|
||||
head = cur + (tmp - buf) + 6;
|
||||
} else if (pos != NULL) {
|
||||
head = cur + (pos - buf) + 7;
|
||||
} else if (tmp != NULL) {
|
||||
head = cur + (tmp - buf) + 6;
|
||||
}
|
||||
}
|
||||
|
||||
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
|
||||
/* We need to check if it is the object stored in stream */
|
||||
while (memcmp(pos + 7,
|
||||
"\r\nendstream", 11) == 0 &&
|
||||
(tmp = _memmem_whitespace(pos + 6,
|
||||
size_buf - (pos - buf) - 6,
|
||||
(tmp = _memmem_whitespace(pos + 7,
|
||||
size_buf - (pos - buf) - 7,
|
||||
"endobj", 6)) != NULL)
|
||||
pos = tmp;
|
||||
|
||||
|
@ -102,13 +133,17 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
ptr->address = head;
|
||||
ptr->size = tail - head;
|
||||
|
||||
fseek(*fp, tail + 6, SEEK_SET);
|
||||
fseek(*fp, tail + 7, SEEK_SET);
|
||||
head = tail = 0;
|
||||
} else if (head > 0 && tail > 0) {
|
||||
if (cur + size_buf < end)
|
||||
fseek(*fp, head, SEEK_SET);
|
||||
tail = 0;
|
||||
} else {
|
||||
fseek(*fp, -6, SEEK_CUR);
|
||||
fseek(*fp, -7, SEEK_CUR);
|
||||
}
|
||||
|
||||
if ((cur = ftell(*fp)) + 6 >= end)
|
||||
if ((cur = ftell(*fp)) + 7 >= end)
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -126,6 +161,7 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char str[8];
|
||||
char *buf;
|
||||
char *head;
|
||||
char *tail;
|
||||
|
@ -137,34 +173,86 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
if (buf == NULL)
|
||||
return 1;
|
||||
|
||||
memset(buf, 0, ptr->size);
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
|
||||
fseek(*fp, ptr->address - 12, SEEK_SET);
|
||||
fread(buf, 8, 1, *fp);
|
||||
/* Handle incomplete object */
|
||||
head = buf;
|
||||
while ((tmp = _memmem_whitespace(head,
|
||||
ptr->size - (head - buf),
|
||||
" 0 obj", 6)) != NULL)
|
||||
head = tmp + 7;
|
||||
|
||||
/* Hack needed for invalid object */
|
||||
while ((tmp = memmem(head,
|
||||
ptr->size - (head - buf),
|
||||
" 0 obj", 6)) != NULL)
|
||||
head = tmp + 6;
|
||||
|
||||
if (head - buf > 0) {
|
||||
ptr->address += head - buf;
|
||||
ptr->size -= head - buf;
|
||||
|
||||
tmp = realloc(buf, ptr->size);
|
||||
|
||||
if (tmp == NULL)
|
||||
return 1;
|
||||
|
||||
buf = tmp;
|
||||
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
}
|
||||
|
||||
/* Hack needed for invalid object */
|
||||
fseek(*fp, ptr->address - 14, SEEK_SET);
|
||||
fread(str, 8, 1, *fp);
|
||||
|
||||
if (str[7] < '0' || str[7] > '9') {
|
||||
fseek(*fp, ptr->address - 15, SEEK_SET);
|
||||
fread(str, 8, 1, *fp);
|
||||
}
|
||||
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (str[i] < '0' || str[i] > '9') {
|
||||
if (i < 7)
|
||||
ptr->id = atoi(str + i + 1);
|
||||
else
|
||||
ptr->id = 0;
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (buf[i] >= '0' && buf[i] <= '9') {
|
||||
ptr->id = atoi(buf + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
|
||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
||||
/*
|
||||
* A dictionary object may have nested dictionary,
|
||||
* but it should not be in a stream
|
||||
*/
|
||||
while ((tmp = _memmem_whitespace(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
|
||||
/* Hack needed for invalid object */
|
||||
(tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
|
||||
if (memmem(buf, tail - buf, "stream\r\n", 8) != NULL) {
|
||||
tail = memmem(buf, ptr->size, ">>", 2);
|
||||
|
||||
while (ptr->size - (tail - buf) > 2 &&
|
||||
(tmp = memmem(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 2,
|
||||
(tmp - tail) - 2,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
} else {
|
||||
/*
|
||||
* A dictionary object may have nested dictionary,
|
||||
* but it should not be in a stream
|
||||
*/
|
||||
while (ptr->size - (tail - buf) > 3 &&
|
||||
(tmp = _memmem_whitespace(tail + 3,
|
||||
ptr->size - (tail - buf) - 3,
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 3,
|
||||
(tmp - tail) - 3,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
}
|
||||
|
||||
ptr->dictionary_size = tail - head + 2;
|
||||
ptr->dictionary = malloc(ptr->dictionary_size + 1);
|
||||
|
@ -172,8 +260,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
if (ptr->dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
|
||||
memcpy(ptr->dictionary, head, ptr->dictionary_size);
|
||||
memset(ptr->dictionary + ptr->dictionary_size, 0, 1);
|
||||
|
||||
if ((head = memmem(tail,
|
||||
ptr->size - (tail - buf),
|
||||
|
@ -186,11 +274,11 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
* contains another object that
|
||||
* contains another stream
|
||||
*/
|
||||
while (_memmem_whitespace(tail,
|
||||
ptr->size - (tail - buf),
|
||||
while (_memmem_whitespace(tail + 10,
|
||||
ptr->size - (tail - buf) - 10,
|
||||
"endobj", 6) != NULL &&
|
||||
(tmp = _memmem_whitespace(tail + 9,
|
||||
ptr->size - (tail - buf) - 9,
|
||||
(tmp = _memmem_whitespace(tail + 10,
|
||||
ptr->size - (tail - buf) - 10,
|
||||
"endstream", 9)) != NULL)
|
||||
tail = tmp;
|
||||
|
||||
|
@ -202,19 +290,13 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
|
||||
memcpy(ptr->stream, head + 8, ptr->stream_size);
|
||||
}
|
||||
|
||||
free(buf);
|
||||
} else {
|
||||
ptr->object_size = ptr->size;
|
||||
ptr->object = malloc(ptr->object_size + 1);
|
||||
|
||||
if (ptr->object == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->object, 0, ptr->object_size + 1);
|
||||
memcpy(ptr->object, buf, ptr->object_size);
|
||||
ptr->object = buf;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -8,14 +8,32 @@
|
|||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "version.h"
|
||||
#include "md5.h"
|
||||
#include "pdf.h"
|
||||
|
||||
static int
|
||||
_info_obj(pdf_object_t **pdf)
|
||||
{
|
||||
char dictionary[128] = "<<\n"
|
||||
"/Producer (Melon " VERSION "." RELEASE "." PATCH EXTRA ")\n"
|
||||
"/CreationDate (D:";
|
||||
|
||||
char buf[64];
|
||||
|
||||
time_t timestamp = time(NULL);
|
||||
|
||||
strftime(buf, 64, "%Y%m%d%H%M%S", gmtime(×tamp));
|
||||
strcat(dictionary, buf);
|
||||
strcat(dictionary, "+00'00')\n>>");
|
||||
|
||||
return pdf_obj_append(pdf, 0, NULL, dictionary, NULL, 0);
|
||||
}
|
||||
|
||||
int
|
||||
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
if (*pdf == NULL || *fp == NULL || _info_obj(pdf) != 0)
|
||||
return 1;
|
||||
|
||||
long cur;
|
||||
|
@ -144,35 +162,28 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
|||
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
|
||||
#endif
|
||||
|
||||
unsigned char str[64];
|
||||
memcpy(str, buf, 64);
|
||||
int fid_size;
|
||||
unsigned char *fid;
|
||||
|
||||
unsigned char fid[MD5_DIGEST_LENGTH];
|
||||
MD5(str, buf_size, fid);
|
||||
if (strmd5(&fid, &fid_size, (unsigned char *) buf, buf_size) != 0)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
/*
|
||||
* TODO: Document information dictionary
|
||||
* `"/Producer (Melon)"'
|
||||
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
|
||||
*
|
||||
* Trailer dictionary
|
||||
* `"/Info %d 0 R"'
|
||||
*/
|
||||
fprintf(*fp,
|
||||
"/Size %d\n/Root %d 0 R\n",
|
||||
"/Size %d\n/Root %d 0 R\n/Info %d 0 R\n",
|
||||
ptr->id + 1,
|
||||
pdf_get_catalog_id(pdf));
|
||||
pdf_get_catalog_id(pdf),
|
||||
ptr->id);
|
||||
|
||||
fputs("/ID [", *fp);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
fputs("<", *fp);
|
||||
|
||||
for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
|
||||
for (int j = 0; j < fid_size; j++)
|
||||
fprintf(*fp, "%02x", fid[j]);
|
||||
|
||||
fputs(">", *fp);
|
||||
|
@ -191,5 +202,7 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
|||
|
||||
fputs("%%EOF\n", *fp);
|
||||
|
||||
free(fid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#define VERSION "0"
|
||||
#define RELEASE "2"
|
||||
#define RELEASE "3"
|
||||
#define PATCH "0"
|
||||
#define EXTRA ""
|
||||
|
|
Loading…
Add table
Reference in a new issue