Compare commits
60 commits
Author | SHA1 | Date | |
---|---|---|---|
2fa2b760ae | |||
dd5854678c | |||
123d62141c | |||
283446dba5 | |||
13cb0a1b8d | |||
a7ecc15614 | |||
56ffe14d5a | |||
c2afbb3cbc | |||
8cd8a8fbba | |||
8276423eb8 | |||
7ac0971a17 | |||
e0fe937e1a | |||
4a02b8bfc7 | |||
7d9d658461 | |||
000405693e | |||
d6fa934b5f | |||
1a1fee1034 | |||
cde014cffb | |||
9019a18449 | |||
a18de8f2ef | |||
70e1e7ea97 | |||
bffb8ce8a4 | |||
3ac51d66b9 | |||
0bbf8e65dd | |||
220a81c2ad | |||
1d899d934d | |||
226f16ddf4 | |||
9646ee61c3 | |||
5466a441df | |||
1ce3f89574 | |||
5a1afb0056 | |||
060bc00a0d | |||
97931e1470 | |||
cd0af5ba3c | |||
988a751c15 | |||
8083b30530 | |||
abce2fd2e4 | |||
224a09a015 | |||
c2ad6549fb | |||
d2826fa075 | |||
288b65a1fd | |||
9c1f1d0b75 | |||
ac3b1dda63 | |||
63728e1340 | |||
3550095959 | |||
86b6487fff | |||
409acceffa | |||
7270c1771f | |||
7a5dd05425 | |||
057a7acc51 | |||
f685e91d35 | |||
2aab394684 | |||
1994f122cc | |||
b20c6ad3ed | |||
3bd7ea7520 | |||
1f62c53da6 | |||
98691d4203 | |||
8d6fbb43c9 | |||
5c5ddc926b | |||
bcb8ef9cd9 |
41 changed files with 2561 additions and 477 deletions
51
CHANGE.md
51
CHANGE.md
|
@ -1,3 +1,54 @@
|
|||
0.3.0 (2023-XX-XX)
|
||||
==================
|
||||
|
||||
* Support HN text overlay.
|
||||
* Support HN page with text.
|
||||
* Handle inaccurate page count in CAJ and KDH.
|
||||
|
||||
0.2.5 (2023-01-05)
|
||||
==================
|
||||
|
||||
* Improve PDF parser.
|
||||
* Handle duplicated object in CAJ.
|
||||
* Handle duplicated image in HN.
|
||||
* Handle incomplete PDF object in CAJ and KDH.
|
||||
* Handle invalid PDF object token in CAJ and KDH.
|
||||
* Fix JBIG decoder.
|
||||
|
||||
0.2.4 (2022-12-31)
|
||||
==================
|
||||
|
||||
* Fix HN image compositing.
|
||||
* Fix PDF object check.
|
||||
|
||||
0.2.3 (2022-12-30)
|
||||
==================
|
||||
|
||||
* Support HN figure placement.
|
||||
|
||||
0.2.2 (2022-12-29)
|
||||
==================
|
||||
|
||||
* Support JPEG 2000 for HN.
|
||||
* Handle missing but referenced root object.
|
||||
* Handle HN with more than one image per page.
|
||||
* Fix buffer overflow.
|
||||
|
||||
0.2.1 (2022-12-26)
|
||||
==================
|
||||
|
||||
* Handle different JPEG colour component.
|
||||
* Handle headless HN and page with no image.
|
||||
|
||||
0.2.0 (2022-12-22)
|
||||
==================
|
||||
|
||||
* KDH conversion now produces a valid PDF
|
||||
* Handle binary data in dictionary.
|
||||
* Add preliminary support for HN
|
||||
* Fix root object dictionary generation when root object has more than two children.
|
||||
* Fix memory leak and data type.
|
||||
|
||||
0.1.0 (2020-04-08)
|
||||
==================
|
||||
|
||||
|
|
20
README.md
20
README.md
|
@ -6,12 +6,18 @@ Melon: Converter that produces PDF from CNKI proprietary formats
|
|||
Development
|
||||
-----------
|
||||
|
||||
Currently, PDF, CAJ, and KDH can be converted. Please report
|
||||
Currently, CAJ, KDH, and HN can be converted. Please report
|
||||
any failures with a sample that can reproduce the behaviour.
|
||||
|
||||
KDH is essentially an invalid PDF file xor'ed with a predetermined key.
|
||||
You may want to convert the decrypted KDH to valid PDF, although some
|
||||
PDF readers can display the invalid PDF.
|
||||
Dependency
|
||||
----------
|
||||
|
||||
1. libcrypto (OpenSSL)
|
||||
2. zlib
|
||||
3. jbig2dec
|
||||
4. libjpeg-turbo
|
||||
5. openjpeg
|
||||
6. pkgconf
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
@ -29,12 +35,12 @@ Options
|
|||
Specify output file
|
||||
|
||||
-b, --buffer
|
||||
Set buffer size (default 512k)
|
||||
Set input buffer size (default 512k)
|
||||
|
||||
-v, --verbose
|
||||
Print more information (twice for even more)
|
||||
Print more information (twice for even more, three times for HN image processing information as well)
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
||||
This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
|
||||
This project is inspired by [https://github.com/caj2pdf/caj2pdf](https://github.com/caj2pdf/caj2pdf)
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
#
|
||||
# Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
src != ls *.c
|
||||
obj = ${src:.c=.o}
|
||||
|
||||
PREFIX = /usr/local
|
||||
|
||||
CFLAGS = -O3 -march=native -pipe -Wall
|
||||
LDFLAGS = -Wl,-O3 -lcrypto -Wl,--as-needed
|
||||
|
||||
all: ${obj}
|
||||
${CC} ${LDFLAGS} -o melon $^
|
||||
|
||||
clean:
|
||||
rm -f melon ${obj}
|
||||
|
||||
install:
|
||||
install -d ${PREFIX}/bin
|
||||
install melon ${PREFIX}/bin/
|
||||
|
||||
deinstall:
|
||||
rm -f ${PREFIX}/bin/melon
|
||||
|
||||
.PHONY: all clean install deinstall
|
26
src/Makefile
26
src/Makefile
|
@ -1,19 +1,33 @@
|
|||
#
|
||||
# Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
# Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
src != ls *.c
|
||||
src = melon.c iconv.c zlib.c jbig2.c jpeg.c jp2.c md5.c \
|
||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c cnki_pdf.c \
|
||||
cnki_zlib.c cnki_jbig.c cnki_jbig_dec.c cnki_jbig2.c cnki.c \
|
||||
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
||||
inc = extern.h version.h iconv.h zlib.h jbig2.h jpeg.h jp2.h md5.h \
|
||||
cnki.h pdf_cnki.h cnki_jbig.h cnki_jbig_dec.h pdf.h
|
||||
|
||||
obj = ${src:.c=.o}
|
||||
|
||||
PREFIX = /usr/local
|
||||
|
||||
CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
|
||||
LDFLAGS = -Wl,-O3 -lcrypto -Wl,--as-needed
|
||||
CFLAGS = -O2 -pipe -flto -Wall -Wextra
|
||||
LDFLAGS = -Wl,-O2 -lcrypto -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
|
||||
|
||||
all: ${obj}
|
||||
${CC} ${LDFLAGS} -o melon $>
|
||||
CFLAGS += -I/usr/local/include
|
||||
LDFLAGS += -L/usr/local/lib
|
||||
|
||||
OPENJPEG_CFLAGS != pkgconf --cflags libopenjp2
|
||||
|
||||
CFLAGS += ${OPENJPEG_CFLAGS}
|
||||
CFLAGS += -DLIBICONV_PLUG
|
||||
|
||||
all: ${obj} ${inc}
|
||||
${CC} ${LDFLAGS} -o melon ${obj}
|
||||
|
||||
clean:
|
||||
rm -f melon ${obj}
|
||||
|
|
64
src/cnki.c
64
src/cnki.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -33,7 +33,7 @@ cnki_create(cnki_t **param)
|
|||
memset((*param)->file_stat, 0, sizeof(file_stat_t));
|
||||
|
||||
(*param)->object_outline = NULL;
|
||||
(*param)->object_nh = NULL;
|
||||
(*param)->object_hn = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -44,10 +44,24 @@ cnki_destroy(cnki_t **param)
|
|||
if (*param != NULL) {
|
||||
if ((*param)->file_stat != NULL)
|
||||
free((*param)->file_stat);
|
||||
if ((*param)->object_outline != NULL)
|
||||
free((*param)->object_outline);
|
||||
if ((*param)->object_nh != NULL)
|
||||
free((*param)->object_nh);
|
||||
|
||||
object_outline_t *ptr_outline;
|
||||
while ((ptr_outline = (*param)->object_outline) != NULL) {
|
||||
(*param)->object_outline = (*param)->object_outline->next;
|
||||
free(ptr_outline);
|
||||
}
|
||||
|
||||
object_hn_t *ptr_hn;
|
||||
while ((ptr_hn = (*param)->object_hn) != NULL) {
|
||||
(*param)->object_hn = (*param)->object_hn->next;
|
||||
free(ptr_hn->text);
|
||||
if (ptr_hn->image_data != NULL)
|
||||
for (int i = 0; i < ptr_hn->image_length; i++)
|
||||
free(ptr_hn->image_data[i].image);
|
||||
free(ptr_hn->image_data);
|
||||
free(ptr_hn);
|
||||
}
|
||||
|
||||
free(*param);
|
||||
}
|
||||
}
|
||||
|
@ -59,32 +73,42 @@ cnki_info(cnki_t **param)
|
|||
return 1;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading file header at %x\n", ADDRESS_HEAD);
|
||||
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
|
||||
|
||||
int addr[2];
|
||||
unsigned char str[2];
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_HEAD, SEEK_SET);
|
||||
fread((*param)->file_stat->type, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
fread(str, 2, 1, (*param)->fp_i);
|
||||
|
||||
if (strcmp((*param)->file_stat->type, "%PDF") == 0) {
|
||||
if ((*param)->stat > 0) {
|
||||
if ((unsigned char) (*param)->file_stat->type[0] > 0x7f)
|
||||
printf("File type is '%02x'\n", (unsigned char) (*param)->file_stat->type[0]);
|
||||
else
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
}
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "%PDF", 4) == 0) {
|
||||
return 0;
|
||||
} else if (strcmp((*param)->file_stat->type, "CAJ") == 0) {
|
||||
} else if (strncmp((*param)->file_stat->type, "CAJ", 3) == 0) {
|
||||
addr[0] = ADDRESS_CAJ_PAGE;
|
||||
addr[1] = ADDRESS_CAJ_OUTLINE;
|
||||
} else if (strcmp((*param)->file_stat->type, "HN") == 0) {
|
||||
} else if (strncmp((*param)->file_stat->type, "HN", 2) == 0) {
|
||||
addr[0] = ADDRESS_HN_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if (strcmp((*param)->file_stat->type, "KDH ") == 0) {
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
addr[0] = ADDRESS_C8_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if (strncmp((*param)->file_stat->type, "KDH ", 4) == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading page count at %x\n", addr[0]);
|
||||
printf("Reading page count at 0x%x\n", addr[0]);
|
||||
|
||||
fseek((*param)->fp_i, addr[0], SEEK_SET);
|
||||
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
|
||||
|
@ -93,8 +117,16 @@ cnki_info(cnki_t **param)
|
|||
printf("Advised %d page(s)\n",
|
||||
(*param)->file_stat->page);
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "HN", 2) == 0 && str[0] == 0xc8 && str[1] == 0x00) {
|
||||
fseek((*param)->fp_i, 0xd8, SEEK_SET);
|
||||
return 0;
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
fseek((*param)->fp_i, 0x50, SEEK_SET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading outline count at %x\n", addr[1]);
|
||||
printf("Reading outline count at 0x%x\n", addr[1]);
|
||||
|
||||
fseek((*param)->fp_i, addr[1], SEEK_SET);
|
||||
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);
|
||||
|
@ -106,7 +138,7 @@ cnki_info(cnki_t **param)
|
|||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading outline(s)\n");
|
||||
printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
printf("\t%19s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
"title",
|
||||
"hierarchy",
|
||||
"page",
|
||||
|
|
60
src/cnki.h
60
src/cnki.h
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -16,6 +16,8 @@
|
|||
#define ADDRESS_HN_PAGE 0x0090
|
||||
#define ADDRESS_HN_OUTLINE 0x0158
|
||||
|
||||
#define ADDRESS_C8_PAGE 0x0008
|
||||
|
||||
#define ADDRESS_KDH_BODY 0x00fe
|
||||
|
||||
#define KEY_KDH "FZHMEI"
|
||||
|
@ -44,26 +46,36 @@ typedef struct _object_outline_tree_t {
|
|||
struct _object_outline_tree_t *right;
|
||||
} object_outline_tree_t;
|
||||
|
||||
typedef enum _nh_code {
|
||||
CCITTFAX,
|
||||
typedef enum _hn_code {
|
||||
JBIG, /* Inverted */
|
||||
DCT_0,
|
||||
DCT_1,
|
||||
DCT_1, /* Inverted */
|
||||
JBIG2,
|
||||
JPX
|
||||
} nh_code;
|
||||
} hn_code;
|
||||
|
||||
typedef struct _object_nh_t {
|
||||
int32_t address; /* Starting at end of object_outline_t */
|
||||
typedef struct _hn_image_t {
|
||||
int32_t format; /* hn_code */
|
||||
int32_t address;
|
||||
int32_t size;
|
||||
int16_t page[2];
|
||||
int32_t zero[2];
|
||||
char *text;
|
||||
int32_t image_format; /* nh_code */
|
||||
int32_t image_address;
|
||||
int32_t image_size;
|
||||
uint16_t x;
|
||||
uint16_t y;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
char *image;
|
||||
struct _object_nh_t *next;
|
||||
} object_nh_t;
|
||||
} hn_image_t;
|
||||
|
||||
typedef struct _object_hn_t {
|
||||
int32_t address; /* Starting at end of object_outline_t */
|
||||
int32_t text_size;
|
||||
int16_t image_length;
|
||||
int16_t page;
|
||||
int32_t unknown; /* TODO: what is it? */
|
||||
int32_t address_next;
|
||||
char *text;
|
||||
struct _hn_image_t *image_data;
|
||||
struct _object_hn_t *next;
|
||||
} object_hn_t;
|
||||
|
||||
typedef struct _cnki_t {
|
||||
int stat;
|
||||
|
@ -72,15 +84,27 @@ typedef struct _cnki_t {
|
|||
FILE *fp_o;
|
||||
file_stat_t *file_stat;
|
||||
object_outline_t *object_outline;
|
||||
object_nh_t *object_nh;
|
||||
object_hn_t *object_hn;
|
||||
} cnki_t;
|
||||
|
||||
/* cnki_pdf.c */
|
||||
int cnki_pdf(cnki_t **param);
|
||||
int cnki_pdf_hn(cnki_t **param);
|
||||
|
||||
/* cnki_outline_tree.c */
|
||||
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
||||
object_outline_t **outline, int *ids);
|
||||
|
||||
/* cnki_xml.c */
|
||||
int cnki_xml(char **xml, FILE **fp);
|
||||
/* cnki_zlib.c */
|
||||
int cnki_zlib(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size);
|
||||
|
||||
/* cnki_jbig.c */
|
||||
int cnki_jbig(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
||||
/* cnki_jbig2.c */
|
||||
int cnki_jbig2(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -18,7 +18,7 @@ cnki_caj(cnki_t **param)
|
|||
printf("Begin 'CAJ' conversion\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
|
||||
printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY);
|
||||
|
||||
int addr;
|
||||
|
||||
|
@ -29,7 +29,7 @@ cnki_caj(cnki_t **param)
|
|||
fseek((*param)->fp_i, addr, SEEK_SET);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Advised document body address is %x\n", addr);
|
||||
printf("Advised document body address is 0x%x\n", addr);
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
|
|
155
src/cnki_hn.c
Normal file
155
src/cnki_hn.c
Normal file
|
@ -0,0 +1,155 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_hn(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'HN' conversion\n");
|
||||
|
||||
if ((*param)->file_stat->page > 0)
|
||||
(*param)->object_hn = malloc(sizeof(object_hn_t));
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((*param)->object_hn == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"length",
|
||||
"page",
|
||||
"unknown",
|
||||
"next",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
}
|
||||
|
||||
object_hn_t *ptr = (*param)->object_hn;
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->unknown, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->address_next, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_data = NULL;
|
||||
ptr->next = NULL;
|
||||
|
||||
if (i < (*param)->file_stat->page - 1) {
|
||||
ptr->next = malloc(sizeof(object_hn_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->text_size > 0) {
|
||||
ptr->text = malloc(ptr->text_size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t%8d\t%08x",
|
||||
ptr->address,
|
||||
ptr->text_size,
|
||||
ptr->image_length,
|
||||
ptr->page,
|
||||
ptr->unknown,
|
||||
ptr->address_next);
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||
|
||||
if (ptr->image_data == NULL)
|
||||
return 1;
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||
ptr->image_data[i].x = 0;
|
||||
ptr->image_data[i].y = 0;
|
||||
ptr->image_data[i].w = 0;
|
||||
ptr->image_data[i].h = 0;
|
||||
fseek((*param)->fp_i,
|
||||
ptr->image_data[i].address + ptr->image_data[i].size,
|
||||
SEEK_SET);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
ptr->image_data[i].image = malloc(ptr->image_data[i].size);
|
||||
|
||||
if (ptr->image_data[i].image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
|
||||
fread(ptr->image_data[i].image,
|
||||
ptr->image_data[i].size, 1,
|
||||
(*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
if (i == 0) {
|
||||
printf("\t%4d\t%08x\t%8d\n",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
} else {
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4d\t%08x\t%8d\n",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ((*param)->stat > 1) {
|
||||
printf("\t%4s\t%8s\t%8s\n",
|
||||
"",
|
||||
"",
|
||||
"");
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
cnki_pdf_hn(param);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
return 0;
|
||||
}
|
43
src/cnki_jbig.c
Normal file
43
src/cnki_jbig.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki_jbig.h"
|
||||
#include "cnki_jbig_dec.h"
|
||||
|
||||
int
|
||||
cnki_jbig(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size)
|
||||
{
|
||||
dib_t *dib = malloc(sizeof(dib_t));
|
||||
|
||||
if (dib == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy(dib, jbig, 40);
|
||||
|
||||
int width_padded = (dib->width * dib->depth + 7) / 8;
|
||||
|
||||
*bitmap_size = dib->height * width_padded;
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
|
||||
if (*bitmap == NULL) {
|
||||
free(dib);
|
||||
return 1;
|
||||
}
|
||||
|
||||
strdec_jbig(bitmap, dib->width, dib->height, jbig + 48, jbig_size - 48);
|
||||
|
||||
*bitmap_width = dib->width;
|
||||
*bitmap_height = dib->height;
|
||||
|
||||
free(dib);
|
||||
|
||||
return 0;
|
||||
}
|
41
src/cnki_jbig.h
Normal file
41
src/cnki_jbig.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Values stored in the compression member of dib_t */
typedef enum _dib_compression_code {
	BI_RGB = 0,
	BI_RLE8 = 1,
	BI_RLE4 = 2,
	BI_BITFIELDS = 3,
	BI_JPEG = 4,
	BI_PNG = 5,
	BI_ALPHABITFIELDS = 6,
	/* 7 to 10 are not assigned here */
	BI_CMYK = 11,
	BI_CMYKRLE8 = 12,
	BI_CMYKRLE4 = 13
} dib_compression_code;
|
||||
|
||||
/*
 * 40-byte bitmap header found at the start of CNKI JBIG/JBIG2 image data.
 * NOTE(review): the layout appears to mirror the Windows BITMAPINFOHEADER;
 * confirm before relying on members other than width, height and depth.
 */
typedef struct _dib_t {
	uint32_t dib_size;	/* Always 40 */
	int32_t width;		/* Image width */
	int32_t height;		/* Image height */
	uint16_t plane;		/* Always 1 */
	uint16_t depth;		/* Multiplied with width to size a row */
	uint32_t compression;	/* dib_compression_code */
	uint32_t size;
	int32_t resolution_h;
	int32_t resolution_v;
	uint32_t colour;
	uint32_t colour_used;
} dib_t;
|
||||
|
||||
/* One colour table entry: blue, green and red followed by a filler member */
typedef struct _colour_table {
	uint16_t blue;
	uint16_t green;
	uint16_t red;
	uint16_t fill;	/* Always 0 */
} colour_table;
|
43
src/cnki_jbig2.c
Normal file
43
src/cnki_jbig2.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki_jbig.h"
|
||||
#include "jbig2.h"
|
||||
|
||||
int
|
||||
cnki_jbig2(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size)
|
||||
{
|
||||
dib_t *dib = malloc(sizeof(dib_t));
|
||||
|
||||
if (dib == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy(dib, jbig, 40);
|
||||
|
||||
int width_padded = (dib->width * dib->depth + 7) / 8;
|
||||
|
||||
*bitmap_size = dib->height * width_padded;
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
|
||||
if (*bitmap == NULL) {
|
||||
free(dib);
|
||||
return 1;
|
||||
}
|
||||
|
||||
strdec_jbig2(bitmap, jbig + 48, jbig_size - 48);
|
||||
|
||||
*bitmap_width = dib->width;
|
||||
*bitmap_height = dib->height;
|
||||
|
||||
free(dib);
|
||||
|
||||
return 0;
|
||||
}
|
314
src/cnki_jbig_dec.c
Normal file
314
src/cnki_jbig_dec.c
Normal file
|
@ -0,0 +1,314 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Per-state interval size: _decode() subtracts _LSZ[_st[cx]] from the A
 * register and _exchange_lps() loads it into A.
 */
static const uint16_t _LSZ[0x71] = {
	/*   0 */ 0x5a1d,
	/*   1 */ 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
	/*   9 */ 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
	/*  17 */ 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, 0x0406,
	/*  25 */ 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, 0x0068,
	/*  33 */ 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, 0x261f,
	/*  41 */ 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, 0x0861,
	/*  49 */ 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, 0x01f8,
	/*  57 */ 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, 0x5b12,
	/*  65 */ 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, 0x1aa9,
	/*  73 */ 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, 0x5832,
	/*  81 */ 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, 0x5570,
	/*  89 */ 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, 0x4f46,
	/*  97 */ 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, 0x415e,
	/* 105 */ 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
};
|
||||
|
||||
/*
 * Next state stored into _st[cx] when the decoded symbol is the less
 * probable one.
 */
static const uint8_t _NLPS[0x71] = {
	/*   0 */ 1,
	/*   1 */ 14, 16, 18, 20, 23, 25, 28, 30,
	/*   9 */ 33, 35, 9, 10, 12, 15, 36, 38,
	/*  17 */ 39, 40, 42, 43, 45, 46, 48, 49,
	/*  25 */ 51, 52, 54, 56, 57, 59, 60, 62,
	/*  33 */ 63, 32, 33, 37, 64, 65, 67, 68,
	/*  41 */ 69, 70, 72, 73, 74, 75, 77, 78,
	/*  49 */ 79, 48, 50, 50, 51, 52, 53, 54,
	/*  57 */ 55, 56, 57, 58, 59, 61, 61, 65,
	/*  65 */ 80, 81, 82, 83, 84, 86, 87, 87,
	/*  73 */ 72, 72, 74, 74, 75, 77, 77, 80,
	/*  81 */ 88, 89, 90, 91, 92, 93, 86, 88,
	/*  89 */ 95, 96, 97, 99, 99, 93, 95, 101,
	/*  97 */ 102, 103, 104, 99, 105, 106, 107, 103,
	/* 105 */ 105, 108, 109, 110, 111, 110, 112, 112
};
|
||||
|
||||
/*
 * Next state stored into _st[cx] when the decoded symbol is the more
 * probable one.
 */
static const uint8_t _NMPS[0x71] = {
	/*   0 */ 1,
	/*   1 */ 2, 3, 4, 5, 6, 7, 8, 9,
	/*   9 */ 10, 11, 12, 13, 13, 15, 16, 17,
	/*  17 */ 18, 19, 20, 21, 22, 23, 24, 25,
	/*  25 */ 26, 27, 28, 29, 30, 31, 32, 33,
	/*  33 */ 34, 35, 9, 37, 38, 39, 40, 41,
	/*  41 */ 42, 43, 44, 45, 46, 47, 48, 49,
	/*  49 */ 50, 51, 52, 53, 54, 55, 56, 57,
	/*  57 */ 58, 59, 60, 61, 62, 63, 32, 65,
	/*  65 */ 66, 67, 68, 69, 70, 71, 72, 73,
	/*  73 */ 74, 75, 76, 77, 78, 79, 48, 81,
	/*  81 */ 82, 83, 84, 85, 86, 87, 71, 89,
	/*  89 */ 90, 91, 92, 93, 94, 86, 96, 97,
	/*  97 */ 98, 99, 100, 93, 102, 103, 104, 99,
	/* 105 */ 106, 107, 103, 109, 107, 111, 109, 111
};
|
||||
|
||||
/*
 * States in which a less probable symbol also flips _mps[cx].  Only the
 * set entries are listed; all remaining entries are false.
 */
static const bool _SWTCH[0x71] = {
	[0] = true,
	[14] = true,
	[36] = true,
	[64] = true,
	[80] = true,
	[88] = true,
	[95] = true,
	[105] = true,
	[110] = true,
	[112] = true
};
|
||||
|
||||
/* Bits left in the current input byte; reloaded to 8 by _bytein() */
static uint8_t _ct;
/* Pixel value produced by the latest _decode() call */
static uint8_t _pix;

/* Arithmetic decoder registers */
static uint16_t _reg_a;
static uint32_t _reg_c;

/* Per-context more probable symbol and state index */
static uint8_t _mps[0x1000];
static uint8_t _st[0x1000];

/* Image dimensions in pixels and the row length in bytes */
static int _width;
static int _height;
static int _width_padded;

/* Read position within _scd (despite its name) and the output bitmap */
static int _ret_pos;
static unsigned char *_ret;

/* Coded input data and its size in bytes */
static int _scd_size;
static unsigned char *_scd;
|
||||
|
||||
static void
|
||||
_bytein(void)
|
||||
{
|
||||
if (_ret_pos < _scd_size)
|
||||
_reg_c += _scd[_ret_pos++] << 8;
|
||||
|
||||
_ct = 8;
|
||||
}
|
||||
|
||||
static void
|
||||
_initdec(void)
|
||||
{
|
||||
memset(_mps, 0, 0x1000);
|
||||
memset(_st, 0, 0x1000);
|
||||
|
||||
_reg_c = 0;
|
||||
_bytein();
|
||||
_reg_c <<= 8;
|
||||
_bytein();
|
||||
_reg_c <<= 8;
|
||||
_bytein();
|
||||
_reg_a = 0x0000;
|
||||
}
|
||||
|
||||
static void
|
||||
_exchange_lps(uint16_t cx)
|
||||
{
|
||||
uint8_t st_cx = _st[cx];
|
||||
uint16_t lsz_st_cx = _LSZ[_st[cx]];
|
||||
|
||||
if (_reg_a < lsz_st_cx) {
|
||||
_pix = _mps[cx];
|
||||
_st[cx] = _NMPS[st_cx];
|
||||
} else {
|
||||
_pix = 1 - _mps[cx];
|
||||
|
||||
if (_SWTCH[st_cx])
|
||||
_mps[cx] = _pix;
|
||||
|
||||
_st[cx] = _NLPS[st_cx];
|
||||
}
|
||||
|
||||
_reg_c -= _reg_a << 16;
|
||||
_reg_a = lsz_st_cx;
|
||||
}
|
||||
|
||||
static void
|
||||
_exchange_mps(uint16_t cx)
|
||||
{
|
||||
uint8_t st_cx = _st[cx];
|
||||
uint16_t lsz_st_cx = _LSZ[_st[cx]];
|
||||
|
||||
if (_reg_a < lsz_st_cx) {
|
||||
_pix = 1 - _mps[cx];
|
||||
|
||||
if (_SWTCH[st_cx])
|
||||
_mps[cx] = _pix;
|
||||
|
||||
_st[cx] = _NLPS[st_cx];
|
||||
} else {
|
||||
_pix = _mps[cx];
|
||||
_st[cx] = _NMPS[st_cx];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_renormd(void)
|
||||
{
|
||||
do {
|
||||
if (_ct == 0)
|
||||
_bytein();
|
||||
|
||||
_reg_a <<= 1;
|
||||
_reg_c <<= 1;
|
||||
_ct--;
|
||||
} while (_reg_a < 0x8000);
|
||||
|
||||
if (_ct == 0)
|
||||
_bytein();
|
||||
}
|
||||
|
||||
static void
|
||||
_decode(uint16_t cx)
|
||||
{
|
||||
_reg_a -= _LSZ[_st[cx]];
|
||||
|
||||
if (_reg_a > _reg_c >> 16) {
|
||||
if (_reg_a < 0x8000) {
|
||||
_exchange_mps(cx);
|
||||
_renormd();
|
||||
} else {
|
||||
_pix = _mps[cx];
|
||||
}
|
||||
} else {
|
||||
_exchange_lps(cx);
|
||||
_renormd();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_procline(int line, char *a, char *b, char *c)
|
||||
{
|
||||
/* The encoder must be erroneous */
|
||||
uint16_t cx = 0;
|
||||
|
||||
if (line > 0) {
|
||||
cx += (_ret[_width_padded * (_height - line)] & 0x20) << 2;
|
||||
cx += _ret[_width_padded * (_height - line)] & 0x40;
|
||||
cx += (_ret[_width_padded * (_height - line)] & 0x80) >> 2;
|
||||
}
|
||||
|
||||
if (line > 1) {
|
||||
cx += (_ret[_width_padded * (_height - line + 1)] & 0x40) >> 4;
|
||||
cx += (_ret[_width_padded * (_height - line + 1)] & 0x80) >> 6;
|
||||
}
|
||||
|
||||
for (int i = 0; i < _width; i++) {
|
||||
_decode(cx);
|
||||
|
||||
cx >>= 1;
|
||||
|
||||
if (_pix == 1) {
|
||||
_ret[_width_padded * (_height - line - 1) + i / 8] |= _pix << (7 - (i & 0x07));
|
||||
c[i] = 1;
|
||||
cx |= 0x0200;
|
||||
} else {
|
||||
cx &= 0xfdff;
|
||||
}
|
||||
|
||||
if (i + 2 < _width && a[i + 2] == 1)
|
||||
cx |= 0x0004;
|
||||
else
|
||||
cx &= 0xfffb;
|
||||
|
||||
if (i + 3 < _width && b[i + 3] == 1)
|
||||
cx |= 0x0080;
|
||||
else
|
||||
cx &= 0xff7f;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
_procstripe(void)
|
||||
{
|
||||
if (_height <= 0 || _width_padded <= 0)
|
||||
return 1;
|
||||
|
||||
int pix_size = 8 * _width_padded;
|
||||
|
||||
char *buf = malloc(3 * pix_size);
|
||||
|
||||
if (buf == NULL)
|
||||
return 1;
|
||||
|
||||
memset(buf, 0, 3 * pix_size);
|
||||
|
||||
char *a = buf;
|
||||
char *b = a + pix_size;
|
||||
char *c = b + pix_size;
|
||||
char *z;
|
||||
|
||||
for (int i = 0; i < _height; i++) {
|
||||
_decode(0x029c);
|
||||
|
||||
if (_pix == 1) {
|
||||
if (i > 0)
|
||||
memcpy(_ret + _width_padded * (_height - i - 1),
|
||||
_ret + _width_padded * (_height - i),
|
||||
_width_padded);
|
||||
|
||||
memcpy(c, b, pix_size);
|
||||
} else {
|
||||
/* line atypical */
|
||||
memset(c, 0, pix_size);
|
||||
_procline(i, a, b, c);
|
||||
}
|
||||
|
||||
z = a;
|
||||
a = b;
|
||||
b = c;
|
||||
c = z;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
strdec_jbig(char **bitmap, int width, int height,
|
||||
const char * restrict jbig, int jbig_size)
|
||||
{
|
||||
_width = width;
|
||||
_height = height;
|
||||
|
||||
_width_padded = (_width + 7) / 8;
|
||||
|
||||
memset(*bitmap, 0, _height * _width_padded);
|
||||
|
||||
_ret_pos = 0;
|
||||
_ret = (unsigned char *) *bitmap;
|
||||
|
||||
_scd_size = jbig_size;
|
||||
_scd = (unsigned char *) jbig;
|
||||
|
||||
_initdec();
|
||||
return _procstripe();
|
||||
}
|
8
src/cnki_jbig_dec.h
Normal file
8
src/cnki_jbig_dec.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
 * Decode CNKI JBIG data of jbig_size bytes into the buffer *bitmap, which
 * the caller provides for an image of width by height pixels.
 */
int strdec_jbig(char **bitmap,
    int width,
    int height,
    const char * restrict jbig,
    int jbig_size);
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -15,35 +15,54 @@ cnki_kdh(cnki_t **param)
|
|||
if ((*param)->stat > 0)
|
||||
printf("Begin 'KDH' decryption\n");
|
||||
|
||||
long cur = ADDRESS_KDH_BODY;
|
||||
long end;
|
||||
|
||||
fseek((*param)->fp_i, 0, SEEK_END);
|
||||
|
||||
long size = ftell((*param)->fp_i);
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET);
|
||||
end = ftell((*param)->fp_i);
|
||||
fseek((*param)->fp_i, cur, SEEK_SET);
|
||||
|
||||
const char key[] = KEY_KDH;
|
||||
const int key_len = KEY_KDH_LENGTH;
|
||||
long key_cur = 0;
|
||||
|
||||
int buf_size;
|
||||
char buf[(*param)->size_buf];
|
||||
|
||||
FILE *tmp = tmpfile();
|
||||
|
||||
if (tmp == NULL)
|
||||
return 1;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
||||
if (cur + (*param)->size_buf < end)
|
||||
buf_size = (*param)->size_buf;
|
||||
else
|
||||
buf_size = end - cur;
|
||||
|
||||
for (int i = 0; i < (*param)->size_buf; i++) {
|
||||
buf[i] ^= key[key_cur % key_len];
|
||||
key_cur++;
|
||||
}
|
||||
fread(buf, buf_size, 1, (*param)->fp_i);
|
||||
|
||||
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
|
||||
for (int i = 0; i < buf_size; i++)
|
||||
buf[i] ^= key[key_cur++ % key_len];
|
||||
|
||||
if (ftell((*param)->fp_i) == size)
|
||||
fwrite(buf, buf_size, 1, tmp);
|
||||
|
||||
if ((cur = ftell((*param)->fp_i)) >= end)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Decryption ended total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
||||
|
||||
fclose((*param)->fp_i);
|
||||
|
||||
fseek(tmp, 0, SEEK_SET);
|
||||
(*param)->fp_i = tmp;
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
110
src/cnki_nh.c
110
src/cnki_nh.c
|
@ -1,110 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_nh(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'HN' conversion\n");
|
||||
|
||||
if ((*param)->file_stat->page > 0)
|
||||
(*param)->object_nh = malloc(sizeof(object_nh_t));
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((*param)->object_nh == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"page",
|
||||
"zero",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
}
|
||||
|
||||
object_nh_t *ptr = (*param)->object_nh;
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_format = -1;
|
||||
ptr->image_address = 0;
|
||||
ptr->image_size = 0;
|
||||
ptr->image = NULL;
|
||||
ptr->next = NULL;
|
||||
|
||||
if (i < (*param)->file_stat->page - 1) {
|
||||
ptr->next = malloc(sizeof(object_nh_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr = (*param)->object_nh;
|
||||
while (ptr != NULL) {
|
||||
ptr->text = malloc(ptr->size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->size, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_size, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->image = malloc(ptr->image_size);
|
||||
|
||||
if (ptr->image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_address, SEEK_SET);
|
||||
fread(ptr->image, ptr->image_size, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->page[0],
|
||||
ptr->page[1],
|
||||
ptr->zero[0],
|
||||
ptr->zero[1],
|
||||
ptr->image_format,
|
||||
ptr->image_address,
|
||||
ptr->image_size);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
/* TODO: Finish me please :) */
|
||||
return 1;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
1164
src/cnki_pdf.c
1164
src/cnki_pdf.c
File diff suppressed because it is too large
Load diff
|
@ -1,14 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Placeholder for XML metadata extraction.  Does no work with either
 * argument and always reports failure (1).
 */
int
cnki_xml(char **xml, FILE **fp)
{
	/* TODO: Extract XML and embed into `/Metadata' */
	(void) xml;
	(void) fp;

	return 1;
}
|
30
src/cnki_zlib.c
Normal file
30
src/cnki_zlib.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "zlib.h"
|
||||
|
||||
/*
 * Inflate a CNKI compressed block.
 *
 * The inflated size is stored as a 4-byte integer (copied as-is, in host
 * byte order) at offset 12 and the compressed data starts at offset 16.
 * When the 12 bytes at offset 8 spell "COMPRESSTEXT", both offsets move
 * forward by a further 8 bytes.
 *
 * dst is handed to strinflate() together with the recorded size, which
 * is also stored in *dst_size.
 *
 * Returns 0 on success, 1 if src is NULL, the block is too short to hold
 * its header, the recorded size is negative, or strinflate() fails.
 */
int
cnki_zlib(char **dst, int *dst_size,
	const char * restrict src, int src_size)
{
	uint8_t padding = 0;
	int32_t size;

	if (src == NULL || src_size < 16)
		return 1;

	/* Only look for the marker when all 12 of its bytes are in range */
	if (src_size >= 20 && strncmp(src + 8, "COMPRESSTEXT", 12) == 0)
		padding = 8;

	/* The size field and data offset must lie inside the block */
	if (src_size < 16 + padding)
		return 1;

	memcpy(&size, src + 12 + padding, 4);

	if (size < 0)
		return 1;

	*dst_size = size;

	if (strinflate(dst, size, src + 16 + padding, src_size - 16 - padding) != 0)
		return 1;

	return 0;
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -14,8 +14,8 @@ int cnki_info(cnki_t **param);
|
|||
/* cnki_caj.c */
|
||||
int cnki_caj(cnki_t **param);
|
||||
|
||||
/* cnki_nh.c */
|
||||
int cnki_nh(cnki_t **param);
|
||||
/* cnki_hn.c */
|
||||
int cnki_hn(cnki_t **param);
|
||||
|
||||
/* cnki_kdh.c */
|
||||
int cnki_kdh(cnki_t **param);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -9,7 +9,6 @@
|
|||
|
||||
#include <iconv.h>
|
||||
|
||||
/* So, why would anyone use something other than UTF-8? */
|
||||
int
|
||||
strconv(char **dst,
|
||||
const char * restrict dst_code,
|
||||
|
@ -51,8 +50,7 @@ strconv(char **dst,
|
|||
free(src_start);
|
||||
return 1;
|
||||
} else {
|
||||
/* Not including NULL */
|
||||
*size -= dst_size + 2;
|
||||
*size -= dst_size;
|
||||
|
||||
*dst = malloc(*size);
|
||||
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int
|
||||
strconv(char **dst,
|
||||
int strconv(char **dst,
|
||||
const char * restrict dst_code,
|
||||
const char * restrict src,
|
||||
const char * restrict src_code,
|
||||
|
|
36
src/jbig2.c
Normal file
36
src/jbig2.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2022-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <jbig2.h>
|
||||
|
||||
int
|
||||
strdec_jbig2(char **bitmap,
|
||||
const char * restrict jbig2, int jbig2_size)
|
||||
{
|
||||
Jbig2Ctx *ctx = jbig2_ctx_new(NULL, JBIG2_OPTIONS_EMBEDDED, NULL, NULL, NULL);
|
||||
|
||||
jbig2_data_in(ctx, (unsigned char *) jbig2, jbig2_size);
|
||||
|
||||
jbig2_complete_page(ctx);
|
||||
|
||||
Jbig2Image *image = jbig2_page_out(ctx);
|
||||
|
||||
int width_padded = (image->width + 7) / 8;
|
||||
unsigned char *data = image->data;
|
||||
|
||||
for (unsigned int i = 0; i < image->height; i++) {
|
||||
memcpy(*bitmap + i * width_padded, data, width_padded);
|
||||
data += image->stride;
|
||||
}
|
||||
|
||||
jbig2_release_page(ctx, image);
|
||||
jbig2_ctx_free(ctx);
|
||||
return 0;
|
||||
}
|
7
src/jbig2.h
Normal file
7
src/jbig2.h
Normal file
|
@ -0,0 +1,7 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strdec_jbig2(char **bitmap, const char * restrict jbig2, int jbig2_size);
|
115
src/jp2.c
Normal file
115
src/jp2.c
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <openjpeg.h>
|
||||
|
||||
typedef struct _stream_user_data {
|
||||
OPJ_SIZE_T pos;
|
||||
OPJ_SIZE_T size;
|
||||
const unsigned char *data;
|
||||
} stream_user_data;
|
||||
|
||||
static OPJ_SIZE_T
|
||||
_opj_stream_read(void *p_buffer, OPJ_SIZE_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (d->pos >= d->size)
|
||||
return (OPJ_SIZE_T) - 1;
|
||||
|
||||
OPJ_SIZE_T ret_size = p_nb_bytes;
|
||||
|
||||
if (d->pos + ret_size > d->size)
|
||||
ret_size = d->size - d->pos;
|
||||
|
||||
memcpy(p_buffer, d->data + d->pos, ret_size);
|
||||
|
||||
d->pos += ret_size;
|
||||
|
||||
return ret_size;
|
||||
}
|
||||
|
||||
static OPJ_OFF_T
|
||||
_opj_stream_skip(OPJ_OFF_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (d->pos + p_nb_bytes <= d->size)
|
||||
d->pos += p_nb_bytes;
|
||||
else
|
||||
d->pos = d->size;
|
||||
|
||||
return d->pos;
|
||||
}
|
||||
|
||||
static OPJ_BOOL
|
||||
_opj_stream_seek(OPJ_OFF_T p_nb_bytes, void *p_user_data)
|
||||
{
|
||||
stream_user_data *d = (stream_user_data *) p_user_data;
|
||||
|
||||
if (p_nb_bytes <= (OPJ_OFF_T) d->size) {
|
||||
d->pos = p_nb_bytes;
|
||||
return OPJ_TRUE;
|
||||
}
|
||||
|
||||
return OPJ_FALSE;
|
||||
}
|
||||
|
||||
int
|
||||
strinfo_jp2_dim(int *jp2_width, int *jp2_height,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
opj_codec_t *codec;
|
||||
opj_dparameters_t param;
|
||||
opj_stream_t *stream;
|
||||
opj_image_t *image;
|
||||
stream_user_data d;
|
||||
|
||||
if (data_size < 2)
|
||||
return 1;
|
||||
|
||||
opj_set_default_decoder_parameters(¶m);
|
||||
|
||||
if ((unsigned char) data[0] == 0xff && (unsigned char) data[1] == 0x4f)
|
||||
codec = opj_create_decompress(OPJ_CODEC_J2K);
|
||||
else
|
||||
codec = opj_create_decompress(OPJ_CODEC_JP2);
|
||||
|
||||
if (!opj_setup_decoder(codec, ¶m)) {
|
||||
opj_destroy_codec(codec);
|
||||
return 1;
|
||||
}
|
||||
|
||||
stream = opj_stream_default_create(OPJ_TRUE);
|
||||
|
||||
d.pos = 0;
|
||||
d.size = data_size;
|
||||
d.data = (unsigned char *) data;
|
||||
|
||||
opj_stream_set_read_function(stream, _opj_stream_read);
|
||||
opj_stream_set_skip_function(stream, _opj_stream_skip);
|
||||
opj_stream_set_seek_function(stream, _opj_stream_seek);
|
||||
opj_stream_set_user_data(stream, &d, NULL);
|
||||
opj_stream_set_user_data_length(stream, data_size);
|
||||
|
||||
if (!opj_read_header(stream, codec, &image)) {
|
||||
opj_destroy_codec(codec);
|
||||
opj_stream_destroy(stream);
|
||||
return 1;
|
||||
}
|
||||
|
||||
opj_destroy_codec(codec);
|
||||
opj_stream_destroy(stream);
|
||||
|
||||
*jp2_width = image->x1 - image->x0;
|
||||
*jp2_height = image->y1 - image->y0;
|
||||
|
||||
opj_image_destroy(image);
|
||||
|
||||
return 0;
|
||||
}
|
8
src/jp2.h
Normal file
8
src/jp2.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinfo_jp2_dim(int *jp2_width, int *jp2_height,
|
||||
const char * restrict data, int data_size);
|
37
src/jpeg.c
Normal file
37
src/jpeg.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <jpeglib.h>
|
||||
|
||||
int
|
||||
strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height, int *jpeg_components,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
struct jpeg_decompress_struct cinfo;
|
||||
struct jpeg_error_mgr jerr;
|
||||
|
||||
cinfo.err = jpeg_std_error(&jerr);
|
||||
|
||||
jpeg_create_decompress(&cinfo);
|
||||
|
||||
jpeg_mem_src(&cinfo, (unsigned char *) data, data_size);
|
||||
|
||||
jpeg_read_header(&cinfo, TRUE);
|
||||
|
||||
jpeg_calc_output_dimensions(&cinfo);
|
||||
|
||||
*jpeg_width = cinfo.output_width;
|
||||
*jpeg_height = cinfo.output_height;
|
||||
*jpeg_components = cinfo.output_components;
|
||||
|
||||
jpeg_destroy((struct jpeg_common_struct *) &cinfo);
|
||||
|
||||
jpeg_destroy_decompress(&cinfo);
|
||||
|
||||
return 0;
|
||||
}
|
8
src/jpeg.h
Normal file
8
src/jpeg.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height, int *jpeg_components,
|
||||
const char * restrict data, int data_size);
|
24
src/md5.c
Normal file
24
src/md5.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <openssl/md5.h>
|
||||
|
||||
int
|
||||
strmd5(unsigned char **dst, int *dst_size,
|
||||
const unsigned char * restrict src, int src_size)
|
||||
{
|
||||
*dst_size = MD5_DIGEST_LENGTH;
|
||||
*dst = malloc(*dst_size);
|
||||
|
||||
if (*dst == NULL)
|
||||
return 1;
|
||||
|
||||
MD5(src, src_size, *dst);
|
||||
|
||||
return 0;
|
||||
}
|
9
src/md5.h
Normal file
9
src/md5.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
/*
|
||||
* Copyright (c) 2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int
|
||||
strmd5(unsigned char **dst, int *dst_size,
|
||||
const unsigned char * restrict src, int src_size);
|
22
src/melon.c
22
src/melon.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -14,11 +14,8 @@
|
|||
#include "version.h"
|
||||
|
||||
int
|
||||
main(int argc, char **argv, char **envp)
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
|
||||
printf("Copyright (c) 2020, yzrh <yzrh@tuta.io>\n\n");
|
||||
|
||||
cnki_t *param = NULL;
|
||||
|
||||
if (cnki_create(¶m) != 0) {
|
||||
|
@ -83,27 +80,32 @@ main(int argc, char **argv, char **envp)
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (param->stat > 0)
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
|
||||
"Copyright (c) 2020-2022, yzrh <yzrh@noema.org>\n\n");
|
||||
|
||||
cnki_info(¶m);
|
||||
|
||||
if (strcmp(param->file_stat->type, "%PDF") == 0) {
|
||||
if (strncmp(param->file_stat->type, "%PDF", 4) == 0) {
|
||||
if (cnki_pdf(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "CAJ") == 0) {
|
||||
} else if (strncmp(param->file_stat->type, "CAJ", 3) == 0) {
|
||||
if (cnki_caj(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "HN") == 0) {
|
||||
if (cnki_nh(¶m) != 0) {
|
||||
} else if (strncmp(param->file_stat->type, "HN", 2) == 0 ||
|
||||
(unsigned char) param->file_stat->type[0] == 0xc8) {
|
||||
if (cnki_hn(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "KDH ") == 0) {
|
||||
} else if (strncmp(param->file_stat->type, "KDH ", 4) == 0) {
|
||||
if (cnki_kdh(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
|
|
86
src/pdf.c
86
src/pdf.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -67,7 +67,8 @@ int
|
|||
pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf != NULL || id <= 0 ||
|
||||
(object != NULL && dictionary != NULL))
|
||||
|
@ -84,24 +85,24 @@ pdf_obj_add(pdf_object_t **pdf, int id,
|
|||
(*pdf)->id = id;
|
||||
|
||||
if (dictionary != NULL) {
|
||||
(*pdf)->dictionary_size = strlen(dictionary) + 1;
|
||||
(*pdf)->dictionary_size = strlen(dictionary);
|
||||
(*pdf)->dictionary = malloc((*pdf)->dictionary_size);
|
||||
|
||||
if ((*pdf)->dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
strncpy((*pdf)->dictionary, dictionary, (*pdf)->dictionary_size);
|
||||
memcpy((*pdf)->dictionary, dictionary, (*pdf)->dictionary_size);
|
||||
|
||||
(*pdf)->object_size = 0;
|
||||
(*pdf)->object = NULL;
|
||||
} else if (object != NULL) {
|
||||
(*pdf)->object_size = strlen(object) + 1;
|
||||
(*pdf)->object_size = strlen(object);
|
||||
(*pdf)->object = malloc((*pdf)->object_size);
|
||||
|
||||
if ((*pdf)->object == NULL)
|
||||
return 1;
|
||||
|
||||
strncpy((*pdf)->object, object, (*pdf)->object_size);
|
||||
memcpy((*pdf)->object, object, (*pdf)->object_size);
|
||||
|
||||
(*pdf)->dictionary_size = 0;
|
||||
(*pdf)->dictionary = NULL;
|
||||
|
@ -112,14 +113,15 @@ pdf_obj_add(pdf_object_t **pdf, int id,
|
|||
(*pdf)->dictionary = NULL;
|
||||
}
|
||||
|
||||
if (stream != NULL) {
|
||||
(*pdf)->stream_size = sizeof(stream);
|
||||
if (stream != NULL && stream_size > 0) {
|
||||
(*pdf)->stream_size = stream_size + 1;
|
||||
(*pdf)->stream = malloc((*pdf)->stream_size);
|
||||
|
||||
if ((*pdf)->stream == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy((*pdf)->stream, stream, (*pdf)->stream_size);
|
||||
memcpy((*pdf)->stream, stream, stream_size);
|
||||
(*pdf)->stream[(*pdf)->stream_size - 1] = '\n';
|
||||
} else {
|
||||
(*pdf)->stream_size = 0;
|
||||
(*pdf)->stream = NULL;
|
||||
|
@ -153,7 +155,8 @@ int
|
|||
pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
@ -163,7 +166,8 @@ pdf_obj_prepend(pdf_object_t **pdf, int id,
|
|||
|
||||
pdf_object_t *ptr = NULL;
|
||||
|
||||
if (pdf_obj_add(&ptr, id, object, dictionary, stream) != 0) {
|
||||
if (pdf_obj_add(&ptr, id, object, dictionary,
|
||||
stream, stream_size) != 0) {
|
||||
free(ptr);
|
||||
return 1;
|
||||
}
|
||||
|
@ -178,7 +182,8 @@ int
|
|||
pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
@ -190,12 +195,67 @@ pdf_obj_append(pdf_object_t **pdf, int id,
|
|||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
if (pdf_obj_add(&ptr->next, id, object, dictionary, stream) != 0)
|
||||
if (pdf_obj_add(&ptr->next, id, object, dictionary,
|
||||
stream, stream_size) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_replace(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
pdf_object_t *ptr;
|
||||
char *ret;
|
||||
|
||||
if (pdf_get_obj(pdf, id, &ptr) != 0)
|
||||
return 1;
|
||||
|
||||
if (object != NULL && dictionary != NULL)
|
||||
return 1;
|
||||
|
||||
if (dictionary != NULL) {
|
||||
ret = realloc(ptr->dictionary, strlen(dictionary));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->dictionary_size = strlen(dictionary);
|
||||
ptr->dictionary = ret;
|
||||
|
||||
memcpy(ptr->dictionary, dictionary, ptr->dictionary_size);
|
||||
} else if (object != NULL) {
|
||||
ret = realloc(ptr->object, strlen(object));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->object_size = strlen(object);
|
||||
ptr->object = ret;
|
||||
|
||||
memcpy(ptr->object, object, ptr->object_size);
|
||||
}
|
||||
|
||||
if (stream != NULL && stream_size > 0) {
|
||||
ret = realloc(ptr->stream, stream_size + 1);
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->stream_size = stream_size + 1;
|
||||
ptr->stream = ret;
|
||||
|
||||
memcpy(ptr->stream, stream, stream_size);
|
||||
ptr->stream[ptr->stream_size - 1] = '\n';
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_sort(pdf_object_t **pdf)
|
||||
{
|
||||
|
|
19
src/pdf.h
19
src/pdf.h
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -21,23 +21,29 @@ typedef struct _pdf_object_t {
|
|||
|
||||
/* pdf.c */
|
||||
/* TODO: Rewrite object dictionary */
|
||||
/* TODO: Compact object id */
|
||||
/* TODO: `mutool clean -gggsz' */
|
||||
int pdf_obj_create(pdf_object_t **pdf);
|
||||
void pdf_obj_destroy(pdf_object_t **pdf);
|
||||
int pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_del(pdf_object_t **pdf, int id);
|
||||
int pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_replace(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_sort(pdf_object_t **pdf);
|
||||
|
||||
/* pdf_parser.c */
|
||||
|
@ -56,6 +62,7 @@ int pdf_get_size(pdf_object_t **pdf);
|
|||
int pdf_get_free_id(pdf_object_t **pdf);
|
||||
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
|
||||
int pdf_get_catalog_id(pdf_object_t **pdf);
|
||||
int pdf_get_xref_id(pdf_object_t **pdf);
|
||||
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
|
||||
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
|
||||
int pdf_get_kid_count(pdf_object_t **pdf, int id);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -42,7 +42,7 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
|
|||
|
||||
strcat(dictionary, "<<\n");
|
||||
|
||||
size = 512;
|
||||
size = 514;
|
||||
str = NULL;
|
||||
|
||||
if (strconv(&str, "UTF-16BE",
|
||||
|
@ -50,7 +50,7 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
|
|||
&size) == 0) {
|
||||
strcat(dictionary, "/Title <feff");
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int i = 0; i < size - 2; i++) {
|
||||
snprintf(buf, 64, "%02x", (unsigned char) str[i]);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
@ -89,11 +89,11 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
|
|||
}
|
||||
|
||||
/* Page starts from 0 */
|
||||
snprintf(buf, 64, "/Dest [%d /XYZ null null null]\n>>\n",
|
||||
snprintf(buf, 64, "/Dest [%d /XYZ null null null]\n>>",
|
||||
atoi(ptr->item->page) - 1);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
|
||||
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL, 0);
|
||||
|
||||
if (ptr->left == NULL)
|
||||
(*stat)[1] = ptr->id;
|
||||
|
@ -106,6 +106,26 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
_outline_free(object_outline_tree_t **outline_tree)
|
||||
{
|
||||
object_outline_tree_t *ptr = *outline_tree;
|
||||
for (;;) {
|
||||
if (ptr->right != NULL)
|
||||
_outline_free(&ptr->right);
|
||||
|
||||
if (ptr->left != NULL) {
|
||||
ptr = ptr->left;
|
||||
free(ptr->up);
|
||||
} else {
|
||||
free(ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
|
||||
{
|
||||
|
@ -119,16 +139,15 @@ pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
|
|||
int *ret;
|
||||
|
||||
_outline(pdf, &outline_tree->left, outline_tree->id, &ret);
|
||||
|
||||
free(outline_tree);
|
||||
_outline_free(&outline_tree);
|
||||
|
||||
snprintf(buf, 128,
|
||||
"<<\n/Type Outlines\n/First %d 0 R\n/Last %d 0 R\n/Count %d\n>>\n",
|
||||
"<<\n/Type Outlines\n/First %d 0 R\n/Last %d 0 R\n/Count %d\n>>",
|
||||
ret[0], ret[1], ret[2]);
|
||||
|
||||
free(ret);
|
||||
|
||||
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
|
||||
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#endif /* __linux__ */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -83,7 +89,7 @@ pdf_get_free_id(pdf_object_t **pdf)
|
|||
|
||||
int id = 0;
|
||||
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
for (int i = 1; i < 100000000; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
|
@ -117,7 +123,7 @@ pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
|
|||
int id = 0;
|
||||
|
||||
pdf_object_t *ptr;
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
for (int i = 1; i < 100000000; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
|
@ -130,7 +136,7 @@ pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
|
|||
if (i != id) {
|
||||
(*ids)[pos] = i;
|
||||
|
||||
if (pos == count)
|
||||
if (pos == count - 1)
|
||||
return 0;
|
||||
|
||||
pos++;
|
||||
|
@ -152,7 +158,8 @@ pdf_get_catalog_id(pdf_object_t **pdf)
|
|||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, "/Catalog") != NULL)
|
||||
memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
"/Catalog", 8) != NULL)
|
||||
catalog_id = ptr->id;
|
||||
|
||||
ptr = ptr->next;
|
||||
|
@ -161,6 +168,28 @@ pdf_get_catalog_id(pdf_object_t **pdf)
|
|||
return catalog_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_xref_id(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int xref_id = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
"/XRef", 5) != NULL)
|
||||
xref_id = ptr->id;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return xref_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
||||
{
|
||||
|
@ -187,10 +216,11 @@ pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
|||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
(head = strstr(ptr->dictionary, "/Parent ")) != NULL &&
|
||||
(head = memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
"/Parent ", 8)) != NULL &&
|
||||
(tail = strchr(head + 8, ' ')) != NULL) {
|
||||
memset(str, 0, 8);
|
||||
strncpy(str, head + 8, (tail - head) - 8);
|
||||
memcpy(str, head + 8, (tail - head) - 8);
|
||||
str_val = atoi(str);
|
||||
|
||||
if (!_id_in(str_val, *id)) {
|
||||
|
@ -237,7 +267,8 @@ pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid)
|
|||
}
|
||||
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, str) != NULL) {
|
||||
memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
str, strlen(str)) != NULL) {
|
||||
ret = realloc(*kid, ++kid_size * sizeof(int));
|
||||
|
||||
if (ret == NULL)
|
||||
|
@ -276,13 +307,15 @@ pdf_get_kid_count(pdf_object_t **pdf, int id)
|
|||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, id_str) != NULL &&
|
||||
(pos = strstr(ptr->dictionary, "/Count ")) != NULL) {
|
||||
memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
id_str, strlen(id_str)) != NULL &&
|
||||
(pos = memmem(ptr->dictionary, ptr->dictionary_size,
|
||||
"/Count ", 7)) != NULL) {
|
||||
for (int i = 8; i >= 0; i--) {
|
||||
if (i + 7 <= ptr->dictionary_size - (pos - ptr->dictionary) &&
|
||||
pos[i + 7] >= '0' && pos[i + 7] <= '9') {
|
||||
memset(str, 0, 8);
|
||||
strncpy(str, pos + 7, i + 1);
|
||||
memcpy(str, pos + 7, i + 1);
|
||||
str_val = atoi(str);
|
||||
count += str_val;
|
||||
break;
|
||||
|
|
194
src/pdf_parser.c
194
src/pdf_parser.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -19,26 +19,35 @@ static void *
|
|||
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
|
||||
{
|
||||
const char whitespace[6] = {
|
||||
'\r',
|
||||
'\n',
|
||||
'\f',
|
||||
'\t',
|
||||
'\0',
|
||||
' '
|
||||
0x00,
|
||||
0x09,
|
||||
0x0a,
|
||||
0x0c,
|
||||
0x0d,
|
||||
0x20
|
||||
};
|
||||
|
||||
char tmp[s1 + 1];
|
||||
memcpy(tmp, p1, s1);
|
||||
char *ret = NULL;
|
||||
|
||||
char *ret;
|
||||
char str[s1 + 1];
|
||||
memcpy(str, p1, s1);
|
||||
|
||||
size_t tmp_size = 0;
|
||||
char *tmp;
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
tmp[s1] = whitespace[i];
|
||||
if((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
|
||||
return ret;
|
||||
str[s1] = whitespace[i];
|
||||
|
||||
if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
|
||||
continue;
|
||||
|
||||
if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
|
||||
tmp_size = tmp - (char *) p0;
|
||||
ret = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -57,23 +66,45 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
end = ftell(*fp);
|
||||
fseek(*fp, cur, SEEK_SET);
|
||||
|
||||
int head = 0;
|
||||
int tail = 0;
|
||||
long head = 0;
|
||||
long tail = 0;
|
||||
char *pos;
|
||||
char *tmp;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, size_buf, 1, *fp);
|
||||
if (cur + size_buf < end) {
|
||||
fread(buf, size_buf, 1, *fp);
|
||||
} else {
|
||||
fread(buf, end - cur, 1, *fp);
|
||||
memset(buf + end - cur, 0, size_buf - end + cur);
|
||||
}
|
||||
|
||||
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
|
||||
head = cur + (pos - buf) + 7;
|
||||
if (head == 0) {
|
||||
/* Hack needed for invalid object */
|
||||
pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
|
||||
tmp = memmem(buf, size_buf, " 0 obj", 6);
|
||||
|
||||
while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
|
||||
tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
|
||||
|
||||
if (pos != NULL && tmp != NULL) {
|
||||
if (pos - buf < tmp - buf)
|
||||
head = cur + (pos - buf) + 7;
|
||||
else
|
||||
head = cur + (tmp - buf) + 6;
|
||||
} else if (pos != NULL) {
|
||||
head = cur + (pos - buf) + 7;
|
||||
} else if (tmp != NULL) {
|
||||
head = cur + (tmp - buf) + 6;
|
||||
}
|
||||
}
|
||||
|
||||
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
|
||||
/* We need to check if it is the object stored in stream */
|
||||
while (memcmp(pos + 7,
|
||||
"\r\nendstream", 11) == 0 &&
|
||||
(tmp = _memmem_whitespace(pos + 6,
|
||||
size_buf - (pos - buf) - 6,
|
||||
(tmp = _memmem_whitespace(pos + 7,
|
||||
size_buf - (pos - buf) - 7,
|
||||
"endobj", 6)) != NULL)
|
||||
pos = tmp;
|
||||
|
||||
|
@ -102,13 +133,17 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
ptr->address = head;
|
||||
ptr->size = tail - head;
|
||||
|
||||
fseek(*fp, tail + 6, SEEK_SET);
|
||||
fseek(*fp, tail + 7, SEEK_SET);
|
||||
head = tail = 0;
|
||||
} else if (head > 0 && tail > 0) {
|
||||
if (cur + size_buf < end)
|
||||
fseek(*fp, head, SEEK_SET);
|
||||
tail = 0;
|
||||
} else {
|
||||
fseek(*fp, -6, SEEK_CUR);
|
||||
fseek(*fp, -7, SEEK_CUR);
|
||||
}
|
||||
|
||||
if ((cur = ftell(*fp)) + 6 >= end)
|
||||
if ((cur = ftell(*fp)) + 7 >= end)
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -126,6 +161,7 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char str[8];
|
||||
char *buf;
|
||||
char *head;
|
||||
char *tail;
|
||||
|
@ -137,28 +173,86 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
if (buf == NULL)
|
||||
return 1;
|
||||
|
||||
memset(buf, 0, ptr->size);
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
|
||||
fseek(*fp, ptr->address - 12, SEEK_SET);
|
||||
fread(buf, 8, 1, *fp);
|
||||
/* Handle incomplete object */
|
||||
head = buf;
|
||||
while ((tmp = _memmem_whitespace(head,
|
||||
ptr->size - (head - buf),
|
||||
" 0 obj", 6)) != NULL)
|
||||
head = tmp + 7;
|
||||
|
||||
/* Hack needed for invalid object */
|
||||
while ((tmp = memmem(head,
|
||||
ptr->size - (head - buf),
|
||||
" 0 obj", 6)) != NULL)
|
||||
head = tmp + 6;
|
||||
|
||||
if (head - buf > 0) {
|
||||
ptr->address += head - buf;
|
||||
ptr->size -= head - buf;
|
||||
|
||||
tmp = realloc(buf, ptr->size);
|
||||
|
||||
if (tmp == NULL)
|
||||
return 1;
|
||||
|
||||
buf = tmp;
|
||||
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
}
|
||||
|
||||
/* Hack needed for invalid object */
|
||||
fseek(*fp, ptr->address - 14, SEEK_SET);
|
||||
fread(str, 8, 1, *fp);
|
||||
|
||||
if (str[7] < '0' || str[7] > '9') {
|
||||
fseek(*fp, ptr->address - 15, SEEK_SET);
|
||||
fread(str, 8, 1, *fp);
|
||||
}
|
||||
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (str[i] < '0' || str[i] > '9') {
|
||||
if (i < 7)
|
||||
ptr->id = atoi(str + i + 1);
|
||||
else
|
||||
ptr->id = 0;
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (buf[i] >= '0' && buf[i] <= '9') {
|
||||
ptr->id = atoi(buf + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
|
||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
||||
/* A dictionary object may have nested dictionary */
|
||||
while ((tmp = _memmem_whitespace(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL)
|
||||
tail = tmp;
|
||||
((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
|
||||
/* Hack needed for invalid object */
|
||||
(tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
|
||||
if (memmem(buf, tail - buf, "stream\r\n", 8) != NULL) {
|
||||
tail = memmem(buf, ptr->size, ">>", 2);
|
||||
|
||||
while (ptr->size - (tail - buf) > 2 &&
|
||||
(tmp = memmem(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 2,
|
||||
(tmp - tail) - 2,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
} else {
|
||||
/*
|
||||
* A dictionary object may have nested dictionary,
|
||||
* but it should not be in a stream
|
||||
*/
|
||||
while (ptr->size - (tail - buf) > 3 &&
|
||||
(tmp = _memmem_whitespace(tail + 3,
|
||||
ptr->size - (tail - buf) - 3,
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 3,
|
||||
(tmp - tail) - 3,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
}
|
||||
|
||||
ptr->dictionary_size = tail - head + 2;
|
||||
ptr->dictionary = malloc(ptr->dictionary_size + 1);
|
||||
|
@ -166,8 +260,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
if (ptr->dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
|
||||
memcpy(ptr->dictionary, head, ptr->dictionary_size);
|
||||
memset(ptr->dictionary + ptr->dictionary_size, 0, 1);
|
||||
|
||||
if ((head = memmem(tail,
|
||||
ptr->size - (tail - buf),
|
||||
|
@ -180,11 +274,11 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
* contains another object that
|
||||
* contains another stream
|
||||
*/
|
||||
while (_memmem_whitespace(tail,
|
||||
ptr->size - (tail - buf),
|
||||
while (_memmem_whitespace(tail + 10,
|
||||
ptr->size - (tail - buf) - 10,
|
||||
"endobj", 6) != NULL &&
|
||||
(tmp = _memmem_whitespace(tail + 9,
|
||||
ptr->size - (tail - buf) - 9,
|
||||
(tmp = _memmem_whitespace(tail + 10,
|
||||
ptr->size - (tail - buf) - 10,
|
||||
"endstream", 9)) != NULL)
|
||||
tail = tmp;
|
||||
|
||||
|
@ -196,19 +290,13 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
|
||||
memcpy(ptr->stream, head + 8, ptr->stream_size);
|
||||
}
|
||||
|
||||
free(buf);
|
||||
} else {
|
||||
ptr->object_size = ptr->size;
|
||||
ptr->object = malloc(ptr->object_size + 1);
|
||||
|
||||
if (ptr->object == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->object, 0, ptr->object_size + 1);
|
||||
memcpy(ptr->object, buf, ptr->object_size);
|
||||
ptr->object = buf;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -8,14 +8,32 @@
|
|||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "version.h"
|
||||
#include "md5.h"
|
||||
#include "pdf.h"
|
||||
|
||||
static int
|
||||
_info_obj(pdf_object_t **pdf)
|
||||
{
|
||||
char dictionary[128] = "<<\n"
|
||||
"/Producer (Melon " VERSION "." RELEASE "." PATCH EXTRA ")\n"
|
||||
"/CreationDate (D:";
|
||||
|
||||
char buf[64];
|
||||
|
||||
time_t timestamp = time(NULL);
|
||||
|
||||
strftime(buf, 64, "%Y%m%d%H%M%S", gmtime(×tamp));
|
||||
strcat(dictionary, buf);
|
||||
strcat(dictionary, "+00'00')\n>>");
|
||||
|
||||
return pdf_obj_append(pdf, 0, NULL, dictionary, NULL, 0);
|
||||
}
|
||||
|
||||
int
|
||||
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
if (*pdf == NULL || *fp == NULL || _info_obj(pdf) != 0)
|
||||
return 1;
|
||||
|
||||
long cur;
|
||||
|
@ -26,12 +44,15 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
|||
|
||||
fprintf(*fp, "%d 0 obj\n", ptr->id);
|
||||
|
||||
if (ptr->dictionary != NULL)
|
||||
fputs(ptr->dictionary, *fp);
|
||||
else if (ptr->object != NULL)
|
||||
fputs(ptr->object, *fp);
|
||||
else if (ptr->stream == NULL)
|
||||
if (ptr->dictionary != NULL) {
|
||||
fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
|
||||
fputs("\n", *fp);
|
||||
} else if (ptr->object != NULL) {
|
||||
fwrite(ptr->object, ptr->object_size, 1, *fp);
|
||||
fputs("\n", *fp);
|
||||
} else if (ptr->stream == NULL) {
|
||||
fputs("null\n", *fp);
|
||||
}
|
||||
|
||||
if (ptr->stream != NULL) {
|
||||
fputs("stream\r\n", *fp);
|
||||
|
@ -135,37 +156,34 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
|||
int buf_size;
|
||||
char buf[64];
|
||||
|
||||
#ifdef __ILP32__
|
||||
buf_size = snprintf(buf, 64, "%x%x", timestamp, size);
|
||||
#else
|
||||
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
|
||||
#endif
|
||||
|
||||
unsigned char str[64];
|
||||
memcpy(str, buf, 64);
|
||||
int fid_size;
|
||||
unsigned char *fid;
|
||||
|
||||
unsigned char fid[MD5_DIGEST_LENGTH];
|
||||
MD5(str, buf_size, fid);
|
||||
if (strmd5(&fid, &fid_size, (unsigned char *) buf, buf_size) != 0)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
/*
|
||||
* TODO: Document information dictionary
|
||||
* `"/Producer (Melon)"'
|
||||
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
|
||||
*
|
||||
* Trailer dictionary
|
||||
* `"/Info %d 0 R"'
|
||||
*/
|
||||
fprintf(*fp,
|
||||
"/Size %d\n/Root %d 0 R\n",
|
||||
"/Size %d\n/Root %d 0 R\n/Info %d 0 R\n",
|
||||
ptr->id + 1,
|
||||
pdf_get_catalog_id(pdf));
|
||||
pdf_get_catalog_id(pdf),
|
||||
ptr->id);
|
||||
|
||||
fputs("/ID [", *fp);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
fputs("<", *fp);
|
||||
|
||||
for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
|
||||
for (int j = 0; j < fid_size; j++)
|
||||
fprintf(*fp, "%02x", fid[j]);
|
||||
|
||||
fputs(">", *fp);
|
||||
|
@ -184,5 +202,7 @@ pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
|||
|
||||
fputs("%%EOF\n", *fp);
|
||||
|
||||
free(fid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#define VERSION "0"
|
||||
#define RELEASE "1"
|
||||
#define RELEASE "3"
|
||||
#define PATCH "0"
|
||||
#define EXTRA ""
|
||||
|
|
53
src/zlib.c
Normal file
53
src/zlib.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
int
|
||||
strinflate(char **dst, int dst_size,
|
||||
const char * restrict src, int src_size)
|
||||
{
|
||||
*dst = malloc(dst_size);
|
||||
|
||||
if (*dst == NULL)
|
||||
return 1;
|
||||
|
||||
unsigned long size = dst_size;
|
||||
|
||||
if (uncompress((Bytef *) *dst,
|
||||
&size, (const Bytef *) src, src_size) != Z_OK) {
|
||||
free(*dst);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
strdeflate(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size)
|
||||
{
|
||||
*dst_size = compressBound(src_size);
|
||||
*dst = malloc(*dst_size);
|
||||
|
||||
if (*dst == NULL)
|
||||
return 1;
|
||||
|
||||
unsigned long size = *dst_size;
|
||||
|
||||
if (compress((Bytef *) *dst, &size,
|
||||
(const Bytef *) src, src_size) != Z_OK) {
|
||||
free(*dst);
|
||||
return 1;
|
||||
}
|
||||
|
||||
*dst_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
11
src/zlib.h
Normal file
11
src/zlib.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
 * Inflate src_size bytes of zlib data at src into a buffer of
 * dst_size bytes allocated with malloc() and stored in *dst.
 * Returns 0 on success, 1 if allocation or decompression fails.
 */
int strinflate(char **dst, int dst_size,
|
||||
const char * restrict src, int src_size);
|
||||
|
||||
/*
 * Deflate src_size bytes at src into a buffer allocated with malloc()
 * and stored in *dst; *dst_size receives the compressed length.
 * Returns 0 on success, 1 if allocation or compression fails.
 */
int strdeflate(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size);
|
Loading…
Add table
Reference in a new issue