Fix HN conversion and add JBIG2 support.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
ac3b1dda63
commit
9c1f1d0b75
19 changed files with 519 additions and 194 deletions
|
@ -1,10 +1,16 @@
|
|||
0.2.0 (2021-XX-XX)
|
||||
0.3.0 (2022-XX-XX)
|
||||
==================
|
||||
|
||||
* Support JPEG 2000 for HN.
|
||||
|
||||
0.2.0 (2022-12-22)
|
||||
==================
|
||||
|
||||
* KDH conversion now produces a valid PDF
|
||||
* Handle binary data in dictionary.
|
||||
* Add preliminary support for HN
|
||||
* Fix root object dictionary generation when root object has more than two children.
|
||||
* Fix memory leak and data type.
|
||||
|
||||
0.1.0 (2020-04-08)
|
||||
==================
|
||||
|
|
|
@ -6,10 +6,10 @@ Melon: Converter that produces PDF from CNKI proprietary formats
|
|||
Development
|
||||
-----------
|
||||
|
||||
Currently, CAJ and KDH can be converted. Please report
|
||||
Currently, CAJ, KDH, and HN can be converted. Please report
|
||||
any failures with a sample that can reproduce the behaviour.
|
||||
|
||||
HN support is being worked on.
|
||||
JPEG 2000 images in HN files are not supported yet.
|
||||
|
||||
Dependency
|
||||
----------
|
||||
|
@ -17,7 +17,7 @@ Dependency
|
|||
1. OpenSSL
|
||||
2. libiconv
|
||||
3. zlib
|
||||
4. JBIG-KIT
|
||||
4. jbig2dec
|
||||
5. libjpeg-turbo
|
||||
|
||||
Usage
|
||||
|
@ -39,7 +39,7 @@ Specify output file
|
|||
Set buffer size (default 512k)
|
||||
|
||||
-v, --verbose
|
||||
Print more information (twice for even more)
|
||||
Print more information (twice for even more, three times for HN image decoding information as well)
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
|
10
src/Makefile
10
src/Makefile
|
@ -4,19 +4,19 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
src = melon.c iconv.c zlib.c jbig.c jpeg.c \
|
||||
src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
|
||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
|
||||
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \
|
||||
cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
|
||||
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
||||
inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \
|
||||
inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
|
||||
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
|
||||
|
||||
obj = ${src:.c=.o}
|
||||
|
||||
PREFIX = /usr/local
|
||||
|
||||
CFLAGS = -O2 -pipe -flto -Wall -Wextra -Wno-unused-parameter
|
||||
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed
|
||||
CFLAGS = -O2 -pipe -flto -Wall -Wextra
|
||||
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
|
||||
|
||||
CFLAGS += -I/usr/local/include
|
||||
LDFLAGS += -L/usr/local/lib
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -97,5 +97,7 @@ int cnki_jbig(char **bitmap, int *bitmap_size,
|
|||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
||||
/* cnki_xml.c */
|
||||
int cnki_xml(char **xml, FILE **fp);
|
||||
/* cnki_jbig2.c */
|
||||
int cnki_jbig2(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -22,68 +22,22 @@ cnki_jbig(char **bitmap, int *bitmap_size,
|
|||
|
||||
memcpy(dib, jbig, 40);
|
||||
|
||||
bih_t *bih = malloc(sizeof(bih_t));
|
||||
int width_padded = (dib->width * dib->depth + 7) / 8;
|
||||
|
||||
if (bih == NULL) {
|
||||
*bitmap_size = dib->height * width_padded;
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
|
||||
if (*bitmap == NULL) {
|
||||
free(dib);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(bih, 0, sizeof(bih_t));
|
||||
strdec_jbig(bitmap, dib->width, dib->height, jbig + 48, jbig_size - 48);
|
||||
|
||||
bih->d_l = 0;
|
||||
bih->d = 0;
|
||||
|
||||
bih->p = 1;
|
||||
|
||||
bih->fill = 0;
|
||||
|
||||
bih->x_d = dib->width;
|
||||
bih->y_d = dib->height;
|
||||
bih->l_0 = bih->y_d / 35;
|
||||
|
||||
while (bih->l_0 > 128)
|
||||
bih->l_0--;
|
||||
if (bih->l_0 < 2)
|
||||
bih->l_0 = 2;
|
||||
|
||||
bih->m_x = 8;
|
||||
bih->m_y = 0;
|
||||
|
||||
bih->order |= 1 << 1;
|
||||
bih->order |= 1 << 0;
|
||||
|
||||
bih->options |= 1 << 4;
|
||||
bih->options |= 1 << 3;
|
||||
bih->options |= 1 << 2;
|
||||
|
||||
bih->dptable = NULL;
|
||||
|
||||
int bie_size = jbig_size - 28; /* - 40 - 8 + 20 */
|
||||
char *bie = malloc(bie_size);
|
||||
|
||||
if (bie == NULL) {
|
||||
free(dib);
|
||||
free(bih);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memcpy(bie, bih, 20);
|
||||
memcpy(bie + 20, jbig + 48, jbig_size - 48);
|
||||
|
||||
int ret = strdec_jbig(bitmap, bitmap_size, bie, bie_size);
|
||||
|
||||
if (ret == 0) {
|
||||
*bitmap_width = bih->x_d;
|
||||
*bitmap_height = bih->y_d;
|
||||
}
|
||||
*bitmap_width = dib->width;
|
||||
*bitmap_height = dib->height;
|
||||
|
||||
free(dib);
|
||||
free(bih);
|
||||
free(bie);
|
||||
|
||||
if (ret != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,48 +1,11 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* order (MSB first):
|
||||
* 0
|
||||
* 0
|
||||
* 0
|
||||
* 0
|
||||
* HITOLO
|
||||
* SEQ
|
||||
* ILEAVE (default)
|
||||
* SMID (default)
|
||||
*
|
||||
* options (MSB first):
|
||||
* 0
|
||||
* LRLTWO
|
||||
* VLENGTH
|
||||
* TPDON (default)
|
||||
* TPBON (default)
|
||||
* DPON (default)
|
||||
* DPPRIV
|
||||
* DPLAST
|
||||
*/
|
||||
typedef struct _bih_t {
|
||||
char d_l; /* Initial resolution layer */
|
||||
char d; /* Final resolution layer */
|
||||
char p; /* Number of bit-planes, for bi-level image, always 1 */
|
||||
char fill; /* Always 0 */
|
||||
/* MSB first */
|
||||
int32_t x_d; /* Horizontal dimension at highestresolution */
|
||||
int32_t y_d; /* Vertical dimension at highest resolution */
|
||||
int32_t l_0; /* Number of lines per stripe at lowest resolution */
|
||||
char m_x; /* Maximum horizontal offsets (default: 8) */
|
||||
char m_y; /* Maximum vertical offsets (default: 0) */
|
||||
char order;
|
||||
char options;
|
||||
char *dptable; /* 0 or 1728 */
|
||||
} bih_t;
|
||||
|
||||
typedef enum _dib_compression_code {
|
||||
BI_RGB,
|
||||
BI_RLE8,
|
||||
|
|
43
src/cnki_jbig2.c
Normal file
43
src/cnki_jbig2.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki_jbig.h"
|
||||
#include "jbig2.h"
|
||||
|
||||
int
|
||||
cnki_jbig2(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size)
|
||||
{
|
||||
dib_t *dib = malloc(sizeof(dib_t));
|
||||
|
||||
if (dib == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy(dib, jbig, 40);
|
||||
|
||||
int width_padded = (dib->width * dib->depth + 7) / 8;
|
||||
|
||||
*bitmap_size = dib->height * width_padded;
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
|
||||
if (*bitmap == NULL) {
|
||||
free(dib);
|
||||
return 1;
|
||||
}
|
||||
|
||||
strdec_jbig2(bitmap, jbig + 48, jbig_size - 48);
|
||||
|
||||
*bitmap_width = dib->width;
|
||||
*bitmap_height = dib->height;
|
||||
|
||||
free(dib);
|
||||
|
||||
return 0;
|
||||
}
|
135
src/cnki_pdf.c
135
src/cnki_pdf.c
|
@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param)
|
|||
if ((*param)->stat > 1)
|
||||
printf("Generating object\n");
|
||||
|
||||
dictionary_size = 64 + 12 * kid[0];
|
||||
dictionary_size = 64 + 16 * kid[0];
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
|
@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param)
|
|||
int *ids = NULL;
|
||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||
|
||||
int bitmap_size;
|
||||
char *bitmap;
|
||||
|
||||
int stream_size;
|
||||
char *stream;
|
||||
|
||||
|
@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
if (dim == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
dictionary_size = 128;
|
||||
dictionary_size = 256;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/Type /XObject\n"
|
||||
"/Subtype /Image\n");
|
||||
|
||||
if ((*param)->stat > 2)
|
||||
printf("\tDecoding data, page %04d item %02d... ",
|
||||
ptr->page, i);
|
||||
printf("\tDecoding data, page %04d item %02d format %d... ",
|
||||
ptr->page, i, ptr->image_data[i].format);
|
||||
|
||||
switch (ptr->image_data[i].format) {
|
||||
case JBIG:
|
||||
ret = cnki_jbig(&stream,
|
||||
&stream_size,
|
||||
ret = cnki_jbig(&bitmap,
|
||||
&bitmap_size,
|
||||
&wh[0],
|
||||
&wh[1],
|
||||
ptr->image_data[i].image,
|
||||
|
@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param)
|
|||
break;
|
||||
}
|
||||
|
||||
if (strdeflate(&stream, &stream_size,
|
||||
bitmap, bitmap_size) != 0) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
free(dictionary);
|
||||
return 1;
|
||||
}
|
||||
|
||||
free(bitmap);
|
||||
|
||||
snprintf(buf, 64, "/Width %d\n/Height %d\n",
|
||||
wh[0], wh[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/ColorSpace /DeviceGray\n"
|
||||
"/BitsPerComponent 1\n");
|
||||
strcat(dictionary, "/Decode [1.0 0.0]\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n",
|
||||
stream_size);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/Filter /CCITTFaxDecode\n");
|
||||
strcat(dictionary, "/Filter /FlateDecode\n");
|
||||
|
||||
dim[i * 2] = wh[0];
|
||||
dim[i * 2 + 1] = wh[1];
|
||||
|
@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
stream_size = ptr->image_data[i].size;
|
||||
stream = malloc(stream_size);
|
||||
if (stream == NULL) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
free(dictionary);
|
||||
return 1;
|
||||
}
|
||||
memcpy(stream, ptr->image_data[i].image, stream_size);
|
||||
|
@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
wh[0], wh[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/ColorSpace /DeviceRGB\n"
|
||||
strcat(dictionary, "/ColorSpace /DeviceGray\n"
|
||||
"/BitsPerComponent 8\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n",
|
||||
|
@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param)
|
|||
dim[i * 2 + 1] = wh[1];
|
||||
break;
|
||||
case JBIG2:
|
||||
ret = cnki_jbig2(&bitmap,
|
||||
&bitmap_size,
|
||||
&wh[0],
|
||||
&wh[1],
|
||||
ptr->image_data[i].image,
|
||||
ptr->image_data[i].size);
|
||||
|
||||
if (ret != 0) {
|
||||
dim[i * 2] = 0;
|
||||
dim[i * 2 + 1] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (strdeflate(&stream, &stream_size,
|
||||
bitmap, bitmap_size) != 0) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
free(dictionary);
|
||||
return 1;
|
||||
}
|
||||
|
||||
free(bitmap);
|
||||
|
||||
snprintf(buf, 64, "/Width %d\n/Height %d\n",
|
||||
wh[0], wh[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/ColorSpace /DeviceGray\n"
|
||||
"/BitsPerComponent 1\n");
|
||||
strcat(dictionary, "/Decode [1.0 0.0]\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n",
|
||||
stream_size);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/Filter /FlateDecode\n");
|
||||
|
||||
dim[i * 2] = wh[0];
|
||||
dim[i * 2 + 1] = wh[1];
|
||||
break;
|
||||
case JPX:
|
||||
default:
|
||||
ret = -1;
|
||||
|
@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
if (ret == 0) {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Done\n");
|
||||
printf("%6d byte(s), width %4d, height %4d.\n",
|
||||
stream_size, wh[0], wh[1]);
|
||||
|
||||
pdf_obj_append(&pdf, ids[i],
|
||||
NULL, dictionary, stream, stream_size);
|
||||
|
||||
free(dictionary);
|
||||
free(stream);
|
||||
} else if (ret == 1) {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Failed\n");
|
||||
|
||||
free(dictionary);
|
||||
printf("Not extracted.\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
} else {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Unsupported format\n");
|
||||
printf("Unsupported format.\n");
|
||||
|
||||
free(dictionary);
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/XObject <<");
|
||||
|
@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
|
||||
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
|
||||
|
||||
dictionary_size = stream_size / 8 + 7;
|
||||
dictionary_size = 64 + 2 * stream_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
strcat(dictionary, ">");
|
||||
} else {
|
||||
dictionary_size = ptr->text_size;
|
||||
dictionary_size = 64 + 2 * ptr->text_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
/* FIXME: Use the text somehow? */
|
||||
free(dictionary);
|
||||
|
||||
dictionary_size = 64 + 12 * ptr->image_length;
|
||||
dictionary_size = 64 + 64 * ptr->image_length;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
|
||||
|
||||
double resize_x;
|
||||
double resize_y;
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
||||
continue;
|
||||
|
||||
/* Scale within bound of A4 paper */
|
||||
resize_x = 595.276 * 4 / dim[i * 2];
|
||||
resize_y = 841.89 * 4 / dim[i * 2 + 1];
|
||||
|
||||
if (resize_y < resize_x)
|
||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
||||
resize_y, resize_y);
|
||||
else
|
||||
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
|
||||
resize_x, resize_x);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
/* Apply transformation matrix */
|
||||
if (ptr->image_data[i].format == DCT_1) {
|
||||
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
|
||||
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
|
||||
dim[i * 2 + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, "Q");
|
||||
|
||||
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
free(dictionary);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
strcat(dictionary, buf);
|
||||
|
||||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");
|
||||
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||
|
@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
if ((*param)->stat > 1)
|
||||
printf("Generating root object\n");
|
||||
|
||||
dictionary_size = 64 + 12 * (*param)->file_stat->page;
|
||||
dictionary_size = 64 + 64 * (*param)->file_stat->page;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
|
@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
free(dictionary);
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary_size = 256;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int
|
||||
cnki_xml(char **xml, FILE **fp)
|
||||
{
|
||||
/* TODO: Extract XML and embed into `/Metadata' */
|
||||
return 1;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -18,7 +18,7 @@ cnki_zlib(char **dst, int *dst_size,
|
|||
|
||||
*dst_size = size;
|
||||
|
||||
if (strinflate(dst, size, src + 24, size - 24) != 0)
|
||||
if (strinflate(dst, size, src + 24, src_size - 24) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
|
304
src/jbig.c
304
src/jbig.c
|
@ -1,41 +1,303 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h> /* FIXME: test */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <jbig.h>
|
||||
static const uint16_t _LSZ[256] = {
|
||||
0x5a1d,
|
||||
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
|
||||
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
|
||||
0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, 0x0406,
|
||||
0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, 0x0068,
|
||||
0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, 0x261f,
|
||||
0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, 0x0861,
|
||||
0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, 0x01f8,
|
||||
|
||||
int
|
||||
strdec_jbig(char **bitmap, int *bitmap_size,
|
||||
const char * restrict data, int data_size)
|
||||
0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, 0x5b12,
|
||||
0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, 0x1aa9,
|
||||
0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, 0x5832,
|
||||
0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, 0x5570,
|
||||
0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, 0x4f46,
|
||||
0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, 0x415e,
|
||||
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
|
||||
};
|
||||
|
||||
static const uint8_t _NLPS[256] = {
|
||||
1,
|
||||
14, 16, 18, 20, 23, 25, 28, 30,
|
||||
33, 35, 9, 10, 12, 15, 36, 38,
|
||||
39, 40, 42, 43, 45, 46, 48, 49,
|
||||
51, 52, 54, 56, 57, 59, 60, 62,
|
||||
63, 32, 33, 37, 64, 65, 67, 68,
|
||||
69, 70, 72, 73, 74, 75, 77, 78,
|
||||
79, 48, 50, 50, 51, 52, 53, 54,
|
||||
|
||||
55, 56, 57, 58, 59, 61, 61, 65,
|
||||
80, 81, 82, 83, 84, 86, 87, 87,
|
||||
72, 72, 74, 74, 75, 77, 77, 80,
|
||||
88, 89, 90, 91, 92, 93, 86, 88,
|
||||
95, 96, 97, 99, 99, 93, 95, 101,
|
||||
102, 103, 104, 99, 105, 106, 107, 103,
|
||||
105, 108, 109, 110, 111, 110, 112, 112
|
||||
};
|
||||
|
||||
static const uint8_t _NMPS[256] = {
|
||||
1,
|
||||
2, 3, 4, 5, 6, 7, 8, 9,
|
||||
10, 11, 12, 13, 13, 15, 16, 17,
|
||||
18, 19, 20, 21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30, 31, 32, 33,
|
||||
34, 35, 9, 37, 38, 39, 40, 41,
|
||||
42, 43, 44, 45, 46, 47, 48, 49,
|
||||
50, 51, 52, 53, 54, 55, 56, 57,
|
||||
|
||||
58, 59, 60, 61, 62, 63, 32, 65,
|
||||
66, 67, 68, 69, 70, 71, 72, 73,
|
||||
74, 75, 76, 77, 78, 79, 48, 81,
|
||||
82, 83, 84, 85, 86, 87, 71, 89,
|
||||
90, 91, 92, 93, 94, 86, 96, 97,
|
||||
98, 99, 100, 93, 102, 103, 104, 99,
|
||||
106, 107, 103, 109, 107, 111, 109, 111
|
||||
};
|
||||
|
||||
static const bool _SWTCH[256] = {
|
||||
1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 1, 0, 1
|
||||
};
|
||||
|
||||
static uint8_t _ct;
|
||||
static uint8_t _pix;
|
||||
|
||||
static uint16_t _reg_a;
|
||||
static uint32_t _reg_c;
|
||||
static uint8_t _mps[0x1000];
|
||||
static uint8_t _st[0x1000];
|
||||
|
||||
static int _width;
|
||||
static int _height;
|
||||
|
||||
static int _width_padded;
|
||||
|
||||
static int _ret_pos;
|
||||
static char *_ret;
|
||||
|
||||
static int _scd_size;
|
||||
static unsigned char *_scd;
|
||||
|
||||
static void
|
||||
_bytein(void)
|
||||
{
|
||||
struct jbg_dec_state sd;
|
||||
if (_ret_pos < _scd_size)
|
||||
_reg_c += *(_scd + _ret_pos++) << 8;
|
||||
|
||||
jbg_dec_init(&sd);
|
||||
_ct = 8;
|
||||
}
|
||||
|
||||
unsigned char *data_ptr[1] = {(unsigned char *) data};
|
||||
static void
|
||||
_initdec(void)
|
||||
{
|
||||
memset(_mps, 0, 0x1000);
|
||||
memset(_st, 0, 0x1000);
|
||||
|
||||
/* FIXME: test */
|
||||
int ret;
|
||||
if ((ret = jbg_dec_in(&sd, (unsigned char *) data_ptr,
|
||||
data_size, NULL)) != JBG_EOK) {
|
||||
printf("[%s] ", jbg_strerror(ret));
|
||||
jbg_dec_free(&sd);
|
||||
return 1;
|
||||
_reg_c = 0;
|
||||
_bytein();
|
||||
_reg_c <<= 8;
|
||||
_bytein();
|
||||
_reg_c <<= 8;
|
||||
_bytein();
|
||||
_reg_a = 0x0000;
|
||||
}
|
||||
|
||||
static void
|
||||
_exchange_lps(uint16_t cx)
|
||||
{
|
||||
uint8_t st_cx = _st[cx];
|
||||
uint16_t lsz_st_cx = _LSZ[_st[cx]];
|
||||
|
||||
if (_reg_a < lsz_st_cx) {
|
||||
_pix = _mps[cx];
|
||||
_st[cx] = _NMPS[st_cx];
|
||||
} else {
|
||||
_pix = 1 - _mps[cx];
|
||||
|
||||
if (_SWTCH[st_cx])
|
||||
_mps[cx] = _pix;
|
||||
|
||||
_st[cx] = _NLPS[st_cx];
|
||||
}
|
||||
|
||||
*bitmap_size = jbg_dec_getsize(&sd);
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
_reg_c -= _reg_a << 16;
|
||||
_reg_a = lsz_st_cx;
|
||||
}
|
||||
|
||||
if (*bitmap != NULL)
|
||||
memcpy(*bitmap, jbg_dec_getimage(&sd, 0), *bitmap_size);
|
||||
static void
|
||||
_exchange_mps(uint16_t cx)
|
||||
{
|
||||
uint8_t st_cx = _st[cx];
|
||||
uint16_t lsz_st_cx = _LSZ[_st[cx]];
|
||||
|
||||
jbg_dec_free(&sd);
|
||||
if (_reg_a < lsz_st_cx) {
|
||||
_pix = 1 - _mps[cx];
|
||||
|
||||
if (_SWTCH[st_cx])
|
||||
_mps[cx] = _pix;
|
||||
|
||||
_st[cx] = _NLPS[st_cx];
|
||||
} else {
|
||||
_pix = _mps[cx];
|
||||
_st[cx] = _NMPS[st_cx];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_renormd(void)
|
||||
{
|
||||
do {
|
||||
if (_ct == 0)
|
||||
_bytein();
|
||||
|
||||
_reg_a <<= 1;
|
||||
_reg_c <<= 1;
|
||||
_ct--;
|
||||
} while (_reg_a < 0x8000);
|
||||
|
||||
if (_ct == 0)
|
||||
_bytein();
|
||||
}
|
||||
|
||||
static void
|
||||
_decode(uint16_t cx)
|
||||
{
|
||||
_reg_a -= _LSZ[_st[cx]];
|
||||
|
||||
if (_reg_a > _reg_c >> 16) {
|
||||
if (_reg_a < 0x8000) {
|
||||
_exchange_mps(cx);
|
||||
_renormd();
|
||||
} else {
|
||||
_pix = _mps[cx];
|
||||
}
|
||||
} else {
|
||||
_exchange_lps(cx);
|
||||
_renormd();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_procline(int line, char *a, char *b, char *c)
|
||||
{
|
||||
/* The encoder must be erroneous */
|
||||
uint16_t cx = (*b & 0x01) << 2;
|
||||
|
||||
for (int i = 0; i < _width; i++) {
|
||||
_decode(cx);
|
||||
|
||||
cx >>= 1;
|
||||
|
||||
if (_pix == 1) {
|
||||
*(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07));
|
||||
*(c + i) = 1;
|
||||
cx |= 0x0200;
|
||||
} else {
|
||||
cx &= 0xfdff;
|
||||
}
|
||||
|
||||
if (i + 2 < _width && *(a + i + 2) == 1)
|
||||
cx |= 0x0004;
|
||||
else
|
||||
cx &= 0xfffb;
|
||||
|
||||
if (i + 3 < _width && *(b + i + 3) == 1)
|
||||
cx |= 0x0080;
|
||||
else
|
||||
cx &= 0xff7f;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
_procstripe(void)
|
||||
{
|
||||
if (_height <= 0 || _width_padded <= 0)
|
||||
return 1;
|
||||
|
||||
int pix_size = 8 * _width_padded;
|
||||
|
||||
char *buf = malloc(3 * pix_size);
|
||||
|
||||
if (buf == NULL)
|
||||
return 1;
|
||||
|
||||
memset(buf, 0, 3 * pix_size);
|
||||
|
||||
char *a = buf;
|
||||
char *b = a + pix_size;
|
||||
char *c = b + pix_size;
|
||||
char *z;
|
||||
|
||||
for (int i = 0; i < _height; i++) {
|
||||
_decode(0x029c);
|
||||
|
||||
if (_pix == 1) {
|
||||
if (i > 0)
|
||||
memcpy(_ret + _width_padded * (_height - i - 1),
|
||||
_ret + _width_padded * (_height - i),
|
||||
_width_padded);
|
||||
|
||||
memcpy(c, b, pix_size);
|
||||
} else {
|
||||
/* line atypical */
|
||||
memset(c, 0, pix_size);
|
||||
_procline(i, a, b, c);
|
||||
}
|
||||
|
||||
z = a;
|
||||
a = b;
|
||||
b = c;
|
||||
c = z;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
strdec_jbig(char **bitmap, int width, int height,
|
||||
const char * restrict jbig, int jbig_size)
|
||||
{
|
||||
_width = width;
|
||||
_height = height;
|
||||
|
||||
_width_padded = (_width + 7) / 8;
|
||||
|
||||
memset(*bitmap, 0, _height * _width_padded);
|
||||
|
||||
_ret_pos = 0;
|
||||
_ret = *bitmap;
|
||||
|
||||
_scd_size = jbig_size;
|
||||
_scd = (unsigned char *) jbig;
|
||||
|
||||
_initdec();
|
||||
return _procstripe();
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strdec_jbig(char **bitmap, int *bitmap_size,
|
||||
const char * restrict data, int data_size);
|
||||
int strdec_jbig(char **bitmap, int width, int height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
|
35
src/jbig2.c
Normal file
35
src/jbig2.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <jbig2.h>
|
||||
|
||||
int
|
||||
strdec_jbig2(char **bitmap,
|
||||
const char * restrict jbig2, int jbig2_size)
|
||||
{
|
||||
Jbig2Ctx *ctx = jbig2_ctx_new(NULL, JBIG2_OPTIONS_EMBEDDED, NULL, NULL, NULL);
|
||||
|
||||
jbig2_data_in(ctx, (unsigned char *) jbig2, jbig2_size);
|
||||
|
||||
jbig2_complete_page(ctx);
|
||||
|
||||
Jbig2Image *image = jbig2_page_out(ctx);
|
||||
|
||||
int width_padded = (image->width + 7) / 8;
|
||||
unsigned char *data = image->data;
|
||||
|
||||
for (unsigned int i = 0; i < image->height; i++) {
|
||||
memcpy(*bitmap + i * width_padded, data, width_padded);
|
||||
data += image->stride;
|
||||
}
|
||||
|
||||
jbig2_release_page(ctx, image);
|
||||
return 0;
|
||||
}
|
7
src/jbig2.h
Normal file
7
src/jbig2.h
Normal file
|
@ -0,0 +1,7 @@
|
|||
/*
|
||||
* Copyright (c) 2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strdec_jbig2(char **bitmap, const char * restrict jbig2, int jbig2_size);
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -82,7 +82,7 @@ main(int argc, char **argv)
|
|||
|
||||
if (param->stat > 0)
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
|
||||
"Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n");
|
||||
"Copyright (c) 2020-2022, yzrh <yzrh@noema.org>\n\n");
|
||||
|
||||
cnki_info(&param);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#define VERSION "0"
|
||||
#define RELEASE "1"
|
||||
#define RELEASE "2"
|
||||
#define PATCH "0"
|
||||
#define EXTRA ""
|
||||
|
|
Loading…
Reference in a new issue