Fix HN conversion and add JBIG2 support.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2022-12-22 19:47:40 +00:00
parent ac3b1dda63
commit 9c1f1d0b75
19 changed files with 519 additions and 194 deletions

View file

@ -1,10 +1,16 @@
0.2.0 (2021-XX-XX) 0.3.0 (2022-XX-XX)
==================
* Support JPEG 2000 for HN.
0.2.0 (2022-12-22)
================== ==================
* KDH conversion now produces a valid PDF * KDH conversion now produces a valid PDF
* Handle binary data in dictionary. * Handle binary data in dictionary.
* Add preliminary support for HN * Add preliminary support for HN
* Fix root object dictionary generation when root object has more than two children. * Fix root object dictionary generation when root object has more than two children.
* Fix memory leak and data type.
0.1.0 (2020-04-08) 0.1.0 (2020-04-08)
================== ==================

View file

@ -6,10 +6,10 @@ Melon: Converter that produces PDF from CNKI proprietary formats
Development Development
----------- -----------
Currently, CAJ and KDH can be converted. Please report Currently, CAJ, KDH, and HN can be converted. Please report
any failures with a sample that can reproduce the behaviour. any failures with a sample that can reproduce the behaviour.
HN support is being worked on. HN conversion does not support JPEG 2000 yet.
Dependency Dependency
---------- ----------
@ -17,7 +17,7 @@ Dependency
1. OpenSSL 1. OpenSSL
2. libiconv 2. libiconv
3. zlib 3. zlib
4. JBIG-KIT 4. jbig2dec
5. libjpeg-turbo 5. libjpeg-turbo
Usage Usage
@ -39,7 +39,7 @@ Specify output file
Set buffer size (default 512k) Set buffer size (default 512k)
-v, --verbose -v, --verbose
Print more information (twice for even more) Print more information (twice for even more, three times for HN image decoding information as well)
Thanks Thanks
====== ======

View file

@ -4,19 +4,19 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
src = melon.c iconv.c zlib.c jbig.c jpeg.c \ src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \ cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \ cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \ inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
cnki.h pdf_cnki.h cnki_jbig.h pdf.h cnki.h pdf_cnki.h cnki_jbig.h pdf.h
obj = ${src:.c=.o} obj = ${src:.c=.o}
PREFIX = /usr/local PREFIX = /usr/local
CFLAGS = -O2 -pipe -flto -Wall -Wextra -Wno-unused-parameter CFLAGS = -O2 -pipe -flto -Wall -Wextra
LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
CFLAGS += -I/usr/local/include CFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib LDFLAGS += -L/usr/local/lib

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -97,5 +97,7 @@ int cnki_jbig(char **bitmap, int *bitmap_size,
int *bitmap_width, int *bitmap_height, int *bitmap_width, int *bitmap_height,
const char * restrict jbig, int jbig_size); const char * restrict jbig, int jbig_size);
/* cnki_xml.c */ /* cnki_jbig2.c */
int cnki_xml(char **xml, FILE **fp); int cnki_jbig2(char **bitmap, int *bitmap_size,
int *bitmap_width, int *bitmap_height,
const char * restrict jbig, int jbig_size);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -22,68 +22,22 @@ cnki_jbig(char **bitmap, int *bitmap_size,
memcpy(dib, jbig, 40); memcpy(dib, jbig, 40);
bih_t *bih = malloc(sizeof(bih_t)); int width_padded = (dib->width * dib->depth + 7) / 8;
if (bih == NULL) { *bitmap_size = dib->height * width_padded;
*bitmap = malloc(*bitmap_size);
if (*bitmap == NULL) {
free(dib); free(dib);
return 1; return 1;
} }
memset(bih, 0, sizeof(bih_t)); strdec_jbig(bitmap, dib->width, dib->height, jbig + 48, jbig_size - 48);
bih->d_l = 0; *bitmap_width = dib->width;
bih->d = 0; *bitmap_height = dib->height;
bih->p = 1;
bih->fill = 0;
bih->x_d = dib->width;
bih->y_d = dib->height;
bih->l_0 = bih->y_d / 35;
while (bih->l_0 > 128)
bih->l_0--;
if (bih->l_0 < 2)
bih->l_0 = 2;
bih->m_x = 8;
bih->m_y = 0;
bih->order |= 1 << 1;
bih->order |= 1 << 0;
bih->options |= 1 << 4;
bih->options |= 1 << 3;
bih->options |= 1 << 2;
bih->dptable = NULL;
int bie_size = jbig_size - 28; /* - 40 - 8 + 20 */
char *bie = malloc(bie_size);
if (bie == NULL) {
free(dib);
free(bih);
return 1;
}
memcpy(bie, bih, 20);
memcpy(bie + 20, jbig + 48, jbig_size - 48);
int ret = strdec_jbig(bitmap, bitmap_size, bie, bie_size);
if (ret == 0) {
*bitmap_width = bih->x_d;
*bitmap_height = bih->y_d;
}
free(dib); free(dib);
free(bih);
free(bie);
if (ret != 0)
return 1;
return 0; return 0;
} }

View file

@ -1,48 +1,11 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
#include <stdint.h> #include <stdint.h>
/*
* order (MSB first):
* 0
* 0
* 0
* 0
* HITOLO
* SEQ
* ILEAVE (default)
* SMID (default)
*
* options (MSB first):
* 0
* LRLTWO
* VLENGTH
* TPDON (default)
* TPBON (default)
* DPON (default)
* DPPRIV
* DPLAST
*/
typedef struct _bih_t {
char d_l; /* Initial resolution layer */
char d; /* Final resolution layer */
char p; /* Number of bit-planes, for bi-level image, always 1 */
char fill; /* Always 0 */
/* MSB first */
int32_t x_d; /* Horizontal dimension at highestresolution */
int32_t y_d; /* Vertical dimension at highest resolution */
int32_t l_0; /* Number of lines per stripe at lowest resolution */
char m_x; /* Maximum horizontal offsets (default: 8) */
char m_y; /* Maximum vertical offsets (default: 0) */
char order;
char options;
char *dptable; /* 0 or 1728 */
} bih_t;
typedef enum _dib_compression_code { typedef enum _dib_compression_code {
BI_RGB, BI_RGB,
BI_RLE8, BI_RLE8,

43
src/cnki_jbig2.c Normal file
View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include <string.h>
#include "cnki_jbig.h"
#include "jbig2.h"
/*
 * Decode one HN JBIG2 image record into a packed 1-bit bitmap.
 *
 * The record is read as: 40 bytes copied into a dib_t, 8 bytes that are
 * skipped, then the JBIG2 stream handed to strdec_jbig2().
 *
 * bitmap         receives a newly allocated buffer; the caller frees it.
 *                Set to NULL on failure.
 * bitmap_size    receives the buffer size in bytes,
 *                height * ((width * depth + 7) / 8).
 * bitmap_width   receives the width from the DIB header.
 * bitmap_height  receives the height from the DIB header.
 * jbig           record data (header + stream).
 * jbig_size      number of bytes available at jbig.
 *
 * Returns 0 on success, 1 on failure.
 */
int
cnki_jbig2(char **bitmap, int *bitmap_size,
	int *bitmap_width, int *bitmap_height,
	const char * restrict jbig, int jbig_size)
{
	if (bitmap == NULL || bitmap_size == NULL ||
	    bitmap_width == NULL || bitmap_height == NULL)
		return 1;

	*bitmap = NULL;
	*bitmap_size = 0;

	/* The header (40) and the skipped bytes (8) must be followed by data */
	if (jbig == NULL || jbig_size <= 48)
		return 1;

	dib_t *dib = malloc(sizeof(dib_t));

	if (dib == NULL)
		return 1;

	/* NOTE(review): assumes sizeof(dib_t) >= 40, as the original did */
	memcpy(dib, jbig, 40);

	/* Widen before multiplying so a hostile header cannot overflow int */
	long long width = dib->width;
	long long height = dib->height;
	long long depth = dib->depth;

	free(dib);

	if (width <= 0 || height <= 0 || depth <= 0 ||
	    width > INT32_MAX || height > INT32_MAX || depth > 0xffff)
		return 1;

	long long width_padded = (width * depth + 7) / 8;

	if (width_padded > INT32_MAX / height)
		return 1;

	long long total = height * width_padded;

	/* Zero-filled so a short decode never exposes uninitialized memory */
	*bitmap = calloc(1, (size_t) total);
	if (*bitmap == NULL)
		return 1;

	/*
	 * NOTE(review): strdec_jbig2() copies rows using the dimensions of the
	 * decoded page, not the DIB header; it has no way to learn the buffer
	 * size through its current interface. Confirm the two always agree.
	 */
	if (strdec_jbig2(bitmap, jbig + 48, jbig_size - 48) != 0) {
		free(*bitmap);
		*bitmap = NULL;
		return 1;
	}

	*bitmap_size = (int) total;
	*bitmap_width = (int) width;
	*bitmap_height = (int) height;

	return 0;
}

View file

@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Generating object\n"); printf("Generating object\n");
dictionary_size = 64 + 12 * kid[0]; dictionary_size = 64 + 16 * kid[0];
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param)
int *ids = NULL; int *ids = NULL;
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3); pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
int bitmap_size;
char *bitmap;
int stream_size; int stream_size;
char *stream; char *stream;
@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param)
if (dim == NULL) { if (dim == NULL) {
free(root_kid); free(root_kid);
free(ids);
return 1;
}
dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(ids);
free(dim);
return 1; return 1;
} }
for (int i = 0; i < ptr->image_length; i++) { for (int i = 0; i < ptr->image_length; i++) {
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(dim);
return 1;
}
memset(dictionary, 0, dictionary_size); memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/Type /XObject\n" strcat(dictionary, "<<\n/Type /XObject\n"
"/Subtype /Image\n"); "/Subtype /Image\n");
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("\tDecoding data, page %04d item %02d... ", printf("\tDecoding data, page %04d item %02d format %d... ",
ptr->page, i); ptr->page, i, ptr->image_data[i].format);
switch (ptr->image_data[i].format) { switch (ptr->image_data[i].format) {
case JBIG: case JBIG:
ret = cnki_jbig(&stream, ret = cnki_jbig(&bitmap,
&stream_size, &bitmap_size,
&wh[0], &wh[0],
&wh[1], &wh[1],
ptr->image_data[i].image, ptr->image_data[i].image,
@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param)
break; break;
} }
if (strdeflate(&stream, &stream_size,
bitmap, bitmap_size) != 0) {
free(root_kid);
free(ids);
free(dim);
free(dictionary);
return 1;
}
free(bitmap);
snprintf(buf, 64, "/Width %d\n/Height %d\n", snprintf(buf, 64, "/Width %d\n/Height %d\n",
wh[0], wh[1]); wh[0], wh[1]);
strcat(dictionary, buf); strcat(dictionary, buf);
strcat(dictionary, "/ColorSpace /DeviceGray\n" strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 1\n"); "/BitsPerComponent 1\n");
strcat(dictionary, "/Decode [1.0 0.0]\n");
snprintf(buf, 64, "/Length %d\n", snprintf(buf, 64, "/Length %d\n",
stream_size); stream_size);
strcat(dictionary, buf); strcat(dictionary, buf);
strcat(dictionary, "/Filter /CCITTFaxDecode\n"); strcat(dictionary, "/Filter /FlateDecode\n");
dim[i * 2] = wh[0]; dim[i * 2] = wh[0];
dim[i * 2 + 1] = wh[1]; dim[i * 2 + 1] = wh[1];
@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param)
stream_size = ptr->image_data[i].size; stream_size = ptr->image_data[i].size;
stream = malloc(stream_size); stream = malloc(stream_size);
if (stream == NULL) { if (stream == NULL) {
free(dictionary);
free(root_kid); free(root_kid);
free(ids);
free(dim); free(dim);
free(dictionary);
return 1; return 1;
} }
memcpy(stream, ptr->image_data[i].image, stream_size); memcpy(stream, ptr->image_data[i].image, stream_size);
@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param)
wh[0], wh[1]); wh[0], wh[1]);
strcat(dictionary, buf); strcat(dictionary, buf);
strcat(dictionary, "/ColorSpace /DeviceRGB\n" strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 8\n"); "/BitsPerComponent 8\n");
snprintf(buf, 64, "/Length %d\n", snprintf(buf, 64, "/Length %d\n",
@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param)
dim[i * 2 + 1] = wh[1]; dim[i * 2 + 1] = wh[1];
break; break;
case JBIG2: case JBIG2:
ret = cnki_jbig2(&bitmap,
&bitmap_size,
&wh[0],
&wh[1],
ptr->image_data[i].image,
ptr->image_data[i].size);
if (ret != 0) {
dim[i * 2] = 0;
dim[i * 2 + 1] = 0;
break;
}
if (strdeflate(&stream, &stream_size,
bitmap, bitmap_size) != 0) {
free(root_kid);
free(ids);
free(dim);
free(dictionary);
return 1;
}
free(bitmap);
snprintf(buf, 64, "/Width %d\n/Height %d\n",
wh[0], wh[1]);
strcat(dictionary, buf);
strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 1\n");
strcat(dictionary, "/Decode [1.0 0.0]\n");
snprintf(buf, 64, "/Length %d\n",
stream_size);
strcat(dictionary, buf);
strcat(dictionary, "/Filter /FlateDecode\n");
dim[i * 2] = wh[0];
dim[i * 2 + 1] = wh[1];
break;
case JPX: case JPX:
default: default:
ret = -1; ret = -1;
@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param)
if (ret == 0) { if (ret == 0) {
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("Done\n"); printf("%6d byte(s), width %4d, height %4d.\n",
stream_size, wh[0], wh[1]);
pdf_obj_append(&pdf, ids[i], pdf_obj_append(&pdf, ids[i],
NULL, dictionary, stream, stream_size); NULL, dictionary, stream, stream_size);
free(dictionary);
free(stream); free(stream);
} else if (ret == 1) { } else if (ret == 1) {
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("Failed\n"); printf("Not extracted.\n");
free(dictionary);
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} else { } else {
if ((*param)->stat > 2) if ((*param)->stat > 2)
printf("Unsupported format\n"); printf("Unsupported format.\n");
free(dictionary); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} }
} }
dictionary_size = 128;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
free(dim);
return 1;
}
memset(dictionary, 0, dictionary_size); memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/XObject <<"); strcat(dictionary, "<<\n/XObject <<");
@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param)
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) { if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size); cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
dictionary_size = stream_size / 8 + 7; dictionary_size = 64 + 2 * stream_size;
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
free(root_kid); free(root_kid);
free(ids);
free(dim); free(dim);
return 1; return 1;
} }
@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, ">"); strcat(dictionary, ">");
} else { } else {
dictionary_size = ptr->text_size; dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
free(root_kid); free(root_kid);
free(ids);
free(dim); free(dim);
return 1; return 1;
} }
@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param)
/* FIXME: Use the text somehow? */ /* FIXME: Use the text somehow? */
free(dictionary); free(dictionary);
dictionary_size = 64 + 12 * ptr->image_length; dictionary_size = 64 + 64 * ptr->image_length;
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
free(root_kid); free(root_kid);
free(ids);
free(dim); free(dim);
return 1; return 1;
} }
@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n"); strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
double resize_x;
double resize_y;
for (int i = 0; i < ptr->image_length; i++) { for (int i = 0; i < ptr->image_length; i++) {
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0) if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
continue; continue;
/* Scale within bound of A4 paper */
resize_x = 595.276 * 4 / dim[i * 2];
resize_y = 841.89 * 4 / dim[i * 2 + 1];
if (resize_y < resize_x)
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
resize_y, resize_y);
else
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
resize_x, resize_x);
strcat(dictionary, buf);
/* Apply transformation matrix */ /* Apply transformation matrix */
if (ptr->image_data[i].format == DCT_1) { if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 1 0 %d cm\n", snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
dim[i * 2 + 1]); dim[i * 2 + 1]);
strcat(dictionary, buf); strcat(dictionary, buf);
@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "Q"); strcat(dictionary, "Q");
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) { if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
free(dictionary);
free(root_kid); free(root_kid);
free(ids);
free(dim); free(dim);
free(dictionary);
return 1; return 1;
} }
@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, buf); strcat(dictionary, buf);
/* A4 paper */ /* A4 paper */
strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n"); strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
/* Add /Parent when we know root */ /* Add /Parent when we know root */
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0); pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Generating root object\n"); printf("Generating root object\n");
dictionary_size = 64 + 12 * (*param)->file_stat->page; dictionary_size = 64 + 64 * (*param)->file_stat->page;
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {
@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary); free(dictionary);
dictionary_size = 128; dictionary_size = 256;
dictionary = malloc(dictionary_size); dictionary = malloc(dictionary_size);
if (dictionary == NULL) { if (dictionary == NULL) {

View file

@ -1,14 +0,0 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
/*
 * Placeholder for XML metadata extraction.
 *
 * Neither argument is read or written; the function always reports
 * failure by returning 1.
 */
int
cnki_xml(char **xml, FILE **fp)
{
	/* TODO: Extract XML and embed into `/Metadata' */
	(void) xml;
	(void) fp;

	return 1;
}

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -18,7 +18,7 @@ cnki_zlib(char **dst, int *dst_size,
*dst_size = size; *dst_size = size;
if (strinflate(dst, size, src + 24, size - 24) != 0) if (strinflate(dst, size, src + 24, src_size - 24) != 0)
return 1; return 1;
return 0; return 0;

View file

@ -1,41 +1,303 @@
/* /*
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * Copyright (c) 2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
#include <stdio.h> /* FIXME: test */ #include <stdbool.h>
#include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <jbig.h> static const uint16_t _LSZ[256] = {
0x5a1d,
0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, 0x0406,
0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, 0x0068,
0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, 0x261f,
0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, 0x0861,
0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, 0x01f8,
int 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, 0x5b12,
strdec_jbig(char **bitmap, int *bitmap_size, 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, 0x1aa9,
const char * restrict data, int data_size) 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, 0x5832,
0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, 0x5570,
0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, 0x4f46,
0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, 0x415e,
0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
};
static const uint8_t _NLPS[256] = {
1,
14, 16, 18, 20, 23, 25, 28, 30,
33, 35, 9, 10, 12, 15, 36, 38,
39, 40, 42, 43, 45, 46, 48, 49,
51, 52, 54, 56, 57, 59, 60, 62,
63, 32, 33, 37, 64, 65, 67, 68,
69, 70, 72, 73, 74, 75, 77, 78,
79, 48, 50, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 61, 61, 65,
80, 81, 82, 83, 84, 86, 87, 87,
72, 72, 74, 74, 75, 77, 77, 80,
88, 89, 90, 91, 92, 93, 86, 88,
95, 96, 97, 99, 99, 93, 95, 101,
102, 103, 104, 99, 105, 106, 107, 103,
105, 108, 109, 110, 111, 110, 112, 112
};
static const uint8_t _NMPS[256] = {
1,
2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 13, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 9, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 32, 65,
66, 67, 68, 69, 70, 71, 72, 73,
74, 75, 76, 77, 78, 79, 48, 81,
82, 83, 84, 85, 86, 87, 71, 89,
90, 91, 92, 93, 94, 86, 96, 97,
98, 99, 100, 93, 102, 103, 104, 99,
106, 107, 103, 109, 107, 111, 109, 111
};
static const bool _SWTCH[256] = {
1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 1
};
static uint8_t _ct;
static uint8_t _pix;
static uint16_t _reg_a;
static uint32_t _reg_c;
static uint8_t _mps[0x1000];
static uint8_t _st[0x1000];
static int _width;
static int _height;
static int _width_padded;
static int _ret_pos;
static char *_ret;
static int _scd_size;
static unsigned char *_scd;
static void
_bytein(void)
{ {
struct jbg_dec_state sd; if (_ret_pos < _scd_size)
_reg_c += *(_scd + _ret_pos++) << 8;
jbg_dec_init(&sd); _ct = 8;
}
unsigned char *data_ptr[1] = {(unsigned char *) data}; static void
_initdec(void)
{
memset(_mps, 0, 0x1000);
memset(_st, 0, 0x1000);
/* FIXME: test */ _reg_c = 0;
int ret; _bytein();
if ((ret = jbg_dec_in(&sd, (unsigned char *) data_ptr, _reg_c <<= 8;
data_size, NULL)) != JBG_EOK) { _bytein();
printf("[%s] ", jbg_strerror(ret)); _reg_c <<= 8;
jbg_dec_free(&sd); _bytein();
return 1; _reg_a = 0x0000;
}
static void
_exchange_lps(uint16_t cx)
{
uint8_t st_cx = _st[cx];
uint16_t lsz_st_cx = _LSZ[_st[cx]];
if (_reg_a < lsz_st_cx) {
_pix = _mps[cx];
_st[cx] = _NMPS[st_cx];
} else {
_pix = 1 - _mps[cx];
if (_SWTCH[st_cx])
_mps[cx] = _pix;
_st[cx] = _NLPS[st_cx];
} }
*bitmap_size = jbg_dec_getsize(&sd); _reg_c -= _reg_a << 16;
*bitmap = malloc(*bitmap_size); _reg_a = lsz_st_cx;
}
if (*bitmap != NULL) static void
memcpy(*bitmap, jbg_dec_getimage(&sd, 0), *bitmap_size); _exchange_mps(uint16_t cx)
{
uint8_t st_cx = _st[cx];
uint16_t lsz_st_cx = _LSZ[_st[cx]];
jbg_dec_free(&sd); if (_reg_a < lsz_st_cx) {
_pix = 1 - _mps[cx];
if (_SWTCH[st_cx])
_mps[cx] = _pix;
_st[cx] = _NLPS[st_cx];
} else {
_pix = _mps[cx];
_st[cx] = _NMPS[st_cx];
}
}
/*
 * Renormalise the decoder registers.
 *
 * Shifts _reg_a and _reg_c left one bit at a time, consuming one unit of
 * _ct per shift, until _reg_a is at least 0x8000. _bytein() is called
 * whenever _ct has reached zero, both before each shift and once more
 * after the loop.
 */
static void
_renormd(void)
{
	for (;;) {
		if (_ct == 0)
			_bytein();

		_reg_a <<= 1;
		_reg_c <<= 1;
		_ct--;

		if (_reg_a >= 0x8000)
			break;
	}

	if (_ct == 0)
		_bytein();
}
/*
 * Decode one binary decision for context `cx'; the result is left in _pix.
 *
 * The interval register is first reduced by the table value for the
 * context's current state. Depending on how it then compares with the
 * upper half of _reg_c, either the LPS exchange, the MPS exchange, or the
 * plain "take the stored MPS" path is followed. Both exchange paths
 * renormalise afterwards.
 */
static void
_decode(uint16_t cx)
{
	_reg_a -= _LSZ[_st[cx]];

	if (_reg_a <= _reg_c >> 16) {
		_exchange_lps(cx);
		_renormd();
		return;
	}

	if (_reg_a >= 0x8000) {
		/* No renormalisation needed: the stored MPS is the answer */
		_pix = _mps[cx];
		return;
	}

	_exchange_mps(cx);
	_renormd();
}
/*
 * Decode one line pixel by pixel.
 *
 * line  index of the line being decoded; it is written to row
 *       (_height - line - 1) of _ret, i.e. rows are stored bottom-up.
 * a, b  one-byte-per-pixel buffers of earlier lines, consulted to build
 *       the context for each pixel.
 * c     one-byte-per-pixel buffer for this line; an entry is set to 1 for
 *       every pixel decoded as 1 (the caller clears it beforehand).
 */
static void
_procline(int line, char *a, char *b, char *c)
{
	/* The encoder must be erroneous */
	uint16_t ctx = (b[0] & 0x01) << 2;

	for (int col = 0; col < _width; col++) {
		_decode(ctx);

		ctx >>= 1;

		/* Bit 9: the pixel that has just been decoded */
		if (_pix == 1) {
			char *row = _ret + _width_padded * (_height - line - 1);

			row[col / 8] |= _pix << (7 - (col & 0x07));
			c[col] = 1;
			ctx |= 0x0200;
		} else {
			ctx &= 0xfdff;
		}

		/* Bit 2: pixel two columns ahead in buffer `a' */
		if (col + 2 < _width && a[col + 2] == 1)
			ctx |= 0x0004;
		else
			ctx &= 0xfffb;

		/* Bit 7: pixel three columns ahead in buffer `b' */
		if (col + 3 < _width && b[col + 3] == 1)
			ctx |= 0x0080;
		else
			ctx &= 0xff7f;
	}
}
/*
 * Decode every line of the image into _ret.
 *
 * Three one-byte-per-pixel line buffers are kept and rotated after each
 * line: the line being decoded and the two before it. For each line a
 * flag is decoded with the fixed context 0x029c; when it is 1 the line is
 * a copy of the previous one, otherwise it is decoded pixel by pixel.
 *
 * Returns 0 on success, 1 when the dimensions are not positive or the
 * line buffers cannot be allocated.
 */
static int
_procstripe(void)
{
	if (_height <= 0 || _width_padded <= 0)
		return 1;

	int pix_size = 8 * _width_padded;

	/* One zero-filled allocation holds all three line buffers */
	char *lines = calloc(3, pix_size);

	if (lines == NULL)
		return 1;

	char *prev2 = lines;
	char *prev = prev2 + pix_size;
	char *cur = prev + pix_size;

	for (int line = 0; line < _height; line++) {
		_decode(0x029c);

		if (_pix == 1) {
			/* Same as the previous line: duplicate row and buffer */
			if (line > 0) {
				char *dst = _ret + _width_padded * (_height - line - 1);

				memcpy(dst, dst + _width_padded, _width_padded);
			}
			memcpy(cur, prev, pix_size);
		} else {
			/* line atypical */
			memset(cur, 0, pix_size);
			_procline(line, prev2, prev, cur);
		}

		/* Rotate: the oldest buffer is reused for the next line */
		char *spare = prev2;

		prev2 = prev;
		prev = cur;
		cur = spare;
	}

	free(lines);

	return 0;
}
/*
 * Decode an arithmetic-coded bi-level image into a caller-supplied buffer.
 *
 * bitmap     points to a buffer of at least height * ((width + 7) / 8)
 *            bytes, allocated by the caller. It is cleared and then filled
 *            with packed 1-bit rows, stored bottom-up.
 * width      image width in pixels, must be positive.
 * height     image height in lines, must be positive.
 * jbig       coded data.
 * jbig_size  number of bytes available at jbig; once it is exhausted the
 *            decoder stops reading from the buffer.
 *
 * Returns 0 on success, 1 on invalid arguments or allocation failure.
 *
 * The decoder state lives in file-scope variables, so this function is
 * not reentrant.
 */
int
strdec_jbig(char **bitmap, int width, int height,
	const char * restrict jbig, int jbig_size)
{
	/* Validate before touching the buffer: a negative size would wrap */
	if (bitmap == NULL || *bitmap == NULL || width <= 0 || height <= 0)
		return 1;

	if (jbig == NULL && jbig_size > 0)
		return 1;

	_width = width;
	_height = height;
	/* Same value as (width + 7) / 8 without overflowing near INT_MAX */
	_width_padded = width / 8 + (width % 8 != 0);

	memset(*bitmap, 0, (size_t) _height * (size_t) _width_padded);

	_ret_pos = 0;
	_ret = *bitmap;

	_scd_size = jbig_size;
	_scd = (unsigned char *) jbig;

	_initdec();

	return _procstripe();
}

View file

@ -1,8 +1,8 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
int strdec_jbig(char **bitmap, int *bitmap_size, int strdec_jbig(char **bitmap, int width, int height,
const char * restrict data, int data_size); const char * restrict jbig, int jbig_size);

35
src/jbig2.c Normal file
View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <jbig2.h>
/*
 * Decode an embedded JBIG2 stream with jbig2dec and copy the page into a
 * caller-supplied buffer.
 *
 * bitmap      points to a buffer allocated by the caller. Rows are written
 *             tightly packed, (page width + 7) / 8 bytes each, for every
 *             row of the decoded page. The buffer size is not known here,
 *             so the caller must guarantee it is large enough.
 * jbig2       JBIG2 data in embedded-stream organisation.
 * jbig2_size  number of bytes available at jbig2.
 *
 * Returns 0 on success, 1 when the arguments are invalid, the decoder
 * context cannot be created, or no page could be decoded.
 */
int
strdec_jbig2(char **bitmap,
	const char * restrict jbig2, int jbig2_size)
{
	if (bitmap == NULL || *bitmap == NULL ||
	    jbig2 == NULL || jbig2_size <= 0)
		return 1;

	Jbig2Ctx *ctx = jbig2_ctx_new(NULL, JBIG2_OPTIONS_EMBEDDED, NULL, NULL, NULL);

	if (ctx == NULL)
		return 1;

	/*
	 * Return values of the next two calls are deliberately not treated as
	 * fatal, matching the original behaviour. Whether a usable page exists
	 * is decided by jbig2_page_out() below.
	 */
	jbig2_data_in(ctx, (const unsigned char *) jbig2, jbig2_size);
	jbig2_complete_page(ctx);

	int ret = 1;
	Jbig2Image *image = jbig2_page_out(ctx);

	if (image != NULL) {
		if (image->data != NULL) {
			size_t width_padded = ((size_t) image->width + 7) / 8;
			const unsigned char *src = image->data;

			/* Drop the per-row stride padding while copying */
			for (size_t row = 0; row < image->height; row++) {
				memcpy(*bitmap + row * width_padded, src, width_padded);
				src += image->stride;
			}

			ret = 0;
		}

		jbig2_release_page(ctx, image);
	}

	/* The context was never released before; free it on every path */
	jbig2_ctx_free(ctx);

	return ret;
}

7
src/jbig2.h Normal file
View file

@ -0,0 +1,7 @@
/*
* Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Decode the JBIG2 data at `jbig2' (`jbig2_size' bytes) and copy the
 * resulting page into `*bitmap' as tightly packed rows of
 * (width + 7) / 8 bytes each. `*bitmap' must already be allocated by the
 * caller; this function does not allocate or resize it.
 *
 * Returns 0 on success; callers should treat any non-zero value as failure.
 */
int strdec_jbig2(char **bitmap, const char * restrict jbig2, int jbig2_size);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -82,7 +82,7 @@ main(int argc, char **argv)
if (param->stat > 0) if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n" printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
"Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n"); "Copyright (c) 2020-2022, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param); cnki_info(&param);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */

View file

@ -1,10 +1,10 @@
/* /*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
#define VERSION "0" #define VERSION "0"
#define RELEASE "1" #define RELEASE "2"
#define PATCH "0" #define PATCH "0"
#define EXTRA "" #define EXTRA ""