Decode JBIG and JPEG during HN conversion.

This commit is contained in:
yzrh 2020-12-31 22:36:28 +00:00
parent b20c6ad3ed
commit 1994f122cc
31 changed files with 1035 additions and 274 deletions

View file

@ -17,6 +17,8 @@ Dependency
1. OpenSSL
2. libiconv
3. zlib
4. JBIG-KIT
5. libjpeg-turbo
Usage
=====

View file

@ -1,22 +1,22 @@
#
# Copyright (c) 2020, yzrh <yzrh@noema.org>
# Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
#
# SPDX-License-Identifier: Apache-2.0
#
src = melon.c iconv.c zlib.c \
src = melon.c iconv.c zlib.c jbig.c jpeg.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki.c \
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
inc = extern.h version.h iconv.h zlib.h \
cnki.h pdf_cnki.h pdf.h
inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
obj = ${src:.c=.o}
PREFIX = /usr/local
CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -Wl,--as-needed
CFLAGS = -O3 -march=native -pipe -flto=thin -Wall -Wextra -Wno-unused-parameter
LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed
CFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -82,6 +82,7 @@ typedef struct _cnki_t {
/* cnki_pdf.c */
int cnki_pdf(cnki_t **param);
int cnki_pdf_hn(cnki_t **param);
/* cnki_outline_tree.c */
int cnki_outline_tree(object_outline_tree_t **outline_tree,
@ -91,5 +92,10 @@ int cnki_outline_tree(object_outline_tree_t **outline_tree,
int cnki_zlib(char **dst, int *dst_size,
const char * restrict src, int src_size);
/* cnki_jbig.c */
int cnki_jbig(char **bitmap, int *bitmap_size,
int *bitmap_width, int *bitmap_height,
const char * restrict jbig, int jbig_size);
/* cnki_xml.c */
int cnki_xml(char **xml, FILE **fp);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -9,6 +9,8 @@
#include "cnki.h"
#include "iconv.h"
#include "zlib.h"
#include "jpeg.h"
#include "pdf.h"
#include "pdf_cnki.h"
@ -131,231 +133,13 @@ cnki_hn(cnki_t **param)
ptr = ptr->next;
}
if ((*param)->stat > 1)
if ((*param)->stat > 0)
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
if ((*param)->stat > 1)
printf("Generating PDF object(s)\n");
pdf_object_t *pdf = NULL;
if (pdf_obj_create(&pdf) != 0)
return 1;
int buf_size;
char *buf;
int str_size;
char *str;
int conv_size;
char *conv_dst;
char conv_src[2];
char conv_hex[3];
ptr = (*param)->object_hn;
while (ptr != NULL) {
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
cnki_zlib(&buf, &buf_size, ptr->text, ptr->text_size);
str_size = buf_size / 8 + 7;
str = malloc(str_size);
if (str == NULL)
return 1;
memset(str, 0, str_size);
strcat(str, "<feff");
for (int i = 0; i < buf_size; i += 16) {
conv_src[0] = buf[i + 7];
conv_src[1] = buf[i + 6];
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
for (int j = 0; j < conv_size - 2; j++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[j]);
strcat(str, conv_hex);
}
free(conv_dst);
}
}
free(buf);
strcat(str, ">");
} else {
str_size = ptr->text_size;
str = malloc(str_size);
if (str == NULL)
return 1;
memset(str, 0, str_size);
strcat(str, "<feff");
for (int i = 0; i < ptr->text_size; i += 4) {
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
conv_src, "GB18030", &conv_size) == 0) {
for (int j = 0; j < conv_size - 2; j++) {
snprintf(conv_hex, 3,
"%02x", (unsigned char) conv_dst[j]);
strcat(str, conv_hex);
}
free(conv_dst);
}
}
strcat(str, ">");
}
pdf_obj_append(&pdf, 0, str, NULL, NULL);
free(str);
ptr = ptr->next;
}
if ((*param)->stat > 1) {
printf("\t%8s\t%12s\t%12s\t%12s\n",
"id",
"object",
"dictionary",
"stream");
pdf_object_t *ptr = pdf->next;
while (ptr != NULL) {
printf("\t%8d\t%12d\t%12d\t%12d\n",
ptr->id,
ptr->object_size,
ptr->dictionary_size,
ptr->stream_size);
ptr = ptr->next;
}
}
cnki_pdf_hn(param);
if ((*param)->stat > 0)
printf("Generated %d object(s)\n",
pdf_get_count(&pdf));
printf("Conversion ended\n");
int *ids = NULL;
if ((*param)->file_stat->outline > 0) {
if ((*param)->stat > 1)
printf("Generating outline object(s)\n\t%8s\n", "id");
pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
if ((*param)->stat > 1)
for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
printf("\t%8d\n", ids[i]);
if ((*param)->stat > 0) {
if (outline != 0)
printf("No outline information\n");
else
printf("Generated %d outline object(s)\n",
(*param)->file_stat->outline + 1);
}
}
if ((*param)->stat > 1)
printf("Writing header\n");
long cur = 0;
if ((*param)->stat > 0)
cur = ftell((*param)->fp_o);
if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
fprintf(stderr, "Header not written\n");
return 1;
} else {
if ((*param)->stat > 0)
printf("Header %ld byte(s) written\n",
ftell((*param)->fp_o) - cur);
}
if ((*param)->stat > 1)
printf("Writing object(s)\n");
pdf_dump_obj(&pdf, &(*param)->fp_o);
if ((*param)->stat > 1) {
printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
"address",
"size",
"id",
"object",
"dictionary",
"stream");
pdf_object_t *ptr = pdf->next;
while (ptr != NULL) {
printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
ptr->address,
ptr->size,
ptr->id,
ptr->object_size,
ptr->dictionary_size,
ptr->stream_size);
ptr = ptr->next;
}
}
if ((*param)->stat > 0)
printf("%d object(s) %ld byte(s) written\n",
pdf_get_count(&pdf),
ftell((*param)->fp_o));
long xref = ftell((*param)->fp_o);
if ((*param)->stat > 1)
printf("Writing cross-reference table\n");
if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
if ((*param)->stat > 0)
printf("Cross-reference table not written\n");
} else {
if ((*param)->stat > 0)
printf("Cross-reference table %ld byte(s) written\n",
ftell((*param)->fp_o) - xref);
}
if ((*param)->stat > 1)
printf("Writing trailer\n");
if ((*param)->stat > 0)
cur = ftell((*param)->fp_o);
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
if ((*param)->stat > 0)
printf("Trailer not written\n");
} else {
if ((*param)->stat > 0)
printf("Trailer %ld byte(s) written\n",
ftell((*param)->fp_o) - cur);
}
if ((*param)->stat > 0)
printf("Total %ld byte(s) written\n",
ftell((*param)->fp_o));
pdf_obj_destroy(&pdf);
if ((*param)->stat > 0)
printf("Conversion ended (partial)\n");
/* TODO: Finish me please :) */
return 0;
}

89
src/cnki_jbig.c Normal file
View file

@ -0,0 +1,89 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include <string.h>
#include "cnki_jbig.h"
#include "jbig.h"
/* Read a 32-bit little-endian value (DIB headers are little-endian). */
static uint32_t
cnki_jbig_get_le32(const unsigned char *p)
{
	return (uint32_t) p[0] |
		((uint32_t) p[1] << 8) |
		((uint32_t) p[2] << 16) |
		((uint32_t) p[3] << 24);
}

/* Store a 32-bit value MSB first, as the JBIG header requires. */
static void
cnki_jbig_put_be32(unsigned char *p, uint32_t v)
{
	p[0] = (unsigned char) (v >> 24);
	p[1] = (unsigned char) (v >> 16);
	p[2] = (unsigned char) (v >> 8);
	p[3] = (unsigned char) v;
}

/*
 * Decode a CNKI-wrapped JBIG image into a raw bitmap.
 *
 * The input is read as a 40-byte DIB header, 8 further bytes that are
 * skipped, and then the JBIG payload. A 20-byte bi-level image header
 * (BIH) is synthesised from the DIB dimensions, placed in front of the
 * payload and the result is handed to strdec_jbig().
 *
 * bitmap, bitmap_size:       receive the buffer produced by strdec_jbig()
 * bitmap_width, bitmap_height: receive the DIB dimensions on success
 * jbig, jbig_size:           wrapped image data and its length in bytes
 *
 * Returns 0 on success and 1 on malformed input, allocation failure or
 * decoder failure.
 */
int
cnki_jbig(char **bitmap, int *bitmap_size,
	int *bitmap_width, int *bitmap_height,
	const char * restrict jbig, int jbig_size)
{
	enum {
		DIB_SIZE = 40,	/* DIB header */
		GAP_SIZE = 8,	/* bytes between DIB header and payload */
		BIH_SIZE = 20	/* generated JBIG header */
	};

	/* Without this the size arithmetic below would go negative */
	if (jbig == NULL || jbig_size < DIB_SIZE + GAP_SIZE)
		return 1;

	const unsigned char *src = (const unsigned char *) jbig;

	/* dib_t layout: dib_size at 0, width at 4, height at 8 */
	int32_t width = (int32_t) cnki_jbig_get_le32(src + 4);
	int32_t height = (int32_t) cnki_jbig_get_le32(src + 8);

	if (width <= 0 || height <= 0)
		return 1;

	/* Lines per stripe: height / 35, kept within [2, 128] */
	int32_t l_0 = height / 35;

	if (l_0 > 128)
		l_0 = 128;
	if (l_0 < 2)
		l_0 = 2;

	int payload_size = jbig_size - (DIB_SIZE + GAP_SIZE);
	int bie_size = BIH_SIZE + payload_size;
	unsigned char *bie = malloc(bie_size);

	if (bie == NULL)
		return 1;

	/*
	 * Write the header byte by byte so that the multi-byte fields are
	 * MSB first regardless of host byte order.
	 */
	bie[0] = 0;	/* d_l */
	bie[1] = 0;	/* d */
	bie[2] = 1;	/* p */
	bie[3] = 0;	/* fill */
	cnki_jbig_put_be32(bie + 4, (uint32_t) width);
	cnki_jbig_put_be32(bie + 8, (uint32_t) height);
	cnki_jbig_put_be32(bie + 12, (uint32_t) l_0);
	bie[16] = 8;	/* m_x */
	bie[17] = 0;	/* m_y */
	bie[18] = (1 << 1) | (1 << 0);			/* order: ILEAVE, SMID */
	bie[19] = (1 << 4) | (1 << 3) | (1 << 2);	/* options: TPDON, TPBON, DPON */

	memcpy(bie + BIH_SIZE, src + DIB_SIZE + GAP_SIZE, payload_size);

	int ret = strdec_jbig(bitmap, bitmap_size, (const char *) bie, bie_size);

	free(bie);

	if (ret != 0)
		return 1;

	*bitmap_width = width;
	*bitmap_height = height;

	return 0;
}

78
src/cnki_jbig.h Normal file
View file

@ -0,0 +1,78 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
/*
* order (MSB first):
* 0
* 0
* 0
* 0
* HITOLO
* SEQ
* ILEAVE (default)
* SMID (default)
*
* options (MSB first):
* 0
* LRLTWO
* VLENGTH
* TPDON (default)
* TPBON (default)
* DPON (default)
* DPPRIV
* DPLAST
*/
/*
 * Fields of a JBIG bi-level image header (BIH), in header order. The
 * bit assignments of "order" and "options" are listed in the comment
 * above.
 *
 * NOTE(review): x_d, y_d and l_0 are plain int32_t and therefore hold
 * host-order values; "MSB first" describes the serialised header. A raw
 * byte copy of this struct only yields that layout on a big-endian
 * host - confirm wherever the struct is serialised.
 */
typedef struct _bih_t {
char d_l; /* Initial resolution layer */
char d; /* Final resolution layer */
char p; /* Number of bit-planes, for bi-level image, always 1 */
char fill; /* Always 0 */
/* MSB first */
int32_t x_d; /* Horizontal dimension at highest resolution */
int32_t y_d; /* Vertical dimension at highest resolution */
int32_t l_0; /* Number of lines per stripe at lowest resolution */
char m_x; /* Maximum horizontal offsets (default: 8) */
char m_y; /* Maximum vertical offsets (default: 0) */
char order; /* Bit flags, see "order" above */
char options; /* Bit flags, see "options" above */
char *dptable; /* 0 or 1728 (presumably the table size in bytes - TODO confirm) */
} bih_t;
/*
 * Values stored in the "compression" member of dib_t. Every enumerator
 * is given its numeric value explicitly.
 */
typedef enum _dib_compression_code {
	BI_RGB = 0,
	BI_RLE8 = 1,
	BI_RLE4 = 2,
	BI_BITFIELDS = 3,
	BI_JPEG = 4,
	BI_PNG = 5,
	BI_ALPHABITFIELDS = 6,
	/* 7 to 10 are not defined here */
	BI_CMYK = 11,
	BI_CMYKRLE8 = 12,
	BI_CMYKRLE4 = 13
} dib_compression_code;
/*
 * DIB (bitmap information) header. The member widths add up to the
 * 40 bytes that dib_size is documented to hold.
 */
typedef struct _dib_t {
uint32_t dib_size; /* Always 40 */
int32_t width; /* Image width */
int32_t height; /* Image height */
uint16_t plane; /* Always 1 */
uint16_t depth; /* presumably bits per pixel - TODO confirm */
uint32_t compression; /* dib_compression_code */
uint32_t size; /* presumably size of the image data in bytes - TODO confirm */
uint32_t resolution_h; /* Horizontal resolution */
uint32_t resolution_v; /* Vertical resolution */
uint32_t colour; /* presumably number of colour table entries - TODO confirm */
uint32_t colour_used; /* NOTE(review): meaning not evident from this file */
} dib_t;
/*
 * One colour table entry, stored in blue, green, red order followed by
 * a filler member.
 *
 * NOTE(review): every member is 16 bits wide here; check this against
 * the on-disk colour table entry size before reading entries through
 * this struct.
 */
typedef struct _colour_table {
uint16_t blue;
uint16_t green;
uint16_t red;
uint16_t fill; /* Always 0 */
} colour_table;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -8,6 +8,9 @@
#include <string.h>
#include "cnki.h"
#include "iconv.h"
#include "zlib.h"
#include "jpeg.h"
#include "pdf.h"
#include "pdf_cnki.h"
@ -57,6 +60,11 @@ cnki_pdf(cnki_t **param)
printf("Loaded %d object(s)\n",
pdf_get_count(&pdf));
int dictionary_size;
char *dictionary;
char buf[64];
if ((*param)->stat > 1)
printf("Searching for parent object(s)\n");
@ -69,12 +77,8 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]);
char buf[64];
int parent_missing[parent[0]];
int *kid;
int dictionary_size;
char *dictionary;
for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1)
@ -101,20 +105,23 @@ cnki_pdf(cnki_t **param)
snprintf(buf, 64,
"<<\n/Type /Pages\n/Kids [");
strcat(dictionary, buf);
for (int j = 1; j <= kid[0]; j++) {
snprintf(buf, 64,
"%d 0 R",
kid[j]);
strcat(dictionary, buf);
if (j < kid[0])
strcat(dictionary, " ");
}
snprintf(buf, 64,
"]\n/Count %d\n>>",
pdf_get_kid_count(&pdf, parent[i]));
strcat(dictionary, buf);
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
parent_missing[i - 1] = 1;
@ -185,6 +192,7 @@ cnki_pdf(cnki_t **param)
if (parent_missing[i]) {
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
strcat(dictionary, buf);
if (i < root_kid)
strcat(dictionary, " ");
}
@ -200,7 +208,7 @@ cnki_pdf(cnki_t **param)
strcat(dictionary, ">>");
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);
memset(dictionary, 0, dictionary_size);
@ -260,7 +268,7 @@ cnki_pdf(cnki_t **param)
strcat(dictionary, ">>");
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
if ((*param)->stat > 0)
printf("Generated catalog object\n");
@ -383,3 +391,611 @@ cnki_pdf(cnki_t **param)
return 0;
}
/*
 * Convert the loaded HN pages into a PDF and write it to (*param)->fp_o.
 *
 * For every node of (*param)->object_hn this emits one image XObject per
 * embedded image (JBIG is decoded through cnki_jbig(), JPEG data is
 * copied through), a resource dictionary, a deflated content stream that
 * paints the images and a page object. Outline objects, the page tree
 * root and the catalog are generated afterwards, the objects are sorted
 * and header, objects, cross-reference table and trailer are written.
 *
 * Progress is reported on stdout according to (*param)->stat.
 *
 * Returns 0 on success and 1 on failure. All temporary buffers and the
 * object list are released on both paths.
 */
int
cnki_pdf_hn(cnki_t **param)
{
	/* Capacity for every dictionary whose shape does not grow with input */
	enum { DICT_MAX = 256 };

	if (*param == NULL)
		return 1;

	pdf_object_t *pdf = NULL;

	if (pdf_obj_create(&pdf) != 0)
		return 1;

	if ((*param)->stat > 1)
		printf("Generating PDF object(s)\n");

	/* Everything released at "cleanup" is declared up front */
	int status = 1;
	int page_total = (*param)->file_stat->page;
	int cnt = 0;
	int root = 0;
	int has_outline = 0;
	long cur = 0;
	long xref = 0;
	size_t cap;
	char buf[64];
	char *dictionary = NULL;
	char *stream = NULL;
	int stream_size = 0;
	int *ids = NULL;
	int *dim = NULL;
	int *root_kid = NULL;
	pdf_object_t *tmp = NULL;

	/* calloc() clears whole ints; never ask for zero elements */
	root_kid = calloc(page_total > 0 ? (size_t) page_total : 1, sizeof(int));
	if (root_kid == NULL)
		goto cleanup;

	for (object_hn_t *ptr = (*param)->object_hn; ptr != NULL; ptr = ptr->next) {
		int image_total = ptr->image_length;

		/* root_kid holds page_total entries, do not run past it */
		if (image_total < 0 || cnt >= page_total)
			goto cleanup;

		/*
		 * External object (ptr->image_length) +
		 * content object +
		 * resource object +
		 * page object
		 */
		pdf_get_free_ids(&pdf, &ids, image_total + 3);
		if (ids == NULL)
			goto cleanup;

		/* Width and height per image; <= 0 marks "do not paint" */
		dim = calloc(2 * (size_t) image_total + 1, sizeof(int));
		if (dim == NULL)
			goto cleanup;

		for (int i = 0; i < image_total; i++) {
			int ret;
			int wh[2];

			/*
			 * A complete image dictionary needs roughly 140 to
			 * 165 bytes, so 128 was too small.
			 */
			dictionary = calloc(DICT_MAX, 1);
			if (dictionary == NULL)
				goto cleanup;

			stream = NULL;
			stream_size = 0;

			strcat(dictionary, "<<\n/Type /XObject\n"
				"/Subtype /Image\n");

			if ((*param)->stat > 2)
				printf("\tDecoding data, page %04d item %02d... ",
					ptr->page, i);

			switch (ptr->image_data[i].format) {
			case JBIG:
				ret = cnki_jbig(&stream,
					&stream_size,
					&wh[0],
					&wh[1],
					ptr->image_data[i].image,
					ptr->image_data[i].size);

				/* A missing bitmap is a failure as well */
				if (ret != 0 || stream == NULL) {
					ret = 1;
					stream = NULL;
					dim[i * 2] = 0;
					dim[i * 2 + 1] = 0;
					break;
				}

				snprintf(buf, sizeof(buf), "/Width %d\n/Height %d\n",
					wh[0], wh[1]);
				strcat(dictionary, buf);

				strcat(dictionary, "/ColorSpace /DeviceGray\n"
					"/BitsPerComponent 1\n");

				snprintf(buf, sizeof(buf), "/Length %d\n",
					stream_size);
				strcat(dictionary, buf);

				/*
				 * NOTE(review): the stream is the decoded
				 * bitmap, not CCITT data - confirm that this
				 * filter is what is intended.
				 */
				strcat(dictionary, "/Filter /CCITTFaxDecode\n");

				dim[i * 2] = wh[0];
				dim[i * 2 + 1] = wh[1];
				break;
			case DCT_0:
			case DCT_1:
				ret = strinfo_jpeg_dim(&wh[0],
					&wh[1],
					ptr->image_data[i].image,
					ptr->image_data[i].size);

				if (ret != 0) {
					dim[i * 2] = 0;
					dim[i * 2 + 1] = 0;
					break;
				}

				/* JPEG data is embedded unchanged */
				stream_size = ptr->image_data[i].size;
				stream = malloc(stream_size);
				if (stream == NULL)
					goto cleanup;

				memcpy(stream, ptr->image_data[i].image, stream_size);

				snprintf(buf, sizeof(buf), "/Width %d\n/Height %d\n",
					wh[0], wh[1]);
				strcat(dictionary, buf);

				strcat(dictionary, "/ColorSpace /DeviceRGB\n"
					"/BitsPerComponent 8\n");

				snprintf(buf, sizeof(buf), "/Length %d\n",
					stream_size);
				strcat(dictionary, buf);

				strcat(dictionary, "/Filter /DCTDecode\n");

				dim[i * 2] = wh[0];
				dim[i * 2 + 1] = wh[1];
				break;
			case JBIG2:
			case JPX:
			default:
				ret = -1;
				dim[i * 2] = -1;
				dim[i * 2 + 1] = -1;
				break;
			}

			strcat(dictionary, ">>");

			if (ret == 0) {
				if ((*param)->stat > 2)
					printf("Succeed\n");

				pdf_obj_append(&pdf, ids[i],
					NULL, dictionary, stream, stream_size);
			} else if (ret == 1) {
				if ((*param)->stat > 2)
					printf("; Failed\n");

				/* Keep the id occupied by an empty object */
				pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
			} else if ((*param)->stat > 2) {
				/* Terminate the progress line started above */
				printf("Unsupported\n");
			}

			free(stream);
			stream = NULL;
			free(dictionary);
			dictionary = NULL;
		}

		/* Resource dictionary: up to 31 bytes per image plus framing */
		cap = 32 + 32 * (size_t) image_total;
		dictionary = calloc(cap, 1);
		if (dictionary == NULL)
			goto cleanup;

		strcat(dictionary, "<<\n/XObject <<");

		for (int i = 0; i < image_total; i++) {
			snprintf(buf, sizeof(buf), "/Im%d %d 0 R", i, ids[i]);
			strcat(dictionary, buf);

			if (i + 1 < image_total)
				strcat(dictionary, " ");
		}

		strcat(dictionary, ">>\n>>");

		pdf_obj_append(&pdf, ids[image_total], NULL, dictionary, NULL, 0);

		free(dictionary);
		dictionary = NULL;

		/*
		 * FIXME: Use the text somehow? The page text (ptr->text,
		 * optionally COMPRESSTEXT) used to be converted to a
		 * UTF-16BE hex string here and thrown away again, writing
		 * past an undersized buffer while doing so. Nothing is
		 * generated until a text layer is actually emitted.
		 */

		/* Content stream: up to 70 bytes per image plus framing */
		cap = 64 + 80 * (size_t) image_total;
		dictionary = calloc(cap, 1);
		if (dictionary == NULL)
			goto cleanup;

		strcat(dictionary, "q\n");

		strcat(dictionary, "0.120000 0 0 0.120000 0 0 cm\n");

		for (int i = 0; i < image_total; i++) {
			if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
				continue;

			/* Apply transformation matrix */
			if (ptr->image_data[i].format == DCT_1)
				strcat(dictionary, "-1 0 0 -1 0 0 cm\n");

			snprintf(buf, sizeof(buf), "%d 0 0 %d 0 0 cm\n",
				dim[i * 2], dim[i * 2 + 1]);
			strcat(dictionary, buf);

			snprintf(buf, sizeof(buf), "/Im%d Do\n", i);
			strcat(dictionary, buf);
		}

		strcat(dictionary, "Q");

		if (strdeflate(&stream, &stream_size,
			dictionary, strlen(dictionary)) != 0) {
			/* strdeflate() owns and releases its buffer on failure */
			stream = NULL;
			goto cleanup;
		}

		free(dictionary);
		dictionary = calloc(DICT_MAX, 1);
		if (dictionary == NULL)
			goto cleanup;

		strcat(dictionary, "<<\n");

		snprintf(buf, sizeof(buf), "/Length %d\n", stream_size);
		strcat(dictionary, buf);

		strcat(dictionary, "/Filter /FlateDecode\n");

		strcat(dictionary, ">>");

		pdf_obj_append(&pdf, ids[image_total + 1],
			NULL, dictionary, stream, stream_size);

		free(stream);
		stream = NULL;

		memset(dictionary, 0, DICT_MAX);

		strcat(dictionary, "<<\n/Type /Page\n");

		snprintf(buf, sizeof(buf), "/Resources %d 0 R\n", ids[image_total]);
		strcat(dictionary, buf);

		snprintf(buf, sizeof(buf), "/Contents %d 0 R\n", ids[image_total + 1]);
		strcat(dictionary, buf);

		/* A4 paper */
		strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");

		/* Add /Parent when we know root */

		pdf_obj_append(&pdf, ids[image_total + 2], NULL, dictionary, NULL, 0);

		free(dictionary);
		dictionary = NULL;

		root_kid[cnt++] = ids[image_total + 2];

		free(ids);
		ids = NULL;

		free(dim);
		dim = NULL;
	}

	if ((*param)->stat > 1) {
		printf("\t%8s\t%12s\t%12s\t%12s\n",
			"id",
			"object",
			"dictionary",
			"stream");

		for (pdf_object_t *obj = pdf->next; obj != NULL; obj = obj->next)
			printf("\t%8d\t%12d\t%12d\t%12d\n",
				obj->id,
				obj->object_size,
				obj->dictionary_size,
				obj->stream_size);
	}

	if ((*param)->stat > 0)
		printf("Generated %d object(s)\n",
			pdf_get_count(&pdf));

	if ((*param)->file_stat->outline > 0) {
		if ((*param)->stat > 1)
			printf("Generating outline object(s)\n\t%8s\n", "id");

		pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
		if (ids == NULL)
			goto cleanup;

		int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);

		/* Only a generated outline may be referenced by the catalog */
		has_outline = (outline == 0);

		if ((*param)->stat > 1)
			for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
				printf("\t%8d\n", ids[i]);

		if ((*param)->stat > 0) {
			if (outline != 0)
				printf("No outline information\n");
			else
				printf("Generated %d outline object(s)\n",
					(*param)->file_stat->outline + 1);
		}
	}

	if ((*param)->stat > 1)
		printf("Generating root object\n");

	/* Page tree root: up to 16 bytes per kid plus framing */
	cap = 64 + 20 * (size_t) (page_total > 0 ? page_total : 0);
	dictionary = calloc(cap, 1);
	if (dictionary == NULL)
		goto cleanup;

	root = pdf_get_free_id(&pdf);

	/* /Kids has to be an array, even for a single page */
	strcat(dictionary, "<<\n/Type /Pages\n/Kids [");

	for (int i = 0; i < page_total; i++) {
		snprintf(buf, sizeof(buf), "%d 0 R", root_kid[i]);
		strcat(dictionary, buf);

		if (i + 1 < page_total)
			strcat(dictionary, " ");
	}

	strcat(dictionary, "]\n");

	snprintf(buf, sizeof(buf), "/Count %d\n", page_total);
	strcat(dictionary, buf);

	strcat(dictionary, ">>");

	pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);

	free(dictionary);
	dictionary = calloc(DICT_MAX, 1);
	if (dictionary == NULL)
		goto cleanup;

	/* Add /Parent to page object */
	for (int i = 0; i < page_total; i++) {
		if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0)
			goto cleanup;

		/*
		 * Measure the stored dictionary without relying on a
		 * terminating NUL being present.
		 */
		int len = 0;

		while (len < tmp->dictionary_size &&
			tmp->dictionary[len] != '\0')
			len++;

		/* Room for "/Parent <id> 0 R\n>>" and the terminator */
		if (len + 32 > DICT_MAX)
			goto cleanup;

		memset(dictionary, 0, DICT_MAX);
		memcpy(dictionary, tmp->dictionary, len);

		snprintf(buf, sizeof(buf), "/Parent %d 0 R\n>>", root);
		strcat(dictionary, buf);

		if (pdf_obj_replace(&pdf, root_kid[i],
			NULL, dictionary, NULL, 0) != 0)
			goto cleanup;
	}

	memset(dictionary, 0, DICT_MAX);

	if ((*param)->stat > 0)
		printf("Generated root object %d.\n",
			root);

	if ((*param)->stat > 1)
		printf("Generating catalog object\n");

	snprintf(buf, sizeof(buf),
		"<<\n/Type /Catalog\n/Pages %d 0 R\n",
		root);
	strcat(dictionary, buf);

	if (has_outline) {
		snprintf(buf, sizeof(buf),
			"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
			ids[0]);
		strcat(dictionary, buf);
	}

	strcat(dictionary, ">>");

	pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);

	free(dictionary);
	dictionary = NULL;

	if ((*param)->stat > 0)
		printf("Generated catalog object\n");

	if ((*param)->stat > 1)
		printf("Sorting object(s)\n");

	pdf_obj_sort(&pdf);

	if ((*param)->stat > 0)
		printf("Sorted object(s)\n");

	if ((*param)->stat > 1)
		printf("Writing header\n");

	if ((*param)->stat > 0)
		cur = ftell((*param)->fp_o);

	if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
		fprintf(stderr, "Header not written\n");
		goto cleanup;
	} else {
		if ((*param)->stat > 0)
			printf("Header %ld byte(s) written\n",
				ftell((*param)->fp_o) - cur);
	}

	if ((*param)->stat > 1)
		printf("Writing object(s)\n");

	pdf_dump_obj(&pdf, &(*param)->fp_o);

	if ((*param)->stat > 1) {
		printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
			"address",
			"size",
			"id",
			"object",
			"dictionary",
			"stream");

		for (pdf_object_t *obj = pdf->next; obj != NULL; obj = obj->next)
			printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
				obj->address,
				obj->size,
				obj->id,
				obj->object_size,
				obj->dictionary_size,
				obj->stream_size);
	}

	if ((*param)->stat > 0)
		printf("%d object(s) %ld byte(s) written\n",
			pdf_get_count(&pdf),
			ftell((*param)->fp_o));

	xref = ftell((*param)->fp_o);

	if ((*param)->stat > 1)
		printf("Writing cross-reference table\n");

	if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
		if ((*param)->stat > 0)
			printf("Cross-reference table not written\n");
	} else {
		if ((*param)->stat > 0)
			printf("Cross-reference table %ld byte(s) written\n",
				ftell((*param)->fp_o) - xref);
	}

	if ((*param)->stat > 1)
		printf("Writing trailer\n");

	if ((*param)->stat > 0)
		cur = ftell((*param)->fp_o);

	if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
		if ((*param)->stat > 0)
			printf("Trailer not written\n");
	} else {
		if ((*param)->stat > 0)
			printf("Trailer %ld byte(s) written\n",
				ftell((*param)->fp_o) - cur);
	}

	if ((*param)->stat > 0)
		printf("Total %ld byte(s) written\n",
			ftell((*param)->fp_o));

	status = 0;

cleanup:
	/* Pointers that were already released are NULL here */
	free(dictionary);
	free(stream);
	free(dim);
	free(ids);
	free(root_kid);
	pdf_obj_destroy(&pdf);

	return status;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

41
src/jbig.c Normal file
View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h> /* FIXME: test */
#include <stdlib.h>
#include <string.h>
#include <jbig.h>
/*
 * Decode a complete JBIG bi-level image entity (BIE) held in memory.
 *
 * bitmap:      receives a malloc()ed copy of the first decoded plane;
 *              the caller frees it. Set to NULL on failure.
 * bitmap_size: receives the size of that copy in bytes, 0 on failure.
 * data:        the BIE, header included.
 * data_size:   length of data in bytes.
 *
 * Returns 0 on success and 1 on invalid arguments, decoder error or
 * allocation failure. A decoder error message is printed to stdout
 * without a trailing newline; the caller terminates the line.
 */
int
strdec_jbig(char **bitmap, int *bitmap_size,
	const char * restrict data, int data_size)
{
	struct jbg_dec_state sd;
	int ret;

	*bitmap = NULL;
	*bitmap_size = 0;

	if (data == NULL || data_size <= 0)
		return 1;

	jbg_dec_init(&sd);

	/*
	 * Hand the decoder the data itself. The previous code passed the
	 * address of a local pointer array, so the decoder parsed pointer
	 * bytes instead of the image.
	 */
	ret = jbg_dec_in(&sd, (unsigned char *) data, data_size, NULL);

	/* FIXME: test */
	if (ret != JBG_EOK) {
		printf("%s", jbg_strerror(ret));
		jbg_dec_free(&sd);
		return 1;
	}

	int size = jbg_dec_getsize(&sd);
	char *copy = malloc(size);

	if (copy == NULL) {
		jbg_dec_free(&sd);
		return 1;
	}

	/* The decoder owns its image buffer, so take a copy before freeing */
	memcpy(copy, jbg_dec_getimage(&sd, 0), size);

	jbg_dec_free(&sd);

	*bitmap = copy;
	*bitmap_size = size;

	return 0;
}

8
src/jbig.h Normal file
View file

@ -0,0 +1,8 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Decode the JBIG data in data (data_size bytes) and store a malloc()ed
 * copy of the decoded image in *bitmap and its size in *bitmap_size.
 * The caller frees *bitmap. Returns 0 on success, non-zero on failure.
 */
int strdec_jbig(char **bitmap, int *bitmap_size,
const char * restrict data, int data_size);

36
src/jpeg.c Normal file
View file

@ -0,0 +1,36 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <setjmp.h>
#include <stdio.h>
#include <jpeglib.h>
int
strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
const char * restrict data, int data_size)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_mem_src(&cinfo, (unsigned char *) data, data_size);
jpeg_read_header(&cinfo, TRUE);
jpeg_calc_output_dimensions(&cinfo);
*jpeg_width = cinfo.output_width;
*jpeg_height = cinfo.output_height;
jpeg_destroy((struct jpeg_common_struct *) &cinfo);
jpeg_destroy_decompress(&cinfo);
return 0;
}

8
src/jpeg.h Normal file
View file

@ -0,0 +1,8 @@
/*
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
 * Read the header of the JPEG image in data (data_size bytes) and store
 * its output width and height in *jpeg_width and *jpeg_height.
 * Returns 0 on success.
 */
int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
const char * restrict data, int data_size);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -82,7 +82,7 @@ main(int argc, char **argv, char **envp)
if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
"Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -67,7 +67,8 @@ int
pdf_obj_add(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream)
const char * restrict stream,
int stream_size)
{
if (*pdf != NULL || id <= 0 ||
(object != NULL && dictionary != NULL))
@ -112,14 +113,15 @@ pdf_obj_add(pdf_object_t **pdf, int id,
(*pdf)->dictionary = NULL;
}
if (stream != NULL) {
(*pdf)->stream_size = sizeof(stream);
if (stream != NULL && stream_size > 0) {
(*pdf)->stream_size = stream_size + 1;
(*pdf)->stream = malloc((*pdf)->stream_size);
if ((*pdf)->stream == NULL)
return 1;
memcpy((*pdf)->stream, stream, (*pdf)->stream_size);
(*pdf)->stream[(*pdf)->stream_size - 1] = '\n';
} else {
(*pdf)->stream_size = 0;
(*pdf)->stream = NULL;
@ -153,7 +155,8 @@ int
pdf_obj_prepend(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream)
const char * restrict stream,
int stream_size)
{
if (*pdf == NULL)
return 1;
@ -163,7 +166,8 @@ pdf_obj_prepend(pdf_object_t **pdf, int id,
pdf_object_t *ptr = NULL;
if (pdf_obj_add(&ptr, id, object, dictionary, stream) != 0) {
if (pdf_obj_add(&ptr, id, object, dictionary,
stream, stream_size) != 0) {
free(ptr);
return 1;
}
@ -178,7 +182,8 @@ int
pdf_obj_append(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream)
const char * restrict stream,
int stream_size)
{
if (*pdf == NULL)
return 1;
@ -190,12 +195,67 @@ pdf_obj_append(pdf_object_t **pdf, int id,
while (ptr->next != NULL)
ptr = ptr->next;
if (pdf_obj_add(&ptr->next, id, object, dictionary, stream) != 0)
if (pdf_obj_add(&ptr->next, id, object, dictionary,
stream, stream_size) != 0)
return 1;
return 0;
}
/*
 * Replace the content of the object with the given id.
 *
 * object and dictionary are NUL-terminated strings and are mutually
 * exclusive; whichever is given replaces the stored one. stream, when
 * given with a positive stream_size, replaces the stored stream and is
 * stored with a trailing newline. Parts passed as NULL are left as they
 * are.
 *
 * Returns 0 on success and 1 when the object does not exist, both
 * object and dictionary are given, or memory cannot be allocated.
 */
int
pdf_obj_replace(pdf_object_t **pdf, int id,
	const char * restrict object,
	const char * restrict dictionary,
	const char * restrict stream,
	int stream_size)
{
	pdf_object_t *ptr;
	char *ret;
	size_t len;

	if (pdf_get_obj(pdf, id, &ptr) != 0)
		return 1;

	if (object != NULL && dictionary != NULL)
		return 1;

	if (dictionary != NULL) {
		len = strlen(dictionary);

		/*
		 * Store the terminator too, callers read the dictionary
		 * back as a string; the recorded size stays the length.
		 */
		ret = realloc(ptr->dictionary, len + 1);
		if (ret == NULL)
			return 1;

		memcpy(ret, dictionary, len + 1);

		ptr->dictionary = ret;
		ptr->dictionary_size = len;
	} else if (object != NULL) {
		len = strlen(object);

		ret = realloc(ptr->object, len + 1);
		if (ret == NULL)
			return 1;

		memcpy(ret, object, len + 1);

		ptr->object = ret;
		ptr->object_size = len;
	}

	if (stream != NULL && stream_size > 0) {
		ret = realloc(ptr->stream, (size_t) stream_size + 1);
		if (ret == NULL)
			return 1;

		/* Copy only what the caller provided, then add the newline */
		memcpy(ret, stream, stream_size);
		ret[stream_size] = '\n';

		ptr->stream = ret;
		ptr->stream_size = stream_size + 1;
	}

	return 0;
}
int
pdf_obj_sort(pdf_object_t **pdf)
{

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -26,16 +26,24 @@ void pdf_obj_destroy(pdf_object_t **pdf);
int pdf_obj_add(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream);
const char * restrict stream,
int stream_size);
int pdf_obj_del(pdf_object_t **pdf, int id);
int pdf_obj_prepend(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream);
const char * restrict stream,
int stream_size);
int pdf_obj_append(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream);
const char * restrict stream,
int stream_size);
int pdf_obj_replace(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
const char * restrict stream,
int stream_size);
int pdf_obj_sort(pdf_object_t **pdf);
/* pdf_parser.c */

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -93,7 +93,7 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
atoi(ptr->item->page) - 1);
strcat(dictionary, buf);
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL, 0);
if (ptr->left == NULL)
(*stat)[1] = ptr->id;
@ -128,7 +128,7 @@ pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
free(ret);
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL, 0);
return 0;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -20,12 +20,34 @@ strinflate(char **dst, int dst_size,
unsigned long size = dst_size;
uncompress((Bytef *) *dst, &size, (const Bytef *) src, src_size);
if (size != dst_size) {
if (uncompress((Bytef *) *dst,
&size, (const Bytef *) src, src_size) != Z_OK) {
free(*dst);
return 1;
}
return 0;
}
/*
 * Compress src_size bytes of src with zlib.
 *
 * dst:      receives a malloc()ed buffer holding the compressed data;
 *           the caller frees it. Set to NULL on failure.
 * dst_size: receives the compressed size in bytes, 0 on failure.
 *
 * Returns 0 on success and 1 on invalid size, allocation failure or
 * compression failure.
 */
int
strdeflate(char **dst, int *dst_size,
	const char * restrict src, int src_size)
{
	/* Never leave the caller with a stale pointer or size */
	*dst = NULL;
	*dst_size = 0;

	if (src_size < 0)
		return 1;

	/* Upper bound of the compressed size, updated by compress() */
	unsigned long size = compressBound(src_size);
	char *buf = malloc(size);

	if (buf == NULL)
		return 1;

	if (compress((Bytef *) buf, &size,
		(const Bytef *) src, src_size) != Z_OK) {
		free(buf);
		return 1;
	}

	*dst = buf;
	*dst_size = size;

	return 0;
}

View file

@ -1,8 +1,11 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
int strinflate(char **dst, int dst_size,
const char * restrict src, int src_size);
int strdeflate(char **dst, int *dst_size,
const char * restrict src, int src_size);