Decode JBIG and JPEG during HN conversion.
This commit is contained in:
parent
b20c6ad3ed
commit
1994f122cc
31 changed files with 1035 additions and 274 deletions
|
@ -17,6 +17,8 @@ Dependency
|
|||
1. OpenSSL
|
||||
2. libiconv
|
||||
3. zlib
|
||||
4. JBIG-KIT
|
||||
5. libjpeg-turbo
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
|
14
src/Makefile
14
src/Makefile
|
@ -1,22 +1,22 @@
|
|||
#
|
||||
# Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
# Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
src = melon.c iconv.c zlib.c \
|
||||
src = melon.c iconv.c zlib.c jbig.c jpeg.c \
|
||||
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
|
||||
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki.c \
|
||||
cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \
|
||||
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
|
||||
inc = extern.h version.h iconv.h zlib.h \
|
||||
cnki.h pdf_cnki.h pdf.h
|
||||
inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \
|
||||
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
|
||||
|
||||
obj = ${src:.c=.o}
|
||||
|
||||
PREFIX = /usr/local
|
||||
|
||||
CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
|
||||
LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -Wl,--as-needed
|
||||
CFLAGS = -O3 -march=native -pipe -flto=thin -Wall -Wextra -Wno-unused-parameter
|
||||
LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed
|
||||
|
||||
CFLAGS += -I/usr/local/include
|
||||
LDFLAGS += -L/usr/local/lib
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -82,6 +82,7 @@ typedef struct _cnki_t {
|
|||
|
||||
/* cnki_pdf.c */
|
||||
int cnki_pdf(cnki_t **param);
|
||||
int cnki_pdf_hn(cnki_t **param);
|
||||
|
||||
/* cnki_outline_tree.c */
|
||||
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
||||
|
@ -91,5 +92,10 @@ int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
|||
int cnki_zlib(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size);
|
||||
|
||||
/* cnki_jbig.c */
|
||||
int cnki_jbig(char **bitmap, int *bitmap_size,
|
||||
int *bitmap_width, int *bitmap_height,
|
||||
const char * restrict jbig, int jbig_size);
|
||||
|
||||
/* cnki_xml.c */
|
||||
int cnki_xml(char **xml, FILE **fp);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
228
src/cnki_hn.c
228
src/cnki_hn.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include "cnki.h"
|
||||
#include "iconv.h"
|
||||
#include "zlib.h"
|
||||
#include "jpeg.h"
|
||||
#include "pdf.h"
|
||||
#include "pdf_cnki.h"
|
||||
|
||||
|
@ -131,231 +133,13 @@ cnki_hn(cnki_t **param)
|
|||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
if ((*param)->stat > 0)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating PDF object(s)\n");
|
||||
|
||||
pdf_object_t *pdf = NULL;
|
||||
|
||||
if (pdf_obj_create(&pdf) != 0)
|
||||
return 1;
|
||||
|
||||
int buf_size;
|
||||
char *buf;
|
||||
|
||||
int str_size;
|
||||
char *str;
|
||||
|
||||
int conv_size;
|
||||
char *conv_dst;
|
||||
char conv_src[2];
|
||||
char conv_hex[3];
|
||||
|
||||
ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
|
||||
cnki_zlib(&buf, &buf_size, ptr->text, ptr->text_size);
|
||||
|
||||
str_size = buf_size / 8 + 7;
|
||||
str = malloc(str_size);
|
||||
|
||||
if (str == NULL)
|
||||
return 1;
|
||||
|
||||
memset(str, 0, str_size);
|
||||
|
||||
strcat(str, "<feff");
|
||||
|
||||
for (int i = 0; i < buf_size; i += 16) {
|
||||
conv_src[0] = buf[i + 7];
|
||||
conv_src[1] = buf[i + 6];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(str, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
|
||||
strcat(str, ">");
|
||||
} else {
|
||||
str_size = ptr->text_size;
|
||||
str = malloc(str_size);
|
||||
|
||||
if (str == NULL)
|
||||
return 1;
|
||||
|
||||
memset(str, 0, str_size);
|
||||
|
||||
strcat(str, "<feff");
|
||||
|
||||
for (int i = 0; i < ptr->text_size; i += 4) {
|
||||
conv_src[0] = ptr->text[i + 3];
|
||||
conv_src[1] = ptr->text[i + 2];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(str, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
}
|
||||
|
||||
strcat(str, ">");
|
||||
}
|
||||
|
||||
pdf_obj_append(&pdf, 0, str, NULL, NULL);
|
||||
|
||||
free(str);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
cnki_pdf_hn(param);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated %d object(s)\n",
|
||||
pdf_get_count(&pdf));
|
||||
printf("Conversion ended\n");
|
||||
|
||||
int *ids = NULL;
|
||||
|
||||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating outline object(s)\n\t%8s\n", "id");
|
||||
|
||||
pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
|
||||
int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
|
||||
printf("\t%8d\n", ids[i]);
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if (outline != 0)
|
||||
printf("No outline information\n");
|
||||
else
|
||||
printf("Generated %d outline object(s)\n",
|
||||
(*param)->file_stat->outline + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing header\n");
|
||||
|
||||
long cur = 0;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
|
||||
fprintf(stderr, "Header not written\n");
|
||||
return 1;
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Header %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing object(s)\n");
|
||||
|
||||
pdf_dump_obj(&pdf, &(*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"address",
|
||||
"size",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("%d object(s) %ld byte(s) written\n",
|
||||
pdf_get_count(&pdf),
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
long xref = ftell((*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing cross-reference table\n");
|
||||
|
||||
if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - xref);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing trailer\n");
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
pdf_obj_destroy(&pdf);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended (partial)\n");
|
||||
|
||||
/* TODO: Finish me please :) */
|
||||
return 0;
|
||||
}
|
||||
|
|
89
src/cnki_jbig.c
Normal file
89
src/cnki_jbig.c
Normal file
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki_jbig.h"
|
||||
#include "jbig.h"
|
||||
|
||||
/*
 * Decode a CNKI JBIG image record into a raw bitmap.
 *
 * The record starts with a 40-byte DIB header (little-endian, width at
 * offset 4 and height at offset 8), and the JBIG stripe data starts at
 * offset 48.  NOTE(review): the meaning of the 8 bytes between the DIB
 * header and the data is not visible here - confirm against the format.
 *
 * The record carries no JBIG header, so a 20-byte BIH is synthesised
 * in front of the data before it is handed to strdec_jbig().
 *
 * bitmap, bitmap_size:		decoded bitmap, filled in by strdec_jbig()
 * bitmap_width, bitmap_height:	image dimension, written on success only
 * jbig, jbig_size:		raw record and its length in bytes
 *
 * Returns 0 on success, 1 on invalid input, allocation failure or
 * decoder failure.
 */
int
cnki_jbig(char **bitmap, int *bitmap_size,
	int *bitmap_width, int *bitmap_height,
	const char * restrict jbig, int jbig_size)
{
	enum {
		DATA_OFFSET = 48,	/* 40-byte DIB header + 8 bytes */
		BIH_SIZE = 20
	};

	/* Anything shorter cannot even hold the header we read below */
	if (jbig == NULL || jbig_size < DATA_OFFSET)
		return 1;

	const unsigned char *dib = (const unsigned char *) jbig;

	/* DIB fields are little-endian; assemble them byte by byte */
	uint32_t width = (uint32_t) dib[4]
		| (uint32_t) dib[5] << 8
		| (uint32_t) dib[6] << 16
		| (uint32_t) dib[7] << 24;
	uint32_t height = (uint32_t) dib[8]
		| (uint32_t) dib[9] << 8
		| (uint32_t) dib[10] << 16
		| (uint32_t) dib[11] << 24;

	if (width == 0 || width > INT32_MAX ||
		height == 0 || height > INT32_MAX)
		return 1;

	/* Lines per stripe: height / 35, clamped to [2, 128] */
	uint32_t l_0 = height / 35;

	if (l_0 > 128)
		l_0 = 128;
	if (l_0 < 2)
		l_0 = 2;

	int bie_size = BIH_SIZE + (jbig_size - DATA_OFFSET);
	unsigned char *bie = malloc(bie_size);

	if (bie == NULL)
		return 1;

	bie[0] = 0;	/* d_l: initial resolution layer */
	bie[1] = 0;	/* d: final resolution layer */
	bie[2] = 1;	/* p: one bit-plane */
	bie[3] = 0;	/* fill */

	/* x_d, y_d and l_0 are stored MSB first in the BIH */
	const uint32_t field[3] = {width, height, l_0};

	for (int i = 0; i < 3; i++) {
		bie[4 + i * 4] = (unsigned char) (field[i] >> 24);
		bie[5 + i * 4] = (unsigned char) (field[i] >> 16);
		bie[6 + i * 4] = (unsigned char) (field[i] >> 8);
		bie[7 + i * 4] = (unsigned char) field[i];
	}

	bie[16] = 8;	/* m_x */
	bie[17] = 0;	/* m_y */
	bie[18] = 1 << 1 | 1 << 0;		/* order: ILEAVE, SMID */
	bie[19] = 1 << 4 | 1 << 3 | 1 << 2;	/* options: TPDON, TPBON, DPON */

	memcpy(bie + BIH_SIZE, jbig + DATA_OFFSET, jbig_size - DATA_OFFSET);

	int ret = strdec_jbig(bitmap, bitmap_size,
		(const char *) bie, bie_size);

	if (ret == 0) {
		*bitmap_width = (int) width;
		*bitmap_height = (int) height;
	}

	free(bie);

	if (ret != 0)
		return 1;

	return 0;
}
|
78
src/cnki_jbig.h
Normal file
78
src/cnki_jbig.h
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* order (MSB first):
|
||||
* 0
|
||||
* 0
|
||||
* 0
|
||||
* 0
|
||||
* HITOLO
|
||||
* SEQ
|
||||
* ILEAVE (default)
|
||||
* SMID (default)
|
||||
*
|
||||
* options (MSB first):
|
||||
* 0
|
||||
* LRLTWO
|
||||
* VLENGTH
|
||||
* TPDON (default)
|
||||
* TPBON (default)
|
||||
* DPON (default)
|
||||
* DPPRIV
|
||||
* DPLAST
|
||||
*/
|
||||
/* JBIG bi-level image header (BIH) fields; see the order/options bit layout above */
typedef struct _bih_t {
|
||||
char d_l; /* Initial resolution layer */
|
||||
char d; /* Final resolution layer */
|
||||
char p; /* Number of bit-planes; always 1 for a bi-level image */
|
||||
char fill; /* Always 0 */
|
||||
/* The next three fields are MSB first in the encoded header. NOTE(review): they are plain host-order integers here - confirm byte order before serialising */
|
||||
int32_t x_d; /* Horizontal dimension at highest resolution */
|
||||
int32_t y_d; /* Vertical dimension at highest resolution */
|
||||
int32_t l_0; /* Number of lines per stripe at lowest resolution */
|
||||
char m_x; /* Maximum horizontal offsets (default: 8) */
|
||||
char m_y; /* Maximum vertical offsets (default: 0) */
|
||||
char order; /* Bit flags: HITOLO, SEQ, ILEAVE, SMID */
|
||||
char options; /* Bit flags: LRLTWO, VLENGTH, TPDON, TPBON, DPON, DPPRIV, DPLAST */
|
||||
char *dptable; /* 0 or 1728 */
|
||||
} bih_t;
|
||||
|
||||
/* Compression codes stored in dib_t.compression */
typedef enum _dib_compression_code {
|
||||
BI_RGB, /* 0 */
|
||||
BI_RLE8, /* 1 */
|
||||
BI_RLE4, /* 2 */
|
||||
BI_BITFIELDS, /* 3 */
|
||||
BI_JPEG, /* 4 */
|
||||
BI_PNG, /* 5 */
|
||||
BI_ALPHABITFIELDS, /* 6 */
|
||||
BI_CMYK = 11,
|
||||
BI_CMYKRLE8 = 12,
|
||||
BI_CMYKRLE4 = 13
|
||||
} dib_compression_code;
|
||||
|
||||
/* 40-byte device-independent bitmap (DIB) header: 3 x 32-bit, 2 x 16-bit, 6 x 32-bit fields */
typedef struct _dib_t {
|
||||
uint32_t dib_size; /* Size of this header; always 40 */
|
||||
int32_t width; /* Image width */
|
||||
int32_t height; /* Image height */
|
||||
uint16_t plane; /* Always 1 */
|
||||
uint16_t depth;
|
||||
uint32_t compression; /* dib_compression_code */
|
||||
uint32_t size;
|
||||
uint32_t resolution_h; /* Horizontal resolution */
|
||||
uint32_t resolution_v; /* Vertical resolution */
|
||||
uint32_t colour;
|
||||
uint32_t colour_used;
|
||||
} dib_t;
|
||||
|
||||
/* One colour table entry: blue, green and red components followed by a zero filler */
typedef struct _colour_table {
|
||||
uint16_t blue;
|
||||
uint16_t green;
|
||||
uint16_t red;
|
||||
uint16_t fill; /* Always 0 */
|
||||
} colour_table;
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
632
src/cnki_pdf.c
632
src/cnki_pdf.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -8,6 +8,9 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "cnki.h"
|
||||
#include "iconv.h"
|
||||
#include "zlib.h"
|
||||
#include "jpeg.h"
|
||||
#include "pdf.h"
|
||||
#include "pdf_cnki.h"
|
||||
|
||||
|
@ -57,6 +60,11 @@ cnki_pdf(cnki_t **param)
|
|||
printf("Loaded %d object(s)\n",
|
||||
pdf_get_count(&pdf));
|
||||
|
||||
int dictionary_size;
|
||||
char *dictionary;
|
||||
|
||||
char buf[64];
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Searching for parent object(s)\n");
|
||||
|
||||
|
@ -69,12 +77,8 @@ cnki_pdf(cnki_t **param)
|
|||
if ((*param)->stat > 0)
|
||||
printf("Discovered %d parent object(s)\n", parent[0]);
|
||||
|
||||
char buf[64];
|
||||
|
||||
int parent_missing[parent[0]];
|
||||
int *kid;
|
||||
int dictionary_size;
|
||||
char *dictionary;
|
||||
|
||||
for (int i = 1; i <= parent[0]; i++) {
|
||||
if ((*param)->stat > 1)
|
||||
|
@ -101,20 +105,23 @@ cnki_pdf(cnki_t **param)
|
|||
snprintf(buf, 64,
|
||||
"<<\n/Type /Pages\n/Kids [");
|
||||
strcat(dictionary, buf);
|
||||
|
||||
for (int j = 1; j <= kid[0]; j++) {
|
||||
snprintf(buf, 64,
|
||||
"%d 0 R",
|
||||
kid[j]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (j < kid[0])
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
snprintf(buf, 64,
|
||||
"]\n/Count %d\n>>",
|
||||
pdf_get_kid_count(&pdf, parent[i]));
|
||||
strcat(dictionary, buf);
|
||||
|
||||
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
|
||||
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
|
||||
|
||||
parent_missing[i - 1] = 1;
|
||||
|
||||
|
@ -185,6 +192,7 @@ cnki_pdf(cnki_t **param)
|
|||
if (parent_missing[i]) {
|
||||
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (i < root_kid)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
@ -200,7 +208,7 @@ cnki_pdf(cnki_t **param)
|
|||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
|
||||
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
|
@ -260,7 +268,7 @@ cnki_pdf(cnki_t **param)
|
|||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated catalog object\n");
|
||||
|
@ -383,3 +391,611 @@ cnki_pdf(cnki_t **param)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
cnki_pdf_hn(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *pdf = NULL;
|
||||
|
||||
if (pdf_obj_create(&pdf) != 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating PDF object(s)\n");
|
||||
|
||||
int dictionary_size;
|
||||
char *dictionary;
|
||||
|
||||
char buf[64];
|
||||
|
||||
int *ids = NULL;
|
||||
|
||||
int cnt = 0;
|
||||
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
|
||||
|
||||
if (root_kid == NULL)
|
||||
return 1;
|
||||
|
||||
memset(root_kid, 0, (*param)->file_stat->page);
|
||||
|
||||
object_hn_t *ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
/*
|
||||
* External object (ptr->image_length) +
|
||||
* content object +
|
||||
* resource object +
|
||||
* page object
|
||||
*/
|
||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||
|
||||
int stream_size;
|
||||
char *stream;
|
||||
|
||||
int *dim = malloc(2 * ptr->image_length * sizeof(int));
|
||||
|
||||
int ret;
|
||||
int wh[2];
|
||||
|
||||
if (dim == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/Type /XObject\n"
|
||||
"/Subtype /Image\n");
|
||||
|
||||
if ((*param)->stat > 2)
|
||||
printf("\tDecoding data, page %04d item %02d... ",
|
||||
ptr->page, i);
|
||||
|
||||
switch (ptr->image_data[i].format) {
|
||||
case JBIG:
|
||||
ret = cnki_jbig(&stream,
|
||||
&stream_size,
|
||||
&wh[0],
|
||||
&wh[1],
|
||||
ptr->image_data[i].image,
|
||||
ptr->image_data[i].size);
|
||||
|
||||
if (ret != 0) {
|
||||
dim[i * 2] = 0;
|
||||
dim[i * 2 + 1] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
snprintf(buf, 64, "/Width %d\n/Height %d\n",
|
||||
wh[0], wh[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/ColorSpace /DeviceGray\n"
|
||||
"/BitsPerComponent 1\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n",
|
||||
stream_size);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/Filter /CCITTFaxDecode\n");
|
||||
|
||||
dim[i * 2] = wh[0];
|
||||
dim[i * 2 + 1] = wh[1];
|
||||
break;
|
||||
case DCT_0:
|
||||
case DCT_1:
|
||||
ret = strinfo_jpeg_dim(&wh[0],
|
||||
&wh[1],
|
||||
ptr->image_data[i].image,
|
||||
ptr->image_data[i].size);
|
||||
|
||||
if (ret != 0) {
|
||||
dim[i * 2] = 0;
|
||||
dim[i * 2 + 1] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
stream_size = ptr->image_data[i].size;
|
||||
stream = malloc(stream_size);
|
||||
if (stream == NULL) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
memcpy(stream, ptr->image_data[i].image, stream_size);
|
||||
|
||||
snprintf(buf, 64, "/Width %d\n/Height %d\n",
|
||||
wh[0], wh[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/ColorSpace /DeviceRGB\n"
|
||||
"/BitsPerComponent 8\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n",
|
||||
stream_size);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/Filter /DCTDecode\n");
|
||||
|
||||
dim[i * 2] = wh[0];
|
||||
dim[i * 2 + 1] = wh[1];
|
||||
break;
|
||||
case JBIG2:
|
||||
case JPX:
|
||||
default:
|
||||
ret = -1;
|
||||
dim[i * 2] = -1;
|
||||
dim[i * 2 + 1] = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
if (ret == 0) {
|
||||
if ((*param)->stat > 2)
|
||||
printf("Succeed\n");
|
||||
|
||||
pdf_obj_append(&pdf, ids[i],
|
||||
NULL, dictionary, stream, stream_size);
|
||||
|
||||
free(dictionary);
|
||||
free(stream);
|
||||
} else if (ret == 1) {
|
||||
if ((*param)->stat > 2)
|
||||
printf("; Failed\n");
|
||||
|
||||
free(dictionary);
|
||||
|
||||
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
|
||||
} else {
|
||||
free(dictionary);
|
||||
}
|
||||
}
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/XObject <<");
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (i + 1 < ptr->image_length)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>\n>>");
|
||||
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
|
||||
int conv_size;
|
||||
char *conv_dst;
|
||||
char conv_src[2];
|
||||
char conv_hex[3];
|
||||
|
||||
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
|
||||
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
|
||||
|
||||
dictionary_size = stream_size / 8 + 7;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<feff");
|
||||
|
||||
for (int i = 0; i < stream_size; i += 16) {
|
||||
conv_src[0] = stream[i + 7];
|
||||
conv_src[1] = stream[i + 6];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
}
|
||||
free(stream);
|
||||
|
||||
strcat(dictionary, ">");
|
||||
} else {
|
||||
dictionary_size = ptr->text_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<feff");
|
||||
|
||||
for (int i = 0; i < ptr->text_size; i += 4) {
|
||||
conv_src[0] = ptr->text[i + 3];
|
||||
conv_src[1] = ptr->text[i + 2];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
}
|
||||
|
||||
strcat(dictionary, ">");
|
||||
}
|
||||
|
||||
/* FIXME: Use the text somehow? */
|
||||
free(dictionary);
|
||||
|
||||
dictionary_size = 64 + 12 * ptr->image_length;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "q\n");
|
||||
|
||||
strcat(dictionary, "0.120000 0 0 0.120000 0 0 cm\n");
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
|
||||
continue;
|
||||
|
||||
/* Apply transformation matrix */
|
||||
if (ptr->image_data[i].format == DCT_1)
|
||||
strcat(dictionary, "-1 0 0 -1 0 0 cm\n");
|
||||
|
||||
snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
|
||||
dim[i * 2], dim[i * 2 + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Im%d Do\n", i);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, "Q");
|
||||
|
||||
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n");
|
||||
|
||||
snprintf(buf, 64, "/Length %d\n", stream_size);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, "/Filter /FlateDecode\n");
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1],
|
||||
NULL, dictionary, stream, stream_size);
|
||||
|
||||
free(stream);
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/Type /Page\n");
|
||||
|
||||
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||
|
||||
free(ids);
|
||||
ids = NULL;
|
||||
|
||||
free(dim);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated %d object(s)\n",
|
||||
pdf_get_count(&pdf));
|
||||
|
||||
ids = NULL;
|
||||
|
||||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating outline object(s)\n\t%8s\n", "id");
|
||||
|
||||
pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
|
||||
int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
|
||||
printf("\t%8d\n", ids[i]);
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if (outline != 0)
|
||||
printf("No outline information\n");
|
||||
else
|
||||
printf("Generated %d outline object(s)\n",
|
||||
(*param)->file_stat->outline + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating root object\n");
|
||||
|
||||
dictionary_size = 64 + 12 * (*param)->file_stat->page;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
int root = pdf_get_free_id(&pdf);
|
||||
|
||||
snprintf(buf, 64, "<<\n/Type /Pages\n/Kids ");
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if ((*param)->file_stat->page > 1)
|
||||
strcat(dictionary, "[");
|
||||
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
snprintf(buf, 64, "%d 0 R", root_kid[i]);
|
||||
strcat(dictionary, buf);
|
||||
if (i + 1 < (*param)->file_stat->page)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
|
||||
if ((*param)->file_stat->page > 1)
|
||||
strcat(dictionary, "]");
|
||||
|
||||
strcat(dictionary, "\n");
|
||||
|
||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
pdf_object_t *tmp = NULL;
|
||||
|
||||
/* Add /Parent to page object */
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, tmp->dictionary);
|
||||
|
||||
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (pdf_obj_replace(&pdf, root_kid[i], NULL, dictionary, NULL, 0) != 0) {
|
||||
free(dictionary);
|
||||
free(root_kid);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
free(root_kid);
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated root object %d.\n",
|
||||
root);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating catalog object\n");
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||
root);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (ids != NULL) {
|
||||
snprintf(buf, 64,
|
||||
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
||||
ids[0]);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>");
|
||||
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated catalog object\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Sorting object(s)\n");
|
||||
|
||||
pdf_obj_sort(&pdf);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Sorted object(s)\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing header\n");
|
||||
|
||||
long cur = 0;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
|
||||
fprintf(stderr, "Header not written\n");
|
||||
return 1;
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Header %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing object(s)\n");
|
||||
|
||||
pdf_dump_obj(&pdf, &(*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"address",
|
||||
"size",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("%d object(s) %ld byte(s) written\n",
|
||||
pdf_get_count(&pdf),
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
long xref = ftell((*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing cross-reference table\n");
|
||||
|
||||
if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - xref);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing trailer\n");
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
pdf_obj_destroy(&pdf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
41
src/jbig.c
Normal file
41
src/jbig.c
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h> /* FIXME: test */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <jbig.h>
|
||||
|
||||
int
|
||||
strdec_jbig(char **bitmap, int *bitmap_size,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
struct jbg_dec_state sd;
|
||||
|
||||
jbg_dec_init(&sd);
|
||||
|
||||
unsigned char *data_ptr[1] = {(unsigned char *) data};
|
||||
|
||||
/* FIXME: test */
|
||||
int ret;
|
||||
if ((ret = jbg_dec_in(&sd, (unsigned char *) data_ptr,
|
||||
data_size, NULL)) != JBG_EOK) {
|
||||
printf("%s", jbg_strerror(ret));
|
||||
jbg_dec_free(&sd);
|
||||
return 1;
|
||||
}
|
||||
|
||||
*bitmap_size = jbg_dec_getsize(&sd);
|
||||
*bitmap = malloc(*bitmap_size);
|
||||
|
||||
if (*bitmap != NULL)
|
||||
memcpy(*bitmap, jbg_dec_getimage(&sd, 0), *bitmap_size);
|
||||
|
||||
jbg_dec_free(&sd);
|
||||
|
||||
return 0;
|
||||
}
|
8
src/jbig.h
Normal file
8
src/jbig.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strdec_jbig(char **bitmap, int *bitmap_size,
|
||||
const char * restrict data, int data_size);
|
36
src/jpeg.c
Normal file
36
src/jpeg.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <jpeglib.h>
|
||||
|
||||
int
|
||||
strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
|
||||
const char * restrict data, int data_size)
|
||||
{
|
||||
struct jpeg_decompress_struct cinfo;
|
||||
struct jpeg_error_mgr jerr;
|
||||
|
||||
cinfo.err = jpeg_std_error(&jerr);
|
||||
|
||||
jpeg_create_decompress(&cinfo);
|
||||
|
||||
jpeg_mem_src(&cinfo, (unsigned char *) data, data_size);
|
||||
|
||||
jpeg_read_header(&cinfo, TRUE);
|
||||
|
||||
jpeg_calc_output_dimensions(&cinfo);
|
||||
|
||||
*jpeg_width = cinfo.output_width;
|
||||
*jpeg_height = cinfo.output_height;
|
||||
|
||||
jpeg_destroy((struct jpeg_common_struct *) &cinfo);
|
||||
|
||||
jpeg_destroy_decompress(&cinfo);
|
||||
|
||||
return 0;
|
||||
}
|
8
src/jpeg.h
Normal file
8
src/jpeg.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
|
||||
const char * restrict data, int data_size);
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -82,7 +82,7 @@ main(int argc, char **argv, char **envp)
|
|||
|
||||
if (param->stat > 0)
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
|
||||
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
|
||||
"Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n");
|
||||
|
||||
cnki_info(&param);
|
||||
|
||||
|
|
76
src/pdf.c
76
src/pdf.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -67,7 +67,8 @@ int
|
|||
pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf != NULL || id <= 0 ||
|
||||
(object != NULL && dictionary != NULL))
|
||||
|
@ -112,14 +113,15 @@ pdf_obj_add(pdf_object_t **pdf, int id,
|
|||
(*pdf)->dictionary = NULL;
|
||||
}
|
||||
|
||||
if (stream != NULL) {
|
||||
(*pdf)->stream_size = sizeof(stream);
|
||||
if (stream != NULL && stream_size > 0) {
|
||||
(*pdf)->stream_size = stream_size + 1;
|
||||
(*pdf)->stream = malloc((*pdf)->stream_size);
|
||||
|
||||
if ((*pdf)->stream == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy((*pdf)->stream, stream, (*pdf)->stream_size);
|
||||
(*pdf)->stream[(*pdf)->stream_size - 1] = '\n';
|
||||
} else {
|
||||
(*pdf)->stream_size = 0;
|
||||
(*pdf)->stream = NULL;
|
||||
|
@ -153,7 +155,8 @@ int
|
|||
pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
@ -163,7 +166,8 @@ pdf_obj_prepend(pdf_object_t **pdf, int id,
|
|||
|
||||
pdf_object_t *ptr = NULL;
|
||||
|
||||
if (pdf_obj_add(&ptr, id, object, dictionary, stream) != 0) {
|
||||
if (pdf_obj_add(&ptr, id, object, dictionary,
|
||||
stream, stream_size) != 0) {
|
||||
free(ptr);
|
||||
return 1;
|
||||
}
|
||||
|
@ -178,7 +182,8 @@ int
|
|||
pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
@ -190,12 +195,67 @@ pdf_obj_append(pdf_object_t **pdf, int id,
|
|||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
if (pdf_obj_add(&ptr->next, id, object, dictionary, stream) != 0)
|
||||
if (pdf_obj_add(&ptr->next, id, object, dictionary,
|
||||
stream, stream_size) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_replace(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream,
|
||||
int stream_size)
|
||||
{
|
||||
pdf_object_t *ptr;
|
||||
char *ret;
|
||||
|
||||
if (pdf_get_obj(pdf, id, &ptr) != 0)
|
||||
return 1;
|
||||
|
||||
if (object != NULL && dictionary != NULL)
|
||||
return 1;
|
||||
|
||||
if (dictionary != NULL) {
|
||||
ret = realloc(ptr->dictionary, strlen(dictionary));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->dictionary_size = strlen(dictionary);
|
||||
ptr->dictionary = ret;
|
||||
|
||||
memcpy(ptr->dictionary, dictionary, ptr->dictionary_size);
|
||||
} else if (object != NULL) {
|
||||
ret = realloc(ptr->object, strlen(object));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->object_size = strlen(object);
|
||||
ptr->object = ret;
|
||||
|
||||
memcpy(ptr->object, object, ptr->object_size);
|
||||
}
|
||||
|
||||
if (stream != NULL && stream_size > 0) {
|
||||
ret = realloc(ptr->stream, stream_size + 1);
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->stream_size = stream_size + 1;
|
||||
ptr->stream = ret;
|
||||
|
||||
memcpy(ptr->stream, stream, ptr->stream_size);
|
||||
ptr->stream[ptr->stream_size - 1] = '\n';
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_sort(pdf_object_t **pdf)
|
||||
{
|
||||
|
|
16
src/pdf.h
16
src/pdf.h
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -26,16 +26,24 @@ void pdf_obj_destroy(pdf_object_t **pdf);
|
|||
int pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_del(pdf_object_t **pdf, int id);
|
||||
int pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_replace(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream,
|
||||
int stream_size);
|
||||
int pdf_obj_sort(pdf_object_t **pdf);
|
||||
|
||||
/* pdf_parser.c */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -93,7 +93,7 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
|
|||
atoi(ptr->item->page) - 1);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
|
||||
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL, 0);
|
||||
|
||||
if (ptr->left == NULL)
|
||||
(*stat)[1] = ptr->id;
|
||||
|
@ -128,7 +128,7 @@ pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
|
|||
|
||||
free(ret);
|
||||
|
||||
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
|
||||
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
|
30
src/zlib.c
30
src/zlib.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -20,12 +20,34 @@ strinflate(char **dst, int dst_size,
|
|||
|
||||
unsigned long size = dst_size;
|
||||
|
||||
uncompress((Bytef *) *dst, &size, (const Bytef *) src, src_size);
|
||||
|
||||
if (size != dst_size) {
|
||||
if (uncompress((Bytef *) *dst,
|
||||
&size, (const Bytef *) src, src_size) != Z_OK) {
|
||||
free(*dst);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
strdeflate(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size)
|
||||
{
|
||||
*dst_size = compressBound(src_size);
|
||||
*dst = malloc(*dst_size);
|
||||
|
||||
if (*dst == NULL)
|
||||
return 1;
|
||||
|
||||
unsigned long size = *dst_size;
|
||||
|
||||
if (compress((Bytef *) *dst, &size,
|
||||
(const Bytef *) src, src_size) != Z_OK) {
|
||||
free(*dst);
|
||||
return 1;
|
||||
}
|
||||
|
||||
*dst_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int strinflate(char **dst, int dst_size,
|
||||
const char * restrict src, int src_size);
|
||||
|
||||
int strdeflate(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size);
|
||||
|
|
Loading…
Reference in a new issue