Add JPEG 2000 (JPX) image support for HN documents.

  README.md       drop the "no JPEG 2000 yet" caveat, list openjpeg as a
                  dependency, say that -b sets the input buffer size
  src/Makefile    build jp2.c / jp2.h and link with -lopenjp2
  src/cnki_pdf.c  JPX case: query the dimensions, copy the image bytes
                  into the stream buffer and append /Width, /Height,
                  /Length and /Filter /JPXDecode to the dictionary
  src/jp2.c       new: strinfo_jp2_dim() on top of OpenJPEG, reading
                  from memory through read/skip/seek callbacks
  src/jp2.h       new: prototype of strinfo_jp2_dim()

Fixes folded into src/jp2.c:
  - the skip callback returned the new offset instead of the number of
    bytes skipped, and accepted negative counts
  - the seek callback accepted negative offsets
  - opj_create_decompress() and opj_stream_default_create() results were
    used without a NULL check
  - "&param" had been garbled into a paragraph sign

NOTE(review): this patch was rebuilt from a copy whose newlines and
indentation had been collapsed. Blank context lines were inferred from
the hunk line counts, and the indentation of the cnki_pdf.c hunk is a
guess; check the context against the tree (or apply with whitespace
ignored). The blob ids on the index lines of the two new files predate
the edits listed above.

diff --git a/README.md b/README.md
index b94a3e7..4e36fbd 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,6 @@ Development
 
 Currently, CAJ, KDH, and HN can be converted. Please report any failures with a sample that can reproduce the behaviour.
 
-HN support does not support JPEG 2000 yet.
-
 Dependency
 ----------
 
@@ -19,6 +17,7 @@ Dependency
 3. zlib
 4. jbig2dec
 5. libjpeg-turbo
+6. openjpeg
 
 Usage
 =====
@@ -36,7 +35,7 @@ Options
 Specify output file
 
 -b, --buffer
-Set buffer size (default 512k)
+Set input buffer size (default 512k)
 
 -v, --verbose
 Print more information (twice for even more, three times for HN image processing information as well)
diff --git a/src/Makefile b/src/Makefile
index 6943af3..065a8a5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,11 +4,11 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
+src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c jp2.c \
 	cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
 	cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
 	pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
-inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
+inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h jp2.h \
 	cnki.h pdf_cnki.h cnki_jbig.h pdf.h
 
 obj = ${src:.c=.o}
@@ -16,7 +16,7 @@ obj = ${src:.c=.o}
 PREFIX = /usr/local
 
 CFLAGS = -O2 -pipe -flto -Wall -Wextra
-LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
+LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -lopenjp2 -Wl,--as-needed
 
 CFLAGS += -I/usr/local/include
 LDFLAGS += -L/usr/local/lib
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 0cb30ca..887e5f4 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -11,6 +11,7 @@
 #include "iconv.h"
 #include "zlib.h"
 #include "jpeg.h"
+#include "jp2.h"
 #include "pdf.h"
 #include "pdf_cnki.h"
 
@@ -660,6 +661,41 @@ cnki_pdf_hn(cnki_t **param)
 			dim[i * 2 + 1] = info[1];
 			break;
 		case JPX:
+			ret = strinfo_jp2_dim(&info[0],
+				&info[1],
+				ptr->image_data[i].image,
+				ptr->image_data[i].size);
+
+			if (ret != 0) {
+				dim[i * 2] = 0;
+				dim[i * 2 + 1] = 0;
+				break;
+			}
+
+			stream_size = ptr->image_data[i].size;
+			stream = malloc(stream_size);
+			if (stream == NULL) {
+				free(root_kid);
+				free(ids);
+				free(dim);
+				free(dictionary);
+				return 1;
+			}
+			memcpy(stream, ptr->image_data[i].image, stream_size);
+
+			snprintf(buf, 64, "/Width %d\n/Height %d\n",
+				info[0], info[1]);
+			strcat(dictionary, buf);
+
+			snprintf(buf, 64, "/Length %d\n",
+				stream_size);
+			strcat(dictionary, buf);
+
+			strcat(dictionary, "/Filter /JPXDecode\n");
+
+			dim[i * 2] = info[0];
+			dim[i * 2 + 1] = info[1];
+			break;
 		default:
 			ret = -1;
 			dim[i * 2] = -1;
diff --git a/src/jp2.c b/src/jp2.c
new file mode 100644
index 0000000..9420b48
--- /dev/null
+++ b/src/jp2.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022, yzrh
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+
+/*
+ * NOTE(review): the header names between the angle brackets were lost
+ * when this patch was extracted. <string.h> is needed for memcpy(); the
+ * OpenJPEG paths below are placeholders, restore the per-platform
+ * install paths (for example <openjpeg-2.x/openjpeg.h>) before building.
+ */
+#ifdef __linux__
+#include <openjpeg.h>
+#else
+#include <openjpeg.h>
+#endif
+
+/* Read cursor over an in-memory image buffer, shared by the callbacks */
+typedef struct _stream_user_data {
+	OPJ_SIZE_T pos;
+	OPJ_SIZE_T size;
+	const unsigned char *data;
+} stream_user_data;
+
+/*
+ * Read callback: copy at most p_nb_bytes from the cursor into p_buffer
+ * and advance the cursor. Returns the number of bytes copied, or
+ * (OPJ_SIZE_T) -1 when the cursor is already at the end of the data.
+ */
+static OPJ_SIZE_T
+_opj_stream_read(void *p_buffer, OPJ_SIZE_T p_nb_bytes, void *p_user_data)
+{
+	stream_user_data *d = (stream_user_data *) p_user_data;
+	OPJ_SIZE_T remain;
+
+	if (d->pos >= d->size)
+		return (OPJ_SIZE_T) -1;
+
+	/* Clamp against what is left so that pos + p_nb_bytes cannot wrap */
+	remain = d->size - d->pos;
+	if (p_nb_bytes > remain)
+		p_nb_bytes = remain;
+
+	memcpy(p_buffer, d->data + d->pos, p_nb_bytes);
+	d->pos += p_nb_bytes;
+
+	return p_nb_bytes;
+}
+
+/*
+ * Skip callback: advance the cursor by at most p_nb_bytes. Returns the
+ * number of bytes actually skipped, or -1 when the request is negative
+ * or nothing is left to skip.
+ */
+static OPJ_OFF_T
+_opj_stream_skip(OPJ_OFF_T p_nb_bytes, void *p_user_data)
+{
+	stream_user_data *d = (stream_user_data *) p_user_data;
+	OPJ_SIZE_T remain;
+
+	if (p_nb_bytes < 0 || d->pos > d->size)
+		return (OPJ_OFF_T) -1;
+
+	remain = d->size - d->pos;
+
+	/* Report end of data instead of a zero-length skip */
+	if (p_nb_bytes > 0 && remain == 0)
+		return (OPJ_OFF_T) -1;
+
+	if (p_nb_bytes > (OPJ_OFF_T) remain)
+		p_nb_bytes = (OPJ_OFF_T) remain;
+
+	d->pos += (OPJ_SIZE_T) p_nb_bytes;
+
+	/* The caller wants the skipped byte count, not the new offset */
+	return p_nb_bytes;
+}
+
+/*
+ * Seek callback: move the cursor to the absolute offset p_nb_bytes.
+ * Returns OPJ_FALSE when the offset is negative or beyond the data.
+ */
+static OPJ_BOOL
+_opj_stream_seek(OPJ_OFF_T p_nb_bytes, void *p_user_data)
+{
+	stream_user_data *d = (stream_user_data *) p_user_data;
+
+	/* A negative offset would wrap when stored in the unsigned pos */
+	if (p_nb_bytes < 0 || p_nb_bytes > (OPJ_OFF_T) d->size)
+		return OPJ_FALSE;
+
+	d->pos = (OPJ_SIZE_T) p_nb_bytes;
+
+	return OPJ_TRUE;
+}
+
+/*
+ * Read the width and height of a JPEG 2000 image held in memory.
+ *
+ * data, data_size: the image bytes, a raw codestream or a JP2 file
+ * jp2_width, jp2_height: receive the dimensions on success
+ *
+ * Returns 0 on success and 1 on failure, in which case the outputs are
+ * left untouched.
+ */
+int
+strinfo_jp2_dim(int *jp2_width, int *jp2_height,
+	const char * restrict data, int data_size)
+{
+	opj_codec_t *codec;
+	opj_dparameters_t param;
+	opj_stream_t *stream;
+	opj_image_t *image = NULL;
+	stream_user_data d;
+
+	if (data == NULL || data_size < 2)
+		return 1;
+
+	opj_set_default_decoder_parameters(&param);
+
+	/* 0xff 0x4f starts a raw codestream, anything else is tried as JP2 */
+	if ((unsigned char) data[0] == 0xff && (unsigned char) data[1] == 0x4f)
+		codec = opj_create_decompress(OPJ_CODEC_J2K);
+	else
+		codec = opj_create_decompress(OPJ_CODEC_JP2);
+
+	if (codec == NULL)
+		return 1;
+
+	if (!opj_setup_decoder(codec, &param)) {
+		opj_destroy_codec(codec);
+		return 1;
+	}
+
+	stream = opj_stream_default_create(OPJ_TRUE);
+	if (stream == NULL) {
+		opj_destroy_codec(codec);
+		return 1;
+	}
+
+	d.pos = 0;
+	d.size = (OPJ_SIZE_T) data_size;
+	d.data = (const unsigned char *) data;
+
+	opj_stream_set_read_function(stream, _opj_stream_read);
+	opj_stream_set_skip_function(stream, _opj_stream_skip);
+	opj_stream_set_seek_function(stream, _opj_stream_seek);
+	/* d lives on this stack frame, so no free callback is registered */
+	opj_stream_set_user_data(stream, &d, NULL);
+	opj_stream_set_user_data_length(stream, data_size);
+
+	if (!opj_read_header(stream, codec, &image) || image == NULL) {
+		opj_destroy_codec(codec);
+		opj_stream_destroy(stream);
+		return 1;
+	}
+
+	opj_destroy_codec(codec);
+	opj_stream_destroy(stream);
+
+	*jp2_width = image->x1 - image->x0;
+	*jp2_height = image->y1 - image->y0;
+
+	opj_image_destroy(image);
+
+	return 0;
+}
diff --git a/src/jp2.h b/src/jp2.h
new file mode 100644
index 0000000..5644938
--- /dev/null
+++ b/src/jp2.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2022, yzrh
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Store the width and height of the JPEG 2000 image held in data.
+ * Returns 0 on success, 1 on failure.
+ */
+int strinfo_jp2_dim(int *jp2_width, int *jp2_height,
+	const char * restrict data, int data_size);