From 5c5ddc926b20fae71544a8f2e2d96f15e1546c71 Mon Sep 17 00:00:00 2001 From: yzrh Date: Tue, 29 Dec 2020 02:10:17 +0000 Subject: [PATCH] Update HN data structure. --- src/cnki.c | 12 ++--- src/cnki.h | 33 +++++++----- src/cnki_caj.c | 4 +- src/cnki_hn.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++ src/cnki_nh.c | 110 --------------------------------------- src/cnki_zlib.c | 7 +++ src/extern.h | 4 +- src/melon.c | 9 ++-- 8 files changed, 176 insertions(+), 137 deletions(-) create mode 100644 src/cnki_hn.c delete mode 100644 src/cnki_nh.c create mode 100644 src/cnki_zlib.c diff --git a/src/cnki.c b/src/cnki.c index 5b9801b..4218adb 100644 --- a/src/cnki.c +++ b/src/cnki.c @@ -33,7 +33,7 @@ cnki_create(cnki_t **param) memset((*param)->file_stat, 0, sizeof(file_stat_t)); (*param)->object_outline = NULL; - (*param)->object_nh = NULL; + (*param)->object_hn = NULL; return 0; } @@ -46,8 +46,8 @@ cnki_destroy(cnki_t **param) free((*param)->file_stat); if ((*param)->object_outline != NULL) free((*param)->object_outline); - if ((*param)->object_nh != NULL) - free((*param)->object_nh); + if ((*param)->object_hn != NULL) + free((*param)->object_hn); free(*param); } } @@ -59,7 +59,7 @@ cnki_info(cnki_t **param) return 1; if ((*param)->stat > 1) - printf("Reading file header at %x\n", ADDRESS_HEAD); + printf("Reading file header at 0x%x\n", ADDRESS_HEAD); int addr[2]; @@ -84,7 +84,7 @@ cnki_info(cnki_t **param) } if ((*param)->stat > 1) - printf("Reading page count at %x\n", addr[0]); + printf("Reading page count at 0x%x\n", addr[0]); fseek((*param)->fp_i, addr[0], SEEK_SET); fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i); @@ -94,7 +94,7 @@ cnki_info(cnki_t **param) (*param)->file_stat->page); if ((*param)->stat > 1) - printf("Reading outline count at %x\n", addr[1]); + printf("Reading outline count at 0x%x\n", addr[1]); fseek((*param)->fp_i, addr[1], SEEK_SET); fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i); diff --git a/src/cnki.h b/src/cnki.h 
index f444381..f9adeba 100644 --- a/src/cnki.h +++ b/src/cnki.h @@ -44,26 +44,31 @@ typedef struct _object_outline_tree_t { struct _object_outline_tree_t *right; } object_outline_tree_t; -typedef enum _nh_code { +typedef enum _hn_code { CCITTFAX, DCT_0, - DCT_1, + DCT_1, /* Inverted */ JBIG2, JPX -} nh_code; +} hn_code; -typedef struct _object_nh_t { - int32_t address; /* Starting at end of object_outline_t */ +typedef struct _hn_image_t { + int32_t format; /* hn_code */ + int32_t address; int32_t size; - int16_t page[2]; + char *image; +} hn_image_t; + +typedef struct _object_hn_t { + int32_t address; /* Starting at end of object_outline_t */ + int32_t text_size; + int16_t image_length; + int16_t page; int32_t zero[2]; char *text; - int32_t image_format; /* nh_code */ - int32_t image_address; - int32_t image_size; - char *image; - struct _object_nh_t *next; -} object_nh_t; + struct _hn_image_t *image_data; + struct _object_hn_t *next; +} object_hn_t; typedef struct _cnki_t { int stat; @@ -72,7 +77,7 @@ typedef struct _cnki_t { FILE *fp_o; file_stat_t *file_stat; object_outline_t *object_outline; - object_nh_t *object_nh; + object_hn_t *object_hn; } cnki_t; /* cnki_pdf.c */ @@ -82,5 +87,7 @@ int cnki_pdf(cnki_t **param); int cnki_outline_tree(object_outline_tree_t **outline_tree, object_outline_t **outline, int *ids); +/* cnki_zlib.c */ + /* cnki_xml.c */ int cnki_xml(char **xml, FILE **fp); diff --git a/src/cnki_caj.c b/src/cnki_caj.c index 7b49aff..1e3bd8b 100644 --- a/src/cnki_caj.c +++ b/src/cnki_caj.c @@ -18,7 +18,7 @@ cnki_caj(cnki_t **param) printf("Begin 'CAJ' conversion\n"); if ((*param)->stat > 1) - printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY); + printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY); int addr; @@ -29,7 +29,7 @@ cnki_caj(cnki_t **param) fseek((*param)->fp_i, addr, SEEK_SET); if ((*param)->stat > 0) - printf("Advised document body address is %x\n", addr); + printf("Advised document body address is 0x%x\n", 
addr); cnki_pdf(param); diff --git a/src/cnki_hn.c b/src/cnki_hn.c new file mode 100644 index 0000000..978aa30 --- /dev/null +++ b/src/cnki_hn.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020, yzrh + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "cnki.h" +#include "pdf.h" +#include "pdf_cnki.h" + +int +cnki_hn(cnki_t **param) +{ + if (*param == NULL) + return 1; + + if ((*param)->stat > 0) + printf("Begin 'HN' conversion\n"); + + if ((*param)->file_stat->page > 0) + (*param)->object_hn = malloc(sizeof(object_hn_t)); + else + return 1; + + if ((*param)->object_hn == NULL) + return 1; + + if ((*param)->stat > 1) { + printf("Loading page(s)\n"); + printf("\t%8s\t%8s\t%6s\t%4s\t%6s\t%6s\t%4s\t%8s\t%8s\n", + "address", + "text", + "length", + "page", + "zero", + "#", + "code", + "address", + "image"); + } + + object_hn_t *ptr = (*param)->object_hn; + for (int i = 0; i < (*param)->file_stat->page; i++) { + fread(&ptr->address, 4, 1, (*param)->fp_i); + fread(&ptr->text_size, 4, 1, (*param)->fp_i); + fread(&ptr->image_length, 2, 1, (*param)->fp_i); + fread(&ptr->page, 2, 1, (*param)->fp_i); + fread(&ptr->zero, 8, 1, (*param)->fp_i); + + ptr->text = NULL; + ptr->image_data = NULL; + ptr->next = NULL; + + if (i < (*param)->file_stat->page - 1) { + ptr->next = malloc(sizeof(object_hn_t)); + + if (ptr->next == NULL) + return 1; + } + + ptr = ptr->next; + } + + ptr = (*param)->object_hn; + while (ptr != NULL) { + ptr->text = malloc(ptr->text_size); + + if (ptr->text == NULL) + return 1; + + fseek((*param)->fp_i, ptr->address, SEEK_SET); + fread(ptr->text, ptr->text_size, 1, (*param)->fp_i); + + if ((*param)->stat > 1) + printf("\t%08x\t%8d\t%6d\t%4d\t{%d, %d}", + ptr->address, + ptr->text_size, + ptr->image_length, + ptr->page, + ptr->zero[0], + ptr->zero[1]); + + ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t)); + + if (ptr->image_data == NULL) + return 1; + + for (int i = 0; i < ptr->image_length; i++) { + 
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i); + fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i); + fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i); + } + + for (int i = 0; i < ptr->image_length; i++) { + ptr->image_data[i].image = malloc(ptr->image_data[i].size); + + if (ptr->image_data[i].image == NULL) + return 1; + + fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET); + fread(ptr->image_data[i].image, + ptr->image_data[i].size, 1, + (*param)->fp_i); + + if ((*param)->stat > 1) + printf("\t%6d\t%4d\t%08x\t%8d\n", + i, + ptr->image_data[i].format, + ptr->image_data[i].address, + ptr->image_data[i].size); + } + + ptr = ptr->next; + } + + if ((*param)->stat > 1) + printf("Loaded %d page(s)\n", (*param)->file_stat->page); + + if ((*param)->stat > 1) + printf("Creating PDF object(s)\n"); + + pdf_object_t *pdf = NULL; + + if (pdf_obj_create(&pdf) != 0) + return 1; + + if ((*param)->stat > 0) + printf("Conversion ended\n"); + + /* TODO: Finish me please :) */ + return 1; +} diff --git a/src/cnki_nh.c b/src/cnki_nh.c deleted file mode 100644 index 94613b5..0000000 --- a/src/cnki_nh.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2020, yzrh - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#include - -#include "cnki.h" - -int -cnki_nh(cnki_t **param) -{ - if (*param == NULL) - return 1; - - if ((*param)->stat > 0) - printf("Begin 'HN' conversion\n"); - - if ((*param)->file_stat->page > 0) - (*param)->object_nh = malloc(sizeof(object_nh_t)); - else - return 1; - - if ((*param)->object_nh == NULL) - return 1; - - if ((*param)->stat > 1) { - printf("Loading page(s)\n"); - printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n", - "address", - "text", - "page", - "zero", - "code", - "address", - "image"); - } - - object_nh_t *ptr = (*param)->object_nh; - for (int i = 0; i < (*param)->file_stat->page; i++) { - fread(&ptr->address, 4, 1, (*param)->fp_i); - fread(&ptr->size, 4, 1, (*param)->fp_i); - fread(&ptr->page, 4, 1, (*param)->fp_i); - 
fread(&ptr->zero, 8, 1, (*param)->fp_i); - - ptr->text = NULL; - ptr->image_format = -1; - ptr->image_address = 0; - ptr->image_size = 0; - ptr->image = NULL; - ptr->next = NULL; - - if (i < (*param)->file_stat->page - 1) { - ptr->next = malloc(sizeof(object_nh_t)); - - if (ptr->next == NULL) - return 1; - } - - ptr = ptr->next; - } - - ptr = (*param)->object_nh; - while (ptr != NULL) { - ptr->text = malloc(ptr->size); - - if (ptr->text == NULL) - return 1; - - fseek((*param)->fp_i, ptr->address, SEEK_SET); - fread(ptr->text, ptr->size, 1, (*param)->fp_i); - fread(&ptr->image_format, 4, 1, (*param)->fp_i); - fread(&ptr->image_address, 4, 1, (*param)->fp_i); - fread(&ptr->image_size, 4, 1, (*param)->fp_i); - - ptr->image = malloc(ptr->image_size); - - if (ptr->image == NULL) - return 1; - - fseek((*param)->fp_i, ptr->image_address, SEEK_SET); - fread(ptr->image, ptr->image_size, 1, (*param)->fp_i); - - if ((*param)->stat > 1) - printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n", - ptr->address, - ptr->size, - ptr->page[0], - ptr->page[1], - ptr->zero[0], - ptr->zero[1], - ptr->image_format, - ptr->image_address, - ptr->image_size); - - ptr = ptr->next; - } - - if ((*param)->stat > 1) - printf("Loaded %d page(s)\n", (*param)->file_stat->page); - - /* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */ - - if ((*param)->stat > 0) - printf("Conversion ended\n"); - - /* TODO: Finish me please :) */ - return 1; -} diff --git a/src/cnki_zlib.c b/src/cnki_zlib.c new file mode 100644 index 0000000..4355433 --- /dev/null +++ b/src/cnki_zlib.c @@ -0,0 +1,7 @@ +/* + * Copyright (c) 2020, yzrh + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include diff --git a/src/extern.h b/src/extern.h index cc31aef..4109396 100644 --- a/src/extern.h +++ b/src/extern.h @@ -14,8 +14,8 @@ int cnki_info(cnki_t **param); /* cnki_caj.c */ int cnki_caj(cnki_t **param); -/* cnki_nh.c */ -int cnki_nh(cnki_t **param); +/* cnki_hn.c */ +int cnki_hn(cnki_t **param); 
/* cnki_kdh.c */ int cnki_kdh(cnki_t **param); diff --git a/src/melon.c b/src/melon.c index cba0217..62b742d 100644 --- a/src/melon.c +++ b/src/melon.c @@ -16,9 +16,6 @@ int main(int argc, char **argv, char **envp) { - printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"); - printf("Copyright (c) 2020, yzrh \n\n"); - cnki_t *param = NULL; if (cnki_create(&param) != 0) { @@ -83,6 +80,10 @@ main(int argc, char **argv, char **envp) return EXIT_FAILURE; } + if (param->stat > 0) + printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n" + "Copyright (c) 2020, yzrh \n\n"); + cnki_info(&param); if (strcmp(param->file_stat->type, "%PDF") == 0) { @@ -98,7 +99,7 @@ main(int argc, char **argv, char **envp) return EXIT_FAILURE; } } else if (strcmp(param->file_stat->type, "HN") == 0) { - if (cnki_nh(&param) != 0) { + if (cnki_hn(&param) != 0) { fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); return EXIT_FAILURE;