Update HN data structure.

This commit is contained in:
yzrh 2020-12-29 02:10:17 +00:00
parent bcb8ef9cd9
commit 5c5ddc926b
8 changed files with 176 additions and 137 deletions

View file

@ -33,7 +33,7 @@ cnki_create(cnki_t **param)
memset((*param)->file_stat, 0, sizeof(file_stat_t)); memset((*param)->file_stat, 0, sizeof(file_stat_t));
(*param)->object_outline = NULL; (*param)->object_outline = NULL;
(*param)->object_nh = NULL; (*param)->object_hn = NULL;
return 0; return 0;
} }
@ -46,8 +46,8 @@ cnki_destroy(cnki_t **param)
free((*param)->file_stat); free((*param)->file_stat);
if ((*param)->object_outline != NULL) if ((*param)->object_outline != NULL)
free((*param)->object_outline); free((*param)->object_outline);
if ((*param)->object_nh != NULL) if ((*param)->object_hn != NULL)
free((*param)->object_nh); free((*param)->object_hn);
free(*param); free(*param);
} }
} }
@ -59,7 +59,7 @@ cnki_info(cnki_t **param)
return 1; return 1;
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Reading file header at %x\n", ADDRESS_HEAD); printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
int addr[2]; int addr[2];
@ -84,7 +84,7 @@ cnki_info(cnki_t **param)
} }
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Reading page count at %x\n", addr[0]); printf("Reading page count at 0x%x\n", addr[0]);
fseek((*param)->fp_i, addr[0], SEEK_SET); fseek((*param)->fp_i, addr[0], SEEK_SET);
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i); fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
@ -94,7 +94,7 @@ cnki_info(cnki_t **param)
(*param)->file_stat->page); (*param)->file_stat->page);
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Reading outline count at %x\n", addr[1]); printf("Reading outline count at 0x%x\n", addr[1]);
fseek((*param)->fp_i, addr[1], SEEK_SET); fseek((*param)->fp_i, addr[1], SEEK_SET);
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i); fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);

View file

@ -44,26 +44,31 @@ typedef struct _object_outline_tree_t {
struct _object_outline_tree_t *right; struct _object_outline_tree_t *right;
} object_outline_tree_t; } object_outline_tree_t;
typedef enum _nh_code { typedef enum _hn_code {
CCITTFAX, CCITTFAX,
DCT_0, DCT_0,
DCT_1, DCT_1, /* Inverted */
JBIG2, JBIG2,
JPX JPX
} nh_code; } hn_code;
typedef struct _object_nh_t { typedef struct _hn_image_t {
int32_t address; /* Starting at end of object_outline_t */ int32_t format; /* hn_code */
int32_t address;
int32_t size; int32_t size;
int16_t page[2]; char *image;
} hn_image_t;
typedef struct _object_hn_t {
int32_t address; /* Starting at end of object_outline_t */
int32_t text_size;
int16_t image_length;
int16_t page;
int32_t zero[2]; int32_t zero[2];
char *text; char *text;
int32_t image_format; /* nh_code */ struct _hn_image_t *image_data;
int32_t image_address; struct _object_hn_t *next;
int32_t image_size; } object_hn_t;
char *image;
struct _object_nh_t *next;
} object_nh_t;
typedef struct _cnki_t { typedef struct _cnki_t {
int stat; int stat;
@ -72,7 +77,7 @@ typedef struct _cnki_t {
FILE *fp_o; FILE *fp_o;
file_stat_t *file_stat; file_stat_t *file_stat;
object_outline_t *object_outline; object_outline_t *object_outline;
object_nh_t *object_nh; object_hn_t *object_hn;
} cnki_t; } cnki_t;
/* cnki_pdf.c */ /* cnki_pdf.c */
@ -82,5 +87,7 @@ int cnki_pdf(cnki_t **param);
int cnki_outline_tree(object_outline_tree_t **outline_tree, int cnki_outline_tree(object_outline_tree_t **outline_tree,
object_outline_t **outline, int *ids); object_outline_t **outline, int *ids);
/* cnki_zlib.c */
/* cnki_xml.c */ /* cnki_xml.c */
int cnki_xml(char **xml, FILE **fp); int cnki_xml(char **xml, FILE **fp);

View file

@ -18,7 +18,7 @@ cnki_caj(cnki_t **param)
printf("Begin 'CAJ' conversion\n"); printf("Begin 'CAJ' conversion\n");
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY); printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY);
int addr; int addr;
@ -29,7 +29,7 @@ cnki_caj(cnki_t **param)
fseek((*param)->fp_i, addr, SEEK_SET); fseek((*param)->fp_i, addr, SEEK_SET);
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Advised document body address is %x\n", addr); printf("Advised document body address is 0x%x\n", addr);
cnki_pdf(param); cnki_pdf(param);

134
src/cnki_hn.c Normal file
View file

@ -0,0 +1,134 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include "cnki.h"
#include "pdf.h"
#include "pdf_cnki.h"
/*
 * Read exactly `size` bytes from `fp` into `dst`.
 *
 * A zero-length request succeeds without touching the stream, because
 * fread() returns 0 items for it and that must not be taken for an error.
 *
 * Returns 0 on success, 1 on a short read or stream error.
 */
static int
hn_read_exact(FILE *fp, void *dst, size_t size)
{
	if (size == 0)
		return 0;

	return fread(dst, size, 1, fp) == 1 ? 0 : 1;
}

/*
 * Seek to the absolute offset `address` and read exactly `size` bytes
 * into `dst`.  The seek is performed even when `size` is zero so that
 * the stream position afterwards is the same as for a non-empty read.
 *
 * Returns 0 on success, 1 if the seek or the read fails.
 */
static int
hn_read_at(FILE *fp, int32_t address, void *dst, size_t size)
{
	if (fseek(fp, address, SEEK_SET) != 0)
		return 1;

	return hn_read_exact(fp, dst, size);
}

/*
 * Allocate a buffer for `size` bytes taken from the input file.
 *
 * Negative sizes are rejected instead of being converted to a huge
 * size_t.  A zero size still yields a valid one-byte buffer, since
 * malloc(0) is allowed to return NULL and NULL is reserved for failure.
 */
static void *
hn_alloc(int32_t size)
{
	if (size < 0)
		return NULL;

	return malloc(size > 0 ? (size_t) size : 1);
}

/*
 * Load the page table, page text and image data of an 'HN' document
 * into a linked list of object_hn_t hanging off (*param)->object_hn.
 *
 * The page headers are read sequentially from the current position of
 * (*param)->fp_i, one per page counted in file_stat->page.  For every
 * page the text is then read at the page address, followed directly by
 * `image_length` image descriptors; each image is finally read at the
 * address given by its descriptor.
 *
 * With stat > 0 progress messages are printed, with stat > 1 a table of
 * everything that was loaded is printed as well.
 *
 * Returns 1 on any allocation, seek or read failure and on invalid
 * sizes.  The conversion itself is not implemented yet, so 1 is also
 * returned after a successful load.
 */
int
cnki_hn(cnki_t **param)
{
	if (param == NULL || *param == NULL)
		return 1;

	cnki_t *ctx = *param;
	FILE *fp = ctx->fp_i;

	if (ctx->stat > 0)
		printf("Begin 'HN' conversion\n");

	if (ctx->file_stat->page <= 0)
		return 1;

	/* calloc keeps the list well formed if a later step bails out */
	ctx->object_hn = calloc(1, sizeof(object_hn_t));
	if (ctx->object_hn == NULL)
		return 1;

	if (ctx->stat > 1) {
		printf("Loading page(s)\n");
		printf("\t%8s\t%8s\t%6s\t%4s\t%6s\t%6s\t%4s\t%8s\t%8s\n",
		    "address",
		    "text",
		    "length",
		    "page",
		    "zero",
		    "#",
		    "code",
		    "address",
		    "image");
	}

	/* Pass 1: the fixed-size page headers, stored back to back */
	object_hn_t *ptr = ctx->object_hn;
	for (int i = 0; i < ctx->file_stat->page; i++) {
		ptr->text = NULL;
		ptr->image_data = NULL;
		ptr->next = NULL;

		if (hn_read_exact(fp, &ptr->address, sizeof ptr->address) != 0 ||
		    hn_read_exact(fp, &ptr->text_size, sizeof ptr->text_size) != 0 ||
		    hn_read_exact(fp, &ptr->image_length, sizeof ptr->image_length) != 0 ||
		    hn_read_exact(fp, &ptr->page, sizeof ptr->page) != 0 ||
		    hn_read_exact(fp, ptr->zero, sizeof ptr->zero) != 0)
			return 1;

		if (i < ctx->file_stat->page - 1) {
			ptr->next = calloc(1, sizeof(object_hn_t));
			if (ptr->next == NULL)
				return 1;
		}

		ptr = ptr->next;
	}

	/* Pass 2: text, image descriptors and image payload of every page */
	for (ptr = ctx->object_hn; ptr != NULL; ptr = ptr->next) {
		ptr->text = hn_alloc(ptr->text_size);
		if (ptr->text == NULL)
			return 1;

		if (hn_read_at(fp, ptr->address, ptr->text,
		    (size_t) ptr->text_size) != 0)
			return 1;

		if (ctx->stat > 1)
			printf("\t%08x\t%8d\t%6d\t%4d\t{%d, %d}",
			    ptr->address,
			    ptr->text_size,
			    ptr->image_length,
			    ptr->page,
			    ptr->zero[0],
			    ptr->zero[1]);

		int count = ptr->image_length;
		if (count < 0)
			return 1;

		/* At least one element, so NULL can only mean out of memory */
		ptr->image_data = calloc(count > 0 ? (size_t) count : 1,
		    sizeof(hn_image_t));
		if (ptr->image_data == NULL)
			return 1;

		/* The descriptors follow the page text without a gap */
		for (int i = 0; i < count; i++) {
			hn_image_t *img = &ptr->image_data[i];

			if (hn_read_exact(fp, &img->format, sizeof img->format) != 0 ||
			    hn_read_exact(fp, &img->address, sizeof img->address) != 0 ||
			    hn_read_exact(fp, &img->size, sizeof img->size) != 0)
				return 1;
		}

		for (int i = 0; i < count; i++) {
			hn_image_t *img = &ptr->image_data[i];

			img->image = hn_alloc(img->size);
			if (img->image == NULL)
				return 1;

			if (hn_read_at(fp, img->address, img->image,
			    (size_t) img->size) != 0)
				return 1;

			if (ctx->stat > 1)
				printf("\t%6d\t%4d\t%08x\t%8d\n",
				    i,
				    img->format,
				    img->address,
				    img->size);
		}

		/* The image rows end the line; a page without images needs its own */
		if (ctx->stat > 1 && count == 0)
			printf("\n");
	}

	if (ctx->stat > 1) {
		printf("Loaded %d page(s)\n", ctx->file_stat->page);
		printf("Creating PDF object(s)\n");
	}

	/*
	 * NOTE(review): `pdf` is created but neither used nor released yet;
	 * presumably pdf.h offers a matching destroy routine to call once
	 * the conversion is written -- confirm.
	 */
	pdf_object_t *pdf = NULL;
	if (pdf_obj_create(&pdf) != 0)
		return 1;

	if (ctx->stat > 0)
		printf("Conversion ended\n");

	/* TODO: Finish me please :) */
	return 1;
}

View file

@ -1,110 +0,0 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include "cnki.h"
/*
 * Read exactly `size` bytes from `fp` into `dst`.
 *
 * A zero-length request succeeds without touching the stream, because
 * fread() returns 0 items for it and that must not be taken for an error.
 *
 * Returns 0 on success, 1 on a short read or stream error.
 */
static int
nh_read_exact(FILE *fp, void *dst, size_t size)
{
	if (size == 0)
		return 0;

	return fread(dst, size, 1, fp) == 1 ? 0 : 1;
}

/*
 * Allocate a buffer for `size` bytes taken from the input file.
 *
 * Negative sizes are rejected instead of being converted to a huge
 * size_t.  A zero size still yields a valid one-byte buffer, since
 * malloc(0) is allowed to return NULL and NULL is reserved for failure.
 */
static void *
nh_alloc(int32_t size)
{
	if (size < 0)
		return NULL;

	return malloc(size > 0 ? (size_t) size : 1);
}

/*
 * Load the page table, page text and the single image of every page of
 * an 'HN' document into a linked list of object_nh_t hanging off
 * (*param)->object_nh.
 *
 * The page headers are read sequentially from the current position of
 * (*param)->fp_i, one per page counted in file_stat->page.  For every
 * page the text is then read at the page address, followed directly by
 * one image descriptor; the image is read at the descriptor's address.
 *
 * With stat > 0 progress messages are printed, with stat > 1 a table of
 * everything that was loaded is printed as well.
 *
 * Returns 1 on any allocation, seek or read failure and on invalid
 * sizes.  The conversion itself is not implemented yet, so 1 is also
 * returned after a successful load.
 */
int
cnki_nh(cnki_t **param)
{
	if (param == NULL || *param == NULL)
		return 1;

	cnki_t *ctx = *param;
	FILE *fp = ctx->fp_i;

	if (ctx->stat > 0)
		printf("Begin 'HN' conversion\n");

	if (ctx->file_stat->page <= 0)
		return 1;

	/* calloc keeps the list well formed if a later step bails out */
	ctx->object_nh = calloc(1, sizeof(object_nh_t));
	if (ctx->object_nh == NULL)
		return 1;

	if (ctx->stat > 1) {
		printf("Loading page(s)\n");
		printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
		    "address",
		    "text",
		    "page",
		    "zero",
		    "code",
		    "address",
		    "image");
	}

	/* Pass 1: the fixed-size page headers, stored back to back */
	object_nh_t *ptr = ctx->object_nh;
	for (int i = 0; i < ctx->file_stat->page; i++) {
		ptr->text = NULL;
		ptr->image_format = -1;
		ptr->image_address = 0;
		ptr->image_size = 0;
		ptr->image = NULL;
		ptr->next = NULL;

		if (nh_read_exact(fp, &ptr->address, sizeof ptr->address) != 0 ||
		    nh_read_exact(fp, &ptr->size, sizeof ptr->size) != 0 ||
		    nh_read_exact(fp, ptr->page, sizeof ptr->page) != 0 ||
		    nh_read_exact(fp, ptr->zero, sizeof ptr->zero) != 0)
			return 1;

		if (i < ctx->file_stat->page - 1) {
			ptr->next = calloc(1, sizeof(object_nh_t));
			if (ptr->next == NULL)
				return 1;
		}

		ptr = ptr->next;
	}

	/* Pass 2: text, image descriptor and image payload of every page */
	for (ptr = ctx->object_nh; ptr != NULL; ptr = ptr->next) {
		ptr->text = nh_alloc(ptr->size);
		if (ptr->text == NULL)
			return 1;

		if (fseek(fp, ptr->address, SEEK_SET) != 0 ||
		    nh_read_exact(fp, ptr->text, (size_t) ptr->size) != 0)
			return 1;

		/* The descriptor follows the page text without a gap */
		if (nh_read_exact(fp, &ptr->image_format,
		    sizeof ptr->image_format) != 0 ||
		    nh_read_exact(fp, &ptr->image_address,
		    sizeof ptr->image_address) != 0 ||
		    nh_read_exact(fp, &ptr->image_size,
		    sizeof ptr->image_size) != 0)
			return 1;

		ptr->image = nh_alloc(ptr->image_size);
		if (ptr->image == NULL)
			return 1;

		if (fseek(fp, ptr->image_address, SEEK_SET) != 0 ||
		    nh_read_exact(fp, ptr->image, (size_t) ptr->image_size) != 0)
			return 1;

		if (ctx->stat > 1)
			printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
			    ptr->address,
			    ptr->size,
			    ptr->page[0],
			    ptr->page[1],
			    ptr->zero[0],
			    ptr->zero[1],
			    ptr->image_format,
			    ptr->image_address,
			    ptr->image_size);
	}

	if (ctx->stat > 1)
		printf("Loaded %d page(s)\n", ctx->file_stat->page);

	/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */

	if (ctx->stat > 0)
		printf("Conversion ended\n");

	/* TODO: Finish me please :) */
	return 1;
}

7
src/cnki_zlib.c Normal file
View file

@ -0,0 +1,7 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zlib.h>

View file

@ -14,8 +14,8 @@ int cnki_info(cnki_t **param);
/* cnki_caj.c */ /* cnki_caj.c */
int cnki_caj(cnki_t **param); int cnki_caj(cnki_t **param);
/* cnki_nh.c */ /* cnki_hn.c */
int cnki_nh(cnki_t **param); int cnki_hn(cnki_t **param);
/* cnki_kdh.c */ /* cnki_kdh.c */
int cnki_kdh(cnki_t **param); int cnki_kdh(cnki_t **param);

View file

@ -16,9 +16,6 @@
int int
main(int argc, char **argv, char **envp) main(int argc, char **argv, char **envp)
{ {
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
printf("Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
cnki_t *param = NULL; cnki_t *param = NULL;
if (cnki_create(&param) != 0) { if (cnki_create(&param) != 0) {
@ -83,6 +80,10 @@ main(int argc, char **argv, char **envp)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param); cnki_info(&param);
if (strcmp(param->file_stat->type, "%PDF") == 0) { if (strcmp(param->file_stat->type, "%PDF") == 0) {
@ -98,7 +99,7 @@ main(int argc, char **argv, char **envp)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
} else if (strcmp(param->file_stat->type, "HN") == 0) { } else if (strcmp(param->file_stat->type, "HN") == 0) {
if (cnki_nh(&param) != 0) { if (cnki_hn(&param) != 0) {
fprintf(stderr, "%s: %s\n", argv[0], fprintf(stderr, "%s: %s\n", argv[0],
strerror(errno)); strerror(errno));
return EXIT_FAILURE; return EXIT_FAILURE;