Update HN data structure.

This commit is contained in:
yzrh 2020-12-29 02:10:17 +00:00
parent bcb8ef9cd9
commit 5c5ddc926b
8 changed files with 176 additions and 137 deletions

View file

@ -33,7 +33,7 @@ cnki_create(cnki_t **param)
memset((*param)->file_stat, 0, sizeof(file_stat_t));
(*param)->object_outline = NULL;
(*param)->object_nh = NULL;
(*param)->object_hn = NULL;
return 0;
}
@ -46,8 +46,8 @@ cnki_destroy(cnki_t **param)
free((*param)->file_stat);
if ((*param)->object_outline != NULL)
free((*param)->object_outline);
if ((*param)->object_nh != NULL)
free((*param)->object_nh);
if ((*param)->object_hn != NULL)
free((*param)->object_hn);
free(*param);
}
}
@ -59,7 +59,7 @@ cnki_info(cnki_t **param)
return 1;
if ((*param)->stat > 1)
printf("Reading file header at %x\n", ADDRESS_HEAD);
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
int addr[2];
@ -84,7 +84,7 @@ cnki_info(cnki_t **param)
}
if ((*param)->stat > 1)
printf("Reading page count at %x\n", addr[0]);
printf("Reading page count at 0x%x\n", addr[0]);
fseek((*param)->fp_i, addr[0], SEEK_SET);
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
@ -94,7 +94,7 @@ cnki_info(cnki_t **param)
(*param)->file_stat->page);
if ((*param)->stat > 1)
printf("Reading outline count at %x\n", addr[1]);
printf("Reading outline count at 0x%x\n", addr[1]);
fseek((*param)->fp_i, addr[1], SEEK_SET);
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);

View file

@ -44,26 +44,31 @@ typedef struct _object_outline_tree_t {
struct _object_outline_tree_t *right;
} object_outline_tree_t;
typedef enum _nh_code {
typedef enum _hn_code {
CCITTFAX,
DCT_0,
DCT_1,
DCT_1, /* Inverted */
JBIG2,
JPX
} nh_code;
} hn_code;
typedef struct _object_nh_t {
int32_t address; /* Starting at end of object_outline_t */
typedef struct _hn_image_t {
int32_t format; /* hn_code */
int32_t address;
int32_t size;
int16_t page[2];
char *image;
} hn_image_t;
typedef struct _object_hn_t {
int32_t address; /* Starting at end of object_outline_t */
int32_t text_size;
int16_t image_length;
int16_t page;
int32_t zero[2];
char *text;
int32_t image_format; /* nh_code */
int32_t image_address;
int32_t image_size;
char *image;
struct _object_nh_t *next;
} object_nh_t;
struct _hn_image_t *image_data;
struct _object_hn_t *next;
} object_hn_t;
typedef struct _cnki_t {
int stat;
@ -72,7 +77,7 @@ typedef struct _cnki_t {
FILE *fp_o;
file_stat_t *file_stat;
object_outline_t *object_outline;
object_nh_t *object_nh;
object_hn_t *object_hn;
} cnki_t;
/* cnki_pdf.c */
@ -82,5 +87,7 @@ int cnki_pdf(cnki_t **param);
int cnki_outline_tree(object_outline_tree_t **outline_tree,
object_outline_t **outline, int *ids);
/* cnki_zlib.c */
/* cnki_xml.c */
int cnki_xml(char **xml, FILE **fp);

View file

@ -18,7 +18,7 @@ cnki_caj(cnki_t **param)
printf("Begin 'CAJ' conversion\n");
if ((*param)->stat > 1)
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY);
int addr;
@ -29,7 +29,7 @@ cnki_caj(cnki_t **param)
fseek((*param)->fp_i, addr, SEEK_SET);
if ((*param)->stat > 0)
printf("Advised document body address is %x\n", addr);
printf("Advised document body address is 0x%x\n", addr);
cnki_pdf(param);

134
src/cnki_hn.c Normal file
View file

@ -0,0 +1,134 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include "cnki.h"
#include "pdf.h"
#include "pdf_cnki.h"
/*
 * Read exactly `size` bytes from `fp` into `dst`.
 *
 * A zero-length request succeeds without touching the stream.
 * Returns 0 on success, 1 on a short read or stream error.
 */
static int
hn_read_exact(void *dst, size_t size, FILE *fp)
{
	if (size == 0)
		return 0;

	return fread(dst, size, 1, fp) == 1 ? 0 : 1;
}

/*
 * Seek `fp` to the absolute offset `address` and load `size` bytes from
 * there into a freshly allocated buffer stored in *dst.
 *
 * The seek is performed even when `size` is 0 because the caller relies
 * on the resulting stream position for the reads that follow.
 * *dst stays NULL when `size` is 0; otherwise the buffer is owned by the
 * caller's structure, even if the read itself fails.
 *
 * Returns 0 on success, 1 on a negative size, failed seek, allocation
 * failure or short read.
 */
static int
hn_load_blob(char **dst, long address, long size, FILE *fp)
{
	*dst = NULL;

	if (size < 0)
		return 1;

	if (fseek(fp, address, SEEK_SET) != 0)
		return 1;

	if (size == 0)
		return 0;

	*dst = malloc((size_t) size);
	if (*dst == NULL)
		return 1;

	return hn_read_exact(*dst, (size_t) size, fp);
}

/*
 * Load the page table, page text and page images of an 'HN' document
 * from (*param)->fp_i into the (*param)->object_hn linked list.
 *
 * The input stream must already be positioned at the first page record.
 * Each record is 20 bytes: address (4), text size (4), image count (2),
 * page (2) and two further 32-bit words (8). The image descriptors of a
 * page (format, address, size; 12 bytes each) are read from directly
 * behind that page's text.
 *
 * Returns 1 on invalid arguments, a non-positive page count, truncated
 * or inconsistent input, or allocation failure. The conversion itself is
 * not implemented yet, so 1 is also returned after a successful load.
 *
 * On failure the partially built list stays attached to
 * (*param)->object_hn with every unset pointer left NULL.
 */
int
cnki_hn(cnki_t **param)
{
	if (param == NULL || *param == NULL)
		return 1;

	cnki_t *ctx = *param;
	FILE *in = ctx->fp_i;

	if (ctx->stat > 0)
		printf("Begin 'HN' conversion\n");

	if (ctx->file_stat->page <= 0)
		return 1;

	/* calloc so that a node is never linked with indeterminate fields */
	ctx->object_hn = calloc(1, sizeof(*ctx->object_hn));
	if (ctx->object_hn == NULL)
		return 1;

	if (ctx->stat > 1) {
		printf("Loading page(s)\n");
		printf("\t%8s\t%8s\t%6s\t%4s\t%6s\t%6s\t%4s\t%8s\t%8s\n",
		    "address",
		    "text",
		    "length",
		    "page",
		    "zero",
		    "#",
		    "code",
		    "address",
		    "image");
	}

	/* First pass: read the fixed-size page records into a linked list */
	object_hn_t *ptr = ctx->object_hn;
	for (int i = 0; i < ctx->file_stat->page; i++) {
		ptr->text = NULL;
		ptr->image_data = NULL;
		ptr->next = NULL;

		if (hn_read_exact(&ptr->address, 4, in) != 0 ||
		    hn_read_exact(&ptr->text_size, 4, in) != 0 ||
		    hn_read_exact(&ptr->image_length, 2, in) != 0 ||
		    hn_read_exact(&ptr->page, 2, in) != 0 ||
		    hn_read_exact(&ptr->zero, 8, in) != 0)
			return 1;

		if (i < ctx->file_stat->page - 1) {
			ptr->next = calloc(1, sizeof(*ptr->next));
			if (ptr->next == NULL)
				return 1;
		}

		ptr = ptr->next;
	}

	/* Second pass: load text and images referenced by each record */
	for (ptr = ctx->object_hn; ptr != NULL; ptr = ptr->next) {
		if (hn_load_blob(&ptr->text, ptr->address,
		    ptr->text_size, in) != 0)
			return 1;

		if (ctx->stat > 1)
			printf("\t%08x\t%8d\t%6d\t%4d\t{%d, %d}",
			    ptr->address,
			    ptr->text_size,
			    ptr->image_length,
			    ptr->page,
			    ptr->zero[0],
			    ptr->zero[1]);

		/* The count comes from the file; never trust its sign */
		if (ptr->image_length < 0)
			return 1;

		if (ptr->image_length > 0) {
			ptr->image_data = calloc((size_t) ptr->image_length,
			    sizeof(*ptr->image_data));
			if (ptr->image_data == NULL)
				return 1;
		}

		/* Descriptors sit right behind the text that was just read */
		for (int i = 0; i < ptr->image_length; i++) {
			ptr->image_data[i].image = NULL;

			if (hn_read_exact(&ptr->image_data[i].format, 4, in) != 0 ||
			    hn_read_exact(&ptr->image_data[i].address, 4, in) != 0 ||
			    hn_read_exact(&ptr->image_data[i].size, 4, in) != 0)
				return 1;
		}

		for (int i = 0; i < ptr->image_length; i++) {
			if (hn_load_blob(&ptr->image_data[i].image,
			    ptr->image_data[i].address,
			    ptr->image_data[i].size, in) != 0)
				return 1;

			if (ctx->stat > 1)
				printf("\t%6d\t%4d\t%08x\t%8d\n",
				    i,
				    ptr->image_data[i].format,
				    ptr->image_data[i].address,
				    ptr->image_data[i].size);
		}

		/* Image rows end the line; a page without images must too */
		if (ctx->stat > 1 && ptr->image_length == 0)
			printf("\n");
	}

	if (ctx->stat > 1)
		printf("Loaded %d page(s)\n", ctx->file_stat->page);

	if (ctx->stat > 1)
		printf("Creating PDF object(s)\n");

	/* NOTE(review): pdf is created but not yet used or released here */
	pdf_object_t *pdf = NULL;

	if (pdf_obj_create(&pdf) != 0)
		return 1;

	if (ctx->stat > 0)
		printf("Conversion ended\n");

	/* TODO: Finish me please :) */
	return 1;
}

View file

@ -1,110 +0,0 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include "cnki.h"
/*
 * Load the per-page records, page text and one image per page of an
 * 'HN' document from (*param)->fp_i into the (*param)->object_nh
 * linked list.
 *
 * Records are read from the current stream position, one per page:
 * address (4 bytes), size (4), page (4) and zero (8). For each record
 * the text is then read from `address`, followed directly by a 12-byte
 * image descriptor (format, address, size), and finally the image data
 * is read from the descriptor's address.
 *
 * Returns 1 on a NULL *param, a non-positive page count or allocation
 * failure. The conversion is unfinished, so 1 is also returned after a
 * complete load.
 *
 * NOTE(review): no fread()/fseek() result is checked, so a truncated
 * file leaves malloc'd fields unset before they are used as offsets
 * and sizes.
 */
int
cnki_nh(cnki_t **param)
{
if (*param == NULL)
return 1;
/* The message says 'HN' although this function is named cnki_nh */
if ((*param)->stat > 0)
printf("Begin 'HN' conversion\n");
/* Allocate the list head only when there is at least one page */
if ((*param)->file_stat->page > 0)
(*param)->object_nh = malloc(sizeof(object_nh_t));
else
return 1;
if ((*param)->object_nh == NULL)
return 1;
/* Column header for the per-page rows printed further down */
if ((*param)->stat > 1) {
printf("Loading page(s)\n");
printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
"address",
"text",
"page",
"zero",
"code",
"address",
"image");
}
/* First pass: read one fixed-size record per page into the list */
object_nh_t *ptr = (*param)->object_nh;
for (int i = 0; i < (*param)->file_stat->page; i++) {
fread(&ptr->address, 4, 1, (*param)->fp_i);
fread(&ptr->size, 4, 1, (*param)->fp_i);
fread(&ptr->page, 4, 1, (*param)->fp_i);
fread(&ptr->zero, 8, 1, (*param)->fp_i);
/* Placeholders until the second pass fills them in */
ptr->text = NULL;
ptr->image_format = -1;
ptr->image_address = 0;
ptr->image_size = 0;
ptr->image = NULL;
ptr->next = NULL;
/* Every page except the last gets a successor node */
if (i < (*param)->file_stat->page - 1) {
ptr->next = malloc(sizeof(object_nh_t));
if (ptr->next == NULL)
return 1;
}
ptr = ptr->next;
}
/* Second pass: load the text and image that each record points to */
ptr = (*param)->object_nh;
while (ptr != NULL) {
ptr->text = malloc(ptr->size);
if (ptr->text == NULL)
return 1;
fseek((*param)->fp_i, ptr->address, SEEK_SET);
fread(ptr->text, ptr->size, 1, (*param)->fp_i);
/* The image descriptor is read from directly behind the text */
fread(&ptr->image_format, 4, 1, (*param)->fp_i);
fread(&ptr->image_address, 4, 1, (*param)->fp_i);
fread(&ptr->image_size, 4, 1, (*param)->fp_i);
ptr->image = malloc(ptr->image_size);
if (ptr->image == NULL)
return 1;
fseek((*param)->fp_i, ptr->image_address, SEEK_SET);
fread(ptr->image, ptr->image_size, 1, (*param)->fp_i);
if ((*param)->stat > 1)
printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
ptr->address,
ptr->size,
ptr->page[0],
ptr->page[1],
ptr->zero[0],
ptr->zero[1],
ptr->image_format,
ptr->image_address,
ptr->image_size);
ptr = ptr->next;
}
if ((*param)->stat > 1)
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
if ((*param)->stat > 0)
printf("Conversion ended\n");
/* TODO: Finish me please :) */
/* Non-zero even on a full load; the caller shown treats it as failure */
return 1;
}

7
src/cnki_zlib.c Normal file
View file

@ -0,0 +1,7 @@
/*
* Copyright (c) 2020, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zlib.h>

View file

@ -14,8 +14,8 @@ int cnki_info(cnki_t **param);
/* cnki_caj.c */
int cnki_caj(cnki_t **param);
/* cnki_nh.c */
int cnki_nh(cnki_t **param);
/* cnki_hn.c */
int cnki_hn(cnki_t **param);
/* cnki_kdh.c */
int cnki_kdh(cnki_t **param);

View file

@ -16,9 +16,6 @@
int
main(int argc, char **argv, char **envp)
{
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
printf("Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
cnki_t *param = NULL;
if (cnki_create(&param) != 0) {
@ -83,6 +80,10 @@ main(int argc, char **argv, char **envp)
return EXIT_FAILURE;
}
if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param);
if (strcmp(param->file_stat->type, "%PDF") == 0) {
@ -98,7 +99,7 @@ main(int argc, char **argv, char **envp)
return EXIT_FAILURE;
}
} else if (strcmp(param->file_stat->type, "HN") == 0) {
if (cnki_nh(&param) != 0) {
if (cnki_hn(&param) != 0) {
fprintf(stderr, "%s: %s\n", argv[0],
strerror(errno));
return EXIT_FAILURE;