Update HN data structure.
This commit is contained in:
parent
bcb8ef9cd9
commit
5c5ddc926b
8 changed files with 176 additions and 137 deletions
12
src/cnki.c
12
src/cnki.c
|
@ -33,7 +33,7 @@ cnki_create(cnki_t **param)
|
|||
memset((*param)->file_stat, 0, sizeof(file_stat_t));
|
||||
|
||||
(*param)->object_outline = NULL;
|
||||
(*param)->object_nh = NULL;
|
||||
(*param)->object_hn = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -46,8 +46,8 @@ cnki_destroy(cnki_t **param)
|
|||
free((*param)->file_stat);
|
||||
if ((*param)->object_outline != NULL)
|
||||
free((*param)->object_outline);
|
||||
if ((*param)->object_nh != NULL)
|
||||
free((*param)->object_nh);
|
||||
if ((*param)->object_hn != NULL)
|
||||
free((*param)->object_hn);
|
||||
free(*param);
|
||||
}
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ cnki_info(cnki_t **param)
|
|||
return 1;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading file header at %x\n", ADDRESS_HEAD);
|
||||
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
|
||||
|
||||
int addr[2];
|
||||
|
||||
|
@ -84,7 +84,7 @@ cnki_info(cnki_t **param)
|
|||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading page count at %x\n", addr[0]);
|
||||
printf("Reading page count at 0x%x\n", addr[0]);
|
||||
|
||||
fseek((*param)->fp_i, addr[0], SEEK_SET);
|
||||
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
|
||||
|
@ -94,7 +94,7 @@ cnki_info(cnki_t **param)
|
|||
(*param)->file_stat->page);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading outline count at %x\n", addr[1]);
|
||||
printf("Reading outline count at 0x%x\n", addr[1]);
|
||||
|
||||
fseek((*param)->fp_i, addr[1], SEEK_SET);
|
||||
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);
|
||||
|
|
33
src/cnki.h
33
src/cnki.h
|
@ -44,26 +44,31 @@ typedef struct _object_outline_tree_t {
|
|||
struct _object_outline_tree_t *right;
|
||||
} object_outline_tree_t;
|
||||
|
||||
typedef enum _nh_code {
|
||||
typedef enum _hn_code {
|
||||
CCITTFAX,
|
||||
DCT_0,
|
||||
DCT_1,
|
||||
DCT_1, /* Inverted */
|
||||
JBIG2,
|
||||
JPX
|
||||
} nh_code;
|
||||
} hn_code;
|
||||
|
||||
typedef struct _object_nh_t {
|
||||
int32_t address; /* Starting at end of object_outline_t */
|
||||
typedef struct _hn_image_t {
|
||||
int32_t format; /* hn_code */
|
||||
int32_t address;
|
||||
int32_t size;
|
||||
int16_t page[2];
|
||||
char *image;
|
||||
} hn_image_t;
|
||||
|
||||
typedef struct _object_hn_t {
|
||||
int32_t address; /* Starting at end of object_outline_t */
|
||||
int32_t text_size;
|
||||
int16_t image_length;
|
||||
int16_t page;
|
||||
int32_t zero[2];
|
||||
char *text;
|
||||
int32_t image_format; /* nh_code */
|
||||
int32_t image_address;
|
||||
int32_t image_size;
|
||||
char *image;
|
||||
struct _object_nh_t *next;
|
||||
} object_nh_t;
|
||||
struct _hn_image_t *image_data;
|
||||
struct _object_hn_t *next;
|
||||
} object_hn_t;
|
||||
|
||||
typedef struct _cnki_t {
|
||||
int stat;
|
||||
|
@ -72,7 +77,7 @@ typedef struct _cnki_t {
|
|||
FILE *fp_o;
|
||||
file_stat_t *file_stat;
|
||||
object_outline_t *object_outline;
|
||||
object_nh_t *object_nh;
|
||||
object_hn_t *object_hn;
|
||||
} cnki_t;
|
||||
|
||||
/* cnki_pdf.c */
|
||||
|
@ -82,5 +87,7 @@ int cnki_pdf(cnki_t **param);
|
|||
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
||||
object_outline_t **outline, int *ids);
|
||||
|
||||
/* cnki_zlib.c */
|
||||
|
||||
/* cnki_xml.c */
|
||||
int cnki_xml(char **xml, FILE **fp);
|
||||
|
|
|
@ -18,7 +18,7 @@ cnki_caj(cnki_t **param)
|
|||
printf("Begin 'CAJ' conversion\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
|
||||
printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY);
|
||||
|
||||
int addr;
|
||||
|
||||
|
@ -29,7 +29,7 @@ cnki_caj(cnki_t **param)
|
|||
fseek((*param)->fp_i, addr, SEEK_SET);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Advised document body address is %x\n", addr);
|
||||
printf("Advised document body address is 0x%x\n", addr);
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
|
|
134
src/cnki_hn.c
Normal file
134
src/cnki_hn.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
#include "pdf.h"
|
||||
#include "pdf_cnki.h"
|
||||
|
||||
int
|
||||
cnki_hn(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'HN' conversion\n");
|
||||
|
||||
if ((*param)->file_stat->page > 0)
|
||||
(*param)->object_hn = malloc(sizeof(object_hn_t));
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((*param)->object_hn == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%6s\t%6s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"length",
|
||||
"page",
|
||||
"zero",
|
||||
"#",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
}
|
||||
|
||||
object_hn_t *ptr = (*param)->object_hn;
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_data = NULL;
|
||||
ptr->next = NULL;
|
||||
|
||||
if (i < (*param)->file_stat->page - 1) {
|
||||
ptr->next = malloc(sizeof(object_hn_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
ptr->text = malloc(ptr->text_size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t{%d, %d}",
|
||||
ptr->address,
|
||||
ptr->text_size,
|
||||
ptr->image_length,
|
||||
ptr->page,
|
||||
ptr->zero[0],
|
||||
ptr->zero[1]);
|
||||
|
||||
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||
|
||||
if (ptr->image_data == NULL)
|
||||
return 1;
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
ptr->image_data[i].image = malloc(ptr->image_data[i].size);
|
||||
|
||||
if (ptr->image_data[i].image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
|
||||
fread(ptr->image_data[i].image,
|
||||
ptr->image_data[i].size, 1,
|
||||
(*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%6d\t%4d\t%08x\t%8d\n",
|
||||
i,
|
||||
ptr->image_data[i].format,
|
||||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Creating PDF object(s)\n");
|
||||
|
||||
pdf_object_t *pdf = NULL;
|
||||
|
||||
if (pdf_obj_create(&pdf) != 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
/* TODO: Finish me please :) */
|
||||
return 1;
|
||||
}
|
110
src/cnki_nh.c
110
src/cnki_nh.c
|
@ -1,110 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_nh(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'HN' conversion\n");
|
||||
|
||||
if ((*param)->file_stat->page > 0)
|
||||
(*param)->object_nh = malloc(sizeof(object_nh_t));
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((*param)->object_nh == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"page",
|
||||
"zero",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
}
|
||||
|
||||
object_nh_t *ptr = (*param)->object_nh;
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_format = -1;
|
||||
ptr->image_address = 0;
|
||||
ptr->image_size = 0;
|
||||
ptr->image = NULL;
|
||||
ptr->next = NULL;
|
||||
|
||||
if (i < (*param)->file_stat->page - 1) {
|
||||
ptr->next = malloc(sizeof(object_nh_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr = (*param)->object_nh;
|
||||
while (ptr != NULL) {
|
||||
ptr->text = malloc(ptr->size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->size, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_size, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->image = malloc(ptr->image_size);
|
||||
|
||||
if (ptr->image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_address, SEEK_SET);
|
||||
fread(ptr->image, ptr->image_size, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->page[0],
|
||||
ptr->page[1],
|
||||
ptr->zero[0],
|
||||
ptr->zero[1],
|
||||
ptr->image_format,
|
||||
ptr->image_address,
|
||||
ptr->image_size);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
/* TODO: Finish me please :) */
|
||||
return 1;
|
||||
}
|
7
src/cnki_zlib.c
Normal file
7
src/cnki_zlib.c
Normal file
|
@ -0,0 +1,7 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <zlib.h>
|
|
@ -14,8 +14,8 @@ int cnki_info(cnki_t **param);
|
|||
/* cnki_caj.c */
|
||||
int cnki_caj(cnki_t **param);
|
||||
|
||||
/* cnki_nh.c */
|
||||
int cnki_nh(cnki_t **param);
|
||||
/* cnki_hn.c */
|
||||
int cnki_hn(cnki_t **param);
|
||||
|
||||
/* cnki_kdh.c */
|
||||
int cnki_kdh(cnki_t **param);
|
||||
|
|
|
@ -16,9 +16,6 @@
|
|||
int
|
||||
main(int argc, char **argv, char **envp)
|
||||
{
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
|
||||
printf("Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
|
||||
|
||||
cnki_t *param = NULL;
|
||||
|
||||
if (cnki_create(&param) != 0) {
|
||||
|
@ -83,6 +80,10 @@ main(int argc, char **argv, char **envp)
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (param->stat > 0)
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
|
||||
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
|
||||
|
||||
cnki_info(&param);
|
||||
|
||||
if (strcmp(param->file_stat->type, "%PDF") == 0) {
|
||||
|
@ -98,7 +99,7 @@ main(int argc, char **argv, char **envp)
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "HN") == 0) {
|
||||
if (cnki_nh(&param) != 0) {
|
||||
if (cnki_hn(&param) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
|
|
Loading…
Reference in a new issue