Update HN data structure.
This commit is contained in:
parent
bcb8ef9cd9
commit
5c5ddc926b
8 changed files with 176 additions and 137 deletions
12
src/cnki.c
12
src/cnki.c
|
@ -33,7 +33,7 @@ cnki_create(cnki_t **param)
|
||||||
memset((*param)->file_stat, 0, sizeof(file_stat_t));
|
memset((*param)->file_stat, 0, sizeof(file_stat_t));
|
||||||
|
|
||||||
(*param)->object_outline = NULL;
|
(*param)->object_outline = NULL;
|
||||||
(*param)->object_nh = NULL;
|
(*param)->object_hn = NULL;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -46,8 +46,8 @@ cnki_destroy(cnki_t **param)
|
||||||
free((*param)->file_stat);
|
free((*param)->file_stat);
|
||||||
if ((*param)->object_outline != NULL)
|
if ((*param)->object_outline != NULL)
|
||||||
free((*param)->object_outline);
|
free((*param)->object_outline);
|
||||||
if ((*param)->object_nh != NULL)
|
if ((*param)->object_hn != NULL)
|
||||||
free((*param)->object_nh);
|
free((*param)->object_hn);
|
||||||
free(*param);
|
free(*param);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@ cnki_info(cnki_t **param)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Reading file header at %x\n", ADDRESS_HEAD);
|
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
|
||||||
|
|
||||||
int addr[2];
|
int addr[2];
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ cnki_info(cnki_t **param)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Reading page count at %x\n", addr[0]);
|
printf("Reading page count at 0x%x\n", addr[0]);
|
||||||
|
|
||||||
fseek((*param)->fp_i, addr[0], SEEK_SET);
|
fseek((*param)->fp_i, addr[0], SEEK_SET);
|
||||||
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
|
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
|
||||||
|
@ -94,7 +94,7 @@ cnki_info(cnki_t **param)
|
||||||
(*param)->file_stat->page);
|
(*param)->file_stat->page);
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Reading outline count at %x\n", addr[1]);
|
printf("Reading outline count at 0x%x\n", addr[1]);
|
||||||
|
|
||||||
fseek((*param)->fp_i, addr[1], SEEK_SET);
|
fseek((*param)->fp_i, addr[1], SEEK_SET);
|
||||||
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);
|
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);
|
||||||
|
|
33
src/cnki.h
33
src/cnki.h
|
@ -44,26 +44,31 @@ typedef struct _object_outline_tree_t {
|
||||||
struct _object_outline_tree_t *right;
|
struct _object_outline_tree_t *right;
|
||||||
} object_outline_tree_t;
|
} object_outline_tree_t;
|
||||||
|
|
||||||
typedef enum _nh_code {
|
typedef enum _hn_code {
|
||||||
CCITTFAX,
|
CCITTFAX,
|
||||||
DCT_0,
|
DCT_0,
|
||||||
DCT_1,
|
DCT_1, /* Inverted */
|
||||||
JBIG2,
|
JBIG2,
|
||||||
JPX
|
JPX
|
||||||
} nh_code;
|
} hn_code;
|
||||||
|
|
||||||
typedef struct _object_nh_t {
|
typedef struct _hn_image_t {
|
||||||
int32_t address; /* Starting at end of object_outline_t */
|
int32_t format; /* hn_code */
|
||||||
|
int32_t address;
|
||||||
int32_t size;
|
int32_t size;
|
||||||
int16_t page[2];
|
char *image;
|
||||||
|
} hn_image_t;
|
||||||
|
|
||||||
|
typedef struct _object_hn_t {
|
||||||
|
int32_t address; /* Starting at end of object_outline_t */
|
||||||
|
int32_t text_size;
|
||||||
|
int16_t image_length;
|
||||||
|
int16_t page;
|
||||||
int32_t zero[2];
|
int32_t zero[2];
|
||||||
char *text;
|
char *text;
|
||||||
int32_t image_format; /* nh_code */
|
struct _hn_image_t *image_data;
|
||||||
int32_t image_address;
|
struct _object_hn_t *next;
|
||||||
int32_t image_size;
|
} object_hn_t;
|
||||||
char *image;
|
|
||||||
struct _object_nh_t *next;
|
|
||||||
} object_nh_t;
|
|
||||||
|
|
||||||
typedef struct _cnki_t {
|
typedef struct _cnki_t {
|
||||||
int stat;
|
int stat;
|
||||||
|
@ -72,7 +77,7 @@ typedef struct _cnki_t {
|
||||||
FILE *fp_o;
|
FILE *fp_o;
|
||||||
file_stat_t *file_stat;
|
file_stat_t *file_stat;
|
||||||
object_outline_t *object_outline;
|
object_outline_t *object_outline;
|
||||||
object_nh_t *object_nh;
|
object_hn_t *object_hn;
|
||||||
} cnki_t;
|
} cnki_t;
|
||||||
|
|
||||||
/* cnki_pdf.c */
|
/* cnki_pdf.c */
|
||||||
|
@ -82,5 +87,7 @@ int cnki_pdf(cnki_t **param);
|
||||||
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
||||||
object_outline_t **outline, int *ids);
|
object_outline_t **outline, int *ids);
|
||||||
|
|
||||||
|
/* cnki_zlib.c */
|
||||||
|
|
||||||
/* cnki_xml.c */
|
/* cnki_xml.c */
|
||||||
int cnki_xml(char **xml, FILE **fp);
|
int cnki_xml(char **xml, FILE **fp);
|
||||||
|
|
|
@ -18,7 +18,7 @@ cnki_caj(cnki_t **param)
|
||||||
printf("Begin 'CAJ' conversion\n");
|
printf("Begin 'CAJ' conversion\n");
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
|
printf("Reading document body address at 0x%x\n", ADDRESS_CAJ_BODY);
|
||||||
|
|
||||||
int addr;
|
int addr;
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ cnki_caj(cnki_t **param)
|
||||||
fseek((*param)->fp_i, addr, SEEK_SET);
|
fseek((*param)->fp_i, addr, SEEK_SET);
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Advised document body address is %x\n", addr);
|
printf("Advised document body address is 0x%x\n", addr);
|
||||||
|
|
||||||
cnki_pdf(param);
|
cnki_pdf(param);
|
||||||
|
|
||||||
|
|
134
src/cnki_hn.c
Normal file
134
src/cnki_hn.c
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "cnki.h"
|
||||||
|
#include "pdf.h"
|
||||||
|
#include "pdf_cnki.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
cnki_hn(cnki_t **param)
|
||||||
|
{
|
||||||
|
if (*param == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("Begin 'HN' conversion\n");
|
||||||
|
|
||||||
|
if ((*param)->file_stat->page > 0)
|
||||||
|
(*param)->object_hn = malloc(sizeof(object_hn_t));
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ((*param)->object_hn == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ((*param)->stat > 1) {
|
||||||
|
printf("Loading page(s)\n");
|
||||||
|
printf("\t%8s\t%8s\t%6s\t%4s\t%6s\t%6s\t%4s\t%8s\t%8s\n",
|
||||||
|
"address",
|
||||||
|
"text",
|
||||||
|
"length",
|
||||||
|
"page",
|
||||||
|
"zero",
|
||||||
|
"#",
|
||||||
|
"code",
|
||||||
|
"address",
|
||||||
|
"image");
|
||||||
|
}
|
||||||
|
|
||||||
|
object_hn_t *ptr = (*param)->object_hn;
|
||||||
|
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||||
|
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->page, 2, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
||||||
|
|
||||||
|
ptr->text = NULL;
|
||||||
|
ptr->image_data = NULL;
|
||||||
|
ptr->next = NULL;
|
||||||
|
|
||||||
|
if (i < (*param)->file_stat->page - 1) {
|
||||||
|
ptr->next = malloc(sizeof(object_hn_t));
|
||||||
|
|
||||||
|
if (ptr->next == NULL)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = ptr->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = (*param)->object_hn;
|
||||||
|
while (ptr != NULL) {
|
||||||
|
ptr->text = malloc(ptr->text_size);
|
||||||
|
|
||||||
|
if (ptr->text == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||||
|
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("\t%08x\t%8d\t%6d\t%4d\t{%d, %d}",
|
||||||
|
ptr->address,
|
||||||
|
ptr->text_size,
|
||||||
|
ptr->image_length,
|
||||||
|
ptr->page,
|
||||||
|
ptr->zero[0],
|
||||||
|
ptr->zero[1]);
|
||||||
|
|
||||||
|
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||||
|
|
||||||
|
if (ptr->image_data == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
|
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
|
||||||
|
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < ptr->image_length; i++) {
|
||||||
|
ptr->image_data[i].image = malloc(ptr->image_data[i].size);
|
||||||
|
|
||||||
|
if (ptr->image_data[i].image == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
|
||||||
|
fread(ptr->image_data[i].image,
|
||||||
|
ptr->image_data[i].size, 1,
|
||||||
|
(*param)->fp_i);
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("\t%6d\t%4d\t%08x\t%8d\n",
|
||||||
|
i,
|
||||||
|
ptr->image_data[i].format,
|
||||||
|
ptr->image_data[i].address,
|
||||||
|
ptr->image_data[i].size);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = ptr->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Creating PDF object(s)\n");
|
||||||
|
|
||||||
|
pdf_object_t *pdf = NULL;
|
||||||
|
|
||||||
|
if (pdf_obj_create(&pdf) != 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("Conversion ended\n");
|
||||||
|
|
||||||
|
/* TODO: Finish me please :) */
|
||||||
|
return 1;
|
||||||
|
}
|
110
src/cnki_nh.c
110
src/cnki_nh.c
|
@ -1,110 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "cnki.h"
|
|
||||||
|
|
||||||
int
|
|
||||||
cnki_nh(cnki_t **param)
|
|
||||||
{
|
|
||||||
if (*param == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
|
||||||
printf("Begin 'HN' conversion\n");
|
|
||||||
|
|
||||||
if ((*param)->file_stat->page > 0)
|
|
||||||
(*param)->object_nh = malloc(sizeof(object_nh_t));
|
|
||||||
else
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if ((*param)->object_nh == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if ((*param)->stat > 1) {
|
|
||||||
printf("Loading page(s)\n");
|
|
||||||
printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
|
|
||||||
"address",
|
|
||||||
"text",
|
|
||||||
"page",
|
|
||||||
"zero",
|
|
||||||
"code",
|
|
||||||
"address",
|
|
||||||
"image");
|
|
||||||
}
|
|
||||||
|
|
||||||
object_nh_t *ptr = (*param)->object_nh;
|
|
||||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
|
||||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->size, 4, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->page, 4, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
|
||||||
|
|
||||||
ptr->text = NULL;
|
|
||||||
ptr->image_format = -1;
|
|
||||||
ptr->image_address = 0;
|
|
||||||
ptr->image_size = 0;
|
|
||||||
ptr->image = NULL;
|
|
||||||
ptr->next = NULL;
|
|
||||||
|
|
||||||
if (i < (*param)->file_stat->page - 1) {
|
|
||||||
ptr->next = malloc(sizeof(object_nh_t));
|
|
||||||
|
|
||||||
if (ptr->next == NULL)
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = ptr->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = (*param)->object_nh;
|
|
||||||
while (ptr != NULL) {
|
|
||||||
ptr->text = malloc(ptr->size);
|
|
||||||
|
|
||||||
if (ptr->text == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
|
||||||
fread(ptr->text, ptr->size, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->image_format, 4, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->image_address, 4, 1, (*param)->fp_i);
|
|
||||||
fread(&ptr->image_size, 4, 1, (*param)->fp_i);
|
|
||||||
|
|
||||||
ptr->image = malloc(ptr->image_size);
|
|
||||||
|
|
||||||
if (ptr->image == NULL)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
fseek((*param)->fp_i, ptr->image_address, SEEK_SET);
|
|
||||||
fread(ptr->image, ptr->image_size, 1, (*param)->fp_i);
|
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
|
||||||
printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
|
|
||||||
ptr->address,
|
|
||||||
ptr->size,
|
|
||||||
ptr->page[0],
|
|
||||||
ptr->page[1],
|
|
||||||
ptr->zero[0],
|
|
||||||
ptr->zero[1],
|
|
||||||
ptr->image_format,
|
|
||||||
ptr->image_address,
|
|
||||||
ptr->image_size);
|
|
||||||
|
|
||||||
ptr = ptr->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
|
||||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
|
||||||
|
|
||||||
/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
|
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
|
||||||
printf("Conversion ended\n");
|
|
||||||
|
|
||||||
/* TODO: Finish me please :) */
|
|
||||||
return 1;
|
|
||||||
}
|
|
7
src/cnki_zlib.c
Normal file
7
src/cnki_zlib.c
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, yzrh <yzrh@noema.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <zlib.h>
|
|
@ -14,8 +14,8 @@ int cnki_info(cnki_t **param);
|
||||||
/* cnki_caj.c */
|
/* cnki_caj.c */
|
||||||
int cnki_caj(cnki_t **param);
|
int cnki_caj(cnki_t **param);
|
||||||
|
|
||||||
/* cnki_nh.c */
|
/* cnki_hn.c */
|
||||||
int cnki_nh(cnki_t **param);
|
int cnki_hn(cnki_t **param);
|
||||||
|
|
||||||
/* cnki_kdh.c */
|
/* cnki_kdh.c */
|
||||||
int cnki_kdh(cnki_t **param);
|
int cnki_kdh(cnki_t **param);
|
||||||
|
|
|
@ -16,9 +16,6 @@
|
||||||
int
|
int
|
||||||
main(int argc, char **argv, char **envp)
|
main(int argc, char **argv, char **envp)
|
||||||
{
|
{
|
||||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
|
|
||||||
printf("Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
|
|
||||||
|
|
||||||
cnki_t *param = NULL;
|
cnki_t *param = NULL;
|
||||||
|
|
||||||
if (cnki_create(¶m) != 0) {
|
if (cnki_create(¶m) != 0) {
|
||||||
|
@ -83,6 +80,10 @@ main(int argc, char **argv, char **envp)
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (param->stat > 0)
|
||||||
|
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
|
||||||
|
"Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
|
||||||
|
|
||||||
cnki_info(¶m);
|
cnki_info(¶m);
|
||||||
|
|
||||||
if (strcmp(param->file_stat->type, "%PDF") == 0) {
|
if (strcmp(param->file_stat->type, "%PDF") == 0) {
|
||||||
|
@ -98,7 +99,7 @@ main(int argc, char **argv, char **envp)
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
} else if (strcmp(param->file_stat->type, "HN") == 0) {
|
} else if (strcmp(param->file_stat->type, "HN") == 0) {
|
||||||
if (cnki_nh(¶m) != 0) {
|
if (cnki_hn(¶m) != 0) {
|
||||||
fprintf(stderr, "%s: %s\n", argv[0],
|
fprintf(stderr, "%s: %s\n", argv[0],
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
|
|
Loading…
Reference in a new issue