/*
 * Copyright (c) 2020, yzrh <yzrh@noema.org>
 *
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
/*
 * Numeric ADDRESS_* constants, grouped by the format tag in their name
 * (CAJ, HN, KDH).
 * NOTE(review): presumably byte offsets into the input file -- confirm
 * against the code that consumes them.
 */
#define ADDRESS_HEAD		0x0000

#define ADDRESS_CAJ_PAGE	0x0010
#define ADDRESS_CAJ_OUTLINE	0x0110
#define ADDRESS_CAJ_BODY	0x0014

#define ADDRESS_HN_PAGE		0x0090
#define ADDRESS_HN_OUTLINE	0x0158

#define ADDRESS_KDH_BODY	0x00fe

/*
 * KEY_KDH_LENGTH is the number of characters in KEY_KDH, not counting the
 * terminating NUL; keep the two in sync when changing either.
 */
#define KEY_KDH			"FZHMEI"
#define KEY_KDH_LENGTH		6
|
|
|
|
|
|
|
|
/*
 * Per-file header fields: a 4-byte type tag followed by two 32-bit values.
 * The `outline` value is used as a base position for outline records
 * (object_outline_t is documented as starting at `outline + 4`).
 * NOTE(review): `type` is a fixed 4-byte array, so it may not be
 * NUL-terminated -- confirm before treating it as a C string.
 */
typedef struct _file_stat_t {
	char type[4];
	int32_t page;
	int32_t outline;
} file_stat_t;
|
|
|
|
|
|
|
|
/*
 * One outline record. The four fixed-size character arrays and `depth`
 * are laid out back to back (no padding is needed between them); `next`
 * points to another object_outline_t.
 * NOTE(review): the character arrays are fixed-width and may not be
 * NUL-terminated -- confirm against the reader.
 */
typedef struct _object_outline_t {
	char title[256];	/* Starting at file_stat_t->outline + 4 */
	char hierarchy[24];
	char page[12];
	char text[12];
	int32_t depth;
	struct _object_outline_t *next;
} object_outline_t;
|
|
|
|
|
|
|
|
/*
 * Tree node wrapping an outline record: an integer id, a pointer to the
 * object_outline_t it refers to, and three links (`up`, `left`, `right`)
 * to other nodes of the same type.
 * NOTE(review): the exact meaning of `left`/`right` (child vs. sibling)
 * is not visible here -- see cnki_outline_tree.c.
 */
typedef struct _object_outline_tree_t {
	int id;
	struct _object_outline_t *item;
	struct _object_outline_tree_t *up;
	struct _object_outline_tree_t *left;
	struct _object_outline_tree_t *right;
} object_outline_tree_t;
|
|
|
|
|
2020-12-29 02:10:17 +00:00
|
|
|
/*
 * Image format codes, numbered consecutively from 0 in declaration order.
 * hn_image_t stores one of these in its `format` field.
 */
typedef enum _hn_code {
	CCITTFAX,
	DCT_0,
	DCT_1,	/* Inverted */
	JBIG2,
	JPX
} hn_code;
|
2020-08-14 22:04:26 +00:00
|
|
|
|
2020-12-29 02:10:17 +00:00
|
|
|
/*
 * Descriptor for one image: three 32-bit fields followed by a pointer to
 * the image bytes.
 * NOTE(review): presumably `address`/`size` locate the image in the input
 * file and `image` is a heap buffer owned by this struct -- confirm
 * against the code that fills and frees it.
 */
typedef struct _hn_image_t {
	int32_t format;	/* hn_code */
	int32_t address;
	int32_t size;
	char *image;
} hn_image_t;
|
|
|
|
|
|
|
|
/*
 * One HN record. The leading fixed-width fields (`address` through
 * `unknown`) occupy 20 bytes with no internal padding; they are followed
 * by pointers to the record's text, its image descriptors, and another
 * object_hn_t.
 * NOTE(review): `image_length` is presumably the number of entries behind
 * `image_data` -- confirm against the reader.
 */
typedef struct _object_hn_t {
	int32_t address;	/* Starting at end of object_outline_t */
	int32_t text_size;
	int16_t image_length;
	int16_t page;
	int32_t unknown[2];	/* TODO: what is it? */
	char *text;
	struct _hn_image_t *image_data;
	struct _object_hn_t *next;
} object_hn_t;
|
2020-08-14 22:04:26 +00:00
|
|
|
|
|
|
|
typedef struct _cnki_t {
|
|
|
|
int stat;
|
|
|
|
int size_buf;
|
|
|
|
FILE *fp_i;
|
|
|
|
FILE *fp_o;
|
|
|
|
file_stat_t *file_stat;
|
|
|
|
object_outline_t *object_outline;
|
2020-12-29 02:10:17 +00:00
|
|
|
object_hn_t *object_hn;
|
2020-08-14 22:04:26 +00:00
|
|
|
} cnki_t;
|
|
|
|
|
|
|
|
/* cnki_pdf.c */
|
|
|
|
int cnki_pdf(cnki_t **param);
|
|
|
|
|
|
|
|
/* cnki_outline_tree.c */
|
|
|
|
int cnki_outline_tree(object_outline_tree_t **outline_tree,
|
|
|
|
object_outline_t **outline, int *ids);
|
|
|
|
|
2020-12-29 02:10:17 +00:00
|
|
|
/* cnki_zlib.c */
/*
 * dst, dst_size: address of a char pointer and of an int.
 * src, src_size: read-only buffer (restrict-qualified, so it must not be
 *                accessed through another pointer during the call) and an
 *                int accompanying it.
 * NOTE(review): presumably *dst/*dst_size receive an output buffer and its
 * length, and src_size is the length of src -- buffer ownership and the
 * meaning of the int return value are not visible in this header; see the
 * definition in cnki_zlib.c.
 */
int cnki_zlib(char **dst, int *dst_size,
	const char * restrict src, int src_size);
|
2020-12-29 02:10:17 +00:00
|
|
|
|
2020-08-14 22:04:26 +00:00
|
|
|
/* cnki_xml.c */
/*
 * xml: address of a char pointer.
 * fp:  address of a FILE pointer.
 * NOTE(review): presumably *xml receives text read from *fp -- direction,
 * ownership and the meaning of the int return value are not visible in
 * this header; see the definition in cnki_xml.c.
 */
int cnki_xml(char **xml, FILE **fp);
|