Handle headless HN and page with no image.
Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
parent
d2826fa075
commit
c2ad6549fb
6 changed files with 248 additions and 191 deletions
27
src/cnki.c
27
src/cnki.c
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -54,6 +54,11 @@ cnki_destroy(cnki_t **param)
|
|||
object_hn_t *ptr_hn;
|
||||
while ((ptr_hn = (*param)->object_hn) != NULL) {
|
||||
(*param)->object_hn = (*param)->object_hn->next;
|
||||
free(ptr_hn->text);
|
||||
if (ptr_hn->image_data != NULL)
|
||||
for (int i = 0; i < ptr_hn->image_length; i++)
|
||||
free(ptr_hn->image_data[i].image);
|
||||
free(ptr_hn->image_data);
|
||||
free(ptr_hn);
|
||||
}
|
||||
|
||||
|
@ -71,12 +76,19 @@ cnki_info(cnki_t **param)
|
|||
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
|
||||
|
||||
int addr[2];
|
||||
unsigned char str[2];
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_HEAD, SEEK_SET);
|
||||
fread((*param)->file_stat->type, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
fread(str, 2, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if ((unsigned char) (*param)->file_stat->type[0] > 0x7f)
|
||||
printf("File type is '%02x'\n", (unsigned char) (*param)->file_stat->type[0]);
|
||||
else
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
}
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "%PDF", 4) == 0) {
|
||||
return 0;
|
||||
|
@ -86,6 +98,9 @@ cnki_info(cnki_t **param)
|
|||
} else if (strncmp((*param)->file_stat->type, "HN", 2) == 0) {
|
||||
addr[0] = ADDRESS_HN_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
addr[0] = ADDRESS_C8_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if (strncmp((*param)->file_stat->type, "KDH ", 4) == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
|
@ -102,6 +117,14 @@ cnki_info(cnki_t **param)
|
|||
printf("Advised %d page(s)\n",
|
||||
(*param)->file_stat->page);
|
||||
|
||||
if (strncmp((*param)->file_stat->type, "HN", 2) == 0 && str[0] == 0xc8 && str[1] == 0x00) {
|
||||
fseek((*param)->fp_i, 0xd8, SEEK_SET);
|
||||
return 0;
|
||||
} else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
|
||||
fseek((*param)->fp_i, 0x50, SEEK_SET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading outline count at 0x%x\n", addr[1]);
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#define ADDRESS_HN_PAGE 0x0090
|
||||
#define ADDRESS_HN_OUTLINE 0x0158
|
||||
|
||||
#define ADDRESS_C8_PAGE 0x0008
|
||||
|
||||
#define ADDRESS_KDH_BODY 0x00fe
|
||||
|
||||
#define KEY_KDH "FZHMEI"
|
||||
|
@ -64,7 +66,8 @@ typedef struct _object_hn_t {
|
|||
int32_t text_size;
|
||||
int16_t image_length;
|
||||
int16_t page;
|
||||
int32_t unknown[2]; /* TODO: what is it? */
|
||||
int32_t unknown; /* TODO: what is it? */
|
||||
int32_t address_next;
|
||||
char *text;
|
||||
struct _hn_image_t *image_data;
|
||||
struct _object_hn_t *next;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
|
||||
* Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -27,12 +27,13 @@ cnki_hn(cnki_t **param)
|
|||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4s\t%8s\t%8s\n",
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"length",
|
||||
"page",
|
||||
"unknown",
|
||||
"next",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
|
@ -44,7 +45,8 @@ cnki_hn(cnki_t **param)
|
|||
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 2, 1, (*param)->fp_i);
|
||||
fread(&ptr->unknown, 8, 1, (*param)->fp_i);
|
||||
fread(&ptr->unknown, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->address_next, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_data = NULL;
|
||||
|
@ -62,6 +64,7 @@ cnki_hn(cnki_t **param)
|
|||
|
||||
ptr = (*param)->object_hn;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->text_size > 0) {
|
||||
ptr->text = malloc(ptr->text_size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
|
@ -69,16 +72,18 @@ cnki_hn(cnki_t **param)
|
|||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t{%4d, %8d}",
|
||||
printf("\t%08x\t%8d\t%6d\t%4d\t%8d\t%08x",
|
||||
ptr->address,
|
||||
ptr->text_size,
|
||||
ptr->image_length,
|
||||
ptr->page,
|
||||
ptr->unknown[0],
|
||||
ptr->unknown[1]);
|
||||
ptr->unknown,
|
||||
ptr->address_next);
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
|
||||
|
||||
if (ptr->image_data == NULL)
|
||||
|
@ -111,7 +116,8 @@ cnki_hn(cnki_t **param)
|
|||
ptr->image_data[i].address,
|
||||
ptr->image_data[i].size);
|
||||
} else {
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4d\t%08x\t%8d\n",
|
||||
printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4d\t%08x\t%8d\n",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
@ -123,6 +129,12 @@ cnki_hn(cnki_t **param)
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if ((*param)->stat > 1) {
|
||||
printf("\t%4s\t%8s\t%8s\n",
|
||||
"",
|
||||
"",
|
||||
"");
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
|
105
src/cnki_pdf.c
105
src/cnki_pdf.c
|
@ -481,7 +481,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
* page object
|
||||
*/
|
||||
int *ids = NULL;
|
||||
if (ptr->image_length > 0)
|
||||
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
|
||||
else
|
||||
pdf_get_free_ids(&pdf, &ids, 2);
|
||||
|
||||
int bitmap_size;
|
||||
char *bitmap;
|
||||
|
@ -489,10 +492,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
int stream_size;
|
||||
char *stream;
|
||||
|
||||
int *dim = malloc(2 * ptr->image_length * sizeof(int));
|
||||
int *dim;
|
||||
|
||||
int ret;
|
||||
int info[3];
|
||||
if (ptr->image_length > 0) {
|
||||
dim = malloc(2 * ptr->image_length * sizeof(int));
|
||||
|
||||
if (dim == NULL) {
|
||||
free(root_kid);
|
||||
|
@ -509,6 +512,10 @@ cnki_pdf_hn(cnki_t **param)
|
|||
free(dim);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int ret;
|
||||
int info[3];
|
||||
|
||||
for (int i = 0; i < ptr->image_length; i++) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
@ -684,6 +691,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
}
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/XObject <<");
|
||||
|
@ -701,49 +709,24 @@ cnki_pdf_hn(cnki_t **param)
|
|||
pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
|
||||
|
||||
free(dictionary);
|
||||
}
|
||||
|
||||
int conv_size;
|
||||
char *conv_dst;
|
||||
char conv_src[2];
|
||||
char conv_hex[3];
|
||||
|
||||
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
|
||||
if (ptr->text_size > 0) {
|
||||
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0 ||
|
||||
strncmp(ptr->text, "COMPRESSTEXT", 12) == 0) {
|
||||
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
|
||||
|
||||
dictionary_size = 64 + 2 * stream_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
free(ptr->text);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
free(root_kid);
|
||||
free(ids);
|
||||
free(dim);
|
||||
return 1;
|
||||
ptr->text_size = stream_size;
|
||||
ptr->text = stream;
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<feff");
|
||||
|
||||
for (int i = 0; i < stream_size; i += 16) {
|
||||
conv_src[0] = stream[i + 7];
|
||||
conv_src[1] = stream[i + 6];
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
if (strconv(&conv_dst, "UTF-16BE",
|
||||
conv_src, "GB18030", &conv_size) == 0) {
|
||||
for (int j = 0; j < conv_size - 2; j++) {
|
||||
snprintf(conv_hex, 3,
|
||||
"%02x", (unsigned char) conv_dst[j]);
|
||||
strcat(dictionary, conv_hex);
|
||||
}
|
||||
free(conv_dst);
|
||||
}
|
||||
}
|
||||
free(stream);
|
||||
|
||||
strcat(dictionary, ">");
|
||||
} else {
|
||||
dictionary_size = 64 + 2 * ptr->text_size;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
|
@ -758,9 +741,26 @@ cnki_pdf_hn(cnki_t **param)
|
|||
|
||||
strcat(dictionary, "<feff");
|
||||
|
||||
for (int i = 0; i < ptr->text_size; i += 4) {
|
||||
conv_src[0] = ptr->text[i + 3];
|
||||
conv_src[1] = ptr->text[i + 2];
|
||||
for (int i = 0; i < ptr->text_size; i += 6) {
|
||||
if (i + 5 >= ptr->text_size)
|
||||
break;
|
||||
|
||||
conv_src[0] = ptr->text[i + 5];
|
||||
conv_src[1] = ptr->text[i + 4];
|
||||
|
||||
if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) {
|
||||
strcat(dictionary, "a389");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) {
|
||||
strcat(dictionary, "a38a");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) {
|
||||
strcat(dictionary, "a38d");
|
||||
continue;
|
||||
} else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) {
|
||||
strcat(dictionary, "a3a0");
|
||||
continue;
|
||||
}
|
||||
|
||||
conv_size = 6;
|
||||
|
||||
|
@ -776,12 +776,12 @@ cnki_pdf_hn(cnki_t **param)
|
|||
}
|
||||
|
||||
strcat(dictionary, ">");
|
||||
}
|
||||
|
||||
/* FIXME: Use the text somehow? */
|
||||
free(dictionary);
|
||||
}
|
||||
|
||||
dictionary_size = 64 + 64 * ptr->image_length;
|
||||
dictionary_size = 64 + 128 * ptr->image_length;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL) {
|
||||
|
@ -791,6 +791,7 @@ cnki_pdf_hn(cnki_t **param)
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "q\n");
|
||||
|
@ -858,29 +859,41 @@ cnki_pdf_hn(cnki_t **param)
|
|||
NULL, dictionary, stream, stream_size);
|
||||
|
||||
free(stream);
|
||||
}
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
strcat(dictionary, "<<\n/Type /Page\n");
|
||||
|
||||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
|
||||
|
||||
if (ptr->image_length > 0) {
|
||||
free(dim);
|
||||
|
||||
snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
/* A4 paper */
|
||||
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||
} else {
|
||||
snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
/* Add /Parent when we know root */
|
||||
pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 1];
|
||||
}
|
||||
|
||||
free(dictionary);
|
||||
|
||||
root_kid[cnt++] = ids[ptr->image_length + 2];
|
||||
|
||||
free(ids);
|
||||
free(dim);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
|
|
@ -13,12 +13,17 @@ int
|
|||
cnki_zlib(char **dst, int *dst_size,
|
||||
const char * restrict src, int src_size)
|
||||
{
|
||||
uint8_t padding = 0;
|
||||
int32_t size;
|
||||
memcpy(&size, src + 20, 4);
|
||||
|
||||
if (strncmp(src + 8, "COMPRESSTEXT", 12) == 0)
|
||||
padding = 8;
|
||||
|
||||
memcpy(&size, src + 12 + padding, 4);
|
||||
|
||||
*dst_size = size;
|
||||
|
||||
if (strinflate(dst, size, src + 24, src_size - 24) != 0)
|
||||
if (strinflate(dst, size, src + 16 + padding, src_size - 16 - padding) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -98,7 +98,8 @@ main(int argc, char **argv)
|
|||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strncmp(param->file_stat->type, "HN", 2) == 0) {
|
||||
} else if (strncmp(param->file_stat->type, "HN", 2) == 0 ||
|
||||
(unsigned char) param->file_stat->type[0] == 0xc8) {
|
||||
if (cnki_hn(&param) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
|
|
Loading…
Reference in a new issue