Produce PDF directly from KDH.

This commit is contained in:
yzrh 2020-12-30 17:06:55 +00:00
parent 98691d4203
commit 1f62c53da6
7 changed files with 103 additions and 28 deletions

View file

@ -6,15 +6,11 @@ Melon: Converter that produces PDF from CNKI proprietary formats
Development
-----------
Currently, PDF, CAJ, and KDH can be converted. Please report
Currently, CAJ and KDH can be converted. Please report
any failures with a sample that can reproduce the behaviour.
HN support is being worked on.
KDH is essentially an invalid PDF file XORed with a predetermined key.
You may want to convert the decrypted KDH to a valid PDF, although some
PDF readers can display the invalid PDF as is.
Usage
=====

View file

@ -27,6 +27,11 @@ cnki_kdh(cnki_t **param)
char buf[(*param)->size_buf];
FILE *tmp = tmpfile();
if (tmp == NULL)
return 1;
for (;;) {
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
@ -35,15 +40,27 @@ cnki_kdh(cnki_t **param)
key_cur++;
}
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
fwrite(buf, (*param)->size_buf, 1, tmp);
if (ftell((*param)->fp_i) == size)
break;
}
if ((*param)->stat > 0)
printf("Decryption ended total %ld byte(s) written\n",
ftell((*param)->fp_o));
printf("Decrypted %ld byte(s)\n", ftell(tmp));
fseek(tmp, 0, SEEK_SET);
FILE *orig = (*param)->fp_i;
(*param)->fp_i = tmp;
cnki_pdf(param);
(*param)->fp_i = orig;
fclose(tmp);
if ((*param)->stat > 0)
printf("Conversion ended\n");
return 0;
}

View file

@ -232,29 +232,63 @@ cnki_pdf(cnki_t **param)
}
if ((*param)->stat > 1)
printf("Generating '/Catalog' dictionary\n");
printf("Searching for catalog object\n");
snprintf(buf, 64,
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
root);
strcat(dictionary, buf);
int catalog = pdf_get_catalog_id(&pdf);
if (catalog != 0) {
if ((*param)->stat > 0)
printf("catalog object is %d.\n", catalog);
} else {
if ((*param)->stat > 0)
printf("catalog object is missing\n");
if ((*param)->stat > 1)
printf("Generating catalog object\n");
if (ids != NULL) {
snprintf(buf, 64,
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
ids[0]);
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
root);
strcat(dictionary, buf);
if (ids != NULL) {
snprintf(buf, 64,
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
ids[0]);
strcat(dictionary, buf);
}
strcat(dictionary, ">>\n");
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
if ((*param)->stat > 0)
printf("Generated catalog object\n");
}
strcat(dictionary, ">>\n");
if ((*param)->stat > 1)
printf("Searching for xref object\n");
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
int xref = pdf_get_xref_id(&pdf);
if (xref != 0) {
if ((*param)->stat > 0)
printf("xref object is %d.\n", xref);
if ((*param)->stat > 1)
printf("Deleting xref object\n");
pdf_obj_del(&pdf, xref);
if ((*param)->stat > 0)
printf("Deleted xref object\n");
} else {
if ((*param)->stat > 0)
printf("xref object is missing\n");
}
free(dictionary);
if ((*param)->stat > 0)
printf("Generated '/Catalog' dictionary\n");
if ((*param)->stat > 1)
printf("Sorting object(s)\n");
@ -312,7 +346,7 @@ cnki_pdf(cnki_t **param)
pdf_get_count(&pdf),
ftell((*param)->fp_o));
long xref = ftell((*param)->fp_o);
long cur_xref = ftell((*param)->fp_o);
if ((*param)->stat > 1)
printf("Writing cross-reference table\n");
@ -323,7 +357,7 @@ cnki_pdf(cnki_t **param)
} else {
if ((*param)->stat > 0)
printf("Cross-reference table %ld byte(s) written\n",
ftell((*param)->fp_o) - xref);
ftell((*param)->fp_o) - cur_xref);
}
if ((*param)->stat > 1)
@ -332,7 +366,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 0)
cur = ftell((*param)->fp_o);
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, cur_xref) != 0) {
if ((*param)->stat > 0)
printf("Trailer not written\n");
} else {

View file

@ -54,6 +54,7 @@ int pdf_get_size(pdf_object_t **pdf);
int pdf_get_free_id(pdf_object_t **pdf);
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
int pdf_get_catalog_id(pdf_object_t **pdf);
int pdf_get_xref_id(pdf_object_t **pdf);
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
int pdf_get_kid_count(pdf_object_t **pdf, int id);

View file

@ -161,6 +161,27 @@ pdf_get_catalog_id(pdf_object_t **pdf)
return catalog_id;
}
/*
 * Find the id of the object whose dictionary text contains "/XRef".
 *
 * The list is walked starting from the node after the head. When more
 * than one dictionary matches, the id of the last match is returned.
 *
 * Returns 0 when no dictionary matches, and 1 when *pdf is NULL.
 *
 * NOTE(review): 1 is returned for a NULL list, which a caller cannot
 * tell apart from a match on an object whose id is 1 -- confirm that
 * callers never pass an empty list.
 */
int
pdf_get_xref_id(pdf_object_t **pdf)
{
	pdf_object_t *node;
	int found = 0;

	if (*pdf == NULL)
		return 1;

	for (node = (*pdf)->next; node != NULL; node = node->next) {
		/* Objects without a dictionary cannot match */
		if (node->dictionary == NULL)
			continue;

		if (strstr(node->dictionary, "/XRef") == NULL)
			continue;

		/* Keep scanning: a later match overrides an earlier one */
		found = node->id;
	}

	return found;
}
int
pdf_get_parent_id(pdf_object_t **pdf, int **id)
{

View file

@ -154,10 +154,16 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
/* A dictionary object may have nested dictionary */
/*
* A dictionary object may have nested dictionary,
* but it should not be in a stream
*/
while ((tmp = _memmem_whitespace(tail + 2,
ptr->size - (tail - buf) - 2,
">>", 2)) != NULL)
">>", 2)) != NULL &&
memmem(tail + 2,
ptr->size - (tail - buf) - 2,
"stream\r\n", 8) == NULL)
tail = tmp;
ptr->dictionary_size = tail - head + 2;

View file

@ -27,10 +27,10 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
fprintf(*fp, "%d 0 obj\n", ptr->id);
if (ptr->dictionary != NULL) {
fputs(ptr->dictionary, *fp);
fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
fputs("\n", *fp);
} else if (ptr->object != NULL) {
fputs(ptr->object, *fp);
fwrite(ptr->object, ptr->object_size, 1, *fp);
fputs("\n", *fp);
} else if (ptr->stream == NULL) {
fputs("null\n", *fp);