Produce PDF directly from KDH.

This commit is contained in:
yzrh 2020-12-30 17:06:55 +00:00
parent 98691d4203
commit 1f62c53da6
7 changed files with 103 additions and 28 deletions

View file

@ -6,15 +6,11 @@ Melon: Converter that produces PDF from CNKI proprietary formats
Development Development
----------- -----------
Currently, PDF, CAJ, and KDH can be converted. Please report Currently, CAJ and KDH can be converted. Please report
any failures with a sample that can reproduce the behaviour. any failures with a sample that can reproduce the behaviour.
HN support is being worked on. HN support is being worked on.
KDH is essentially an invalid PDF file xor'ed with a predetermined key.
You may want to convert the decrypted KDH to valid PDF, although some
PDF readers can display the invalid PDF.
Usage Usage
===== =====

View file

@ -27,6 +27,11 @@ cnki_kdh(cnki_t **param)
char buf[(*param)->size_buf]; char buf[(*param)->size_buf];
FILE *tmp = tmpfile();
if (tmp == NULL)
return 1;
for (;;) { for (;;) {
fread(buf, (*param)->size_buf, 1, (*param)->fp_i); fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
@ -35,15 +40,27 @@ cnki_kdh(cnki_t **param)
key_cur++; key_cur++;
} }
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o); fwrite(buf, (*param)->size_buf, 1, tmp);
if (ftell((*param)->fp_i) == size) if (ftell((*param)->fp_i) == size)
break; break;
} }
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Decryption ended total %ld byte(s) written\n", printf("Decrypted %ld byte(s)\n", ftell(tmp));
ftell((*param)->fp_o));
fseek(tmp, 0, SEEK_SET);
FILE *orig = (*param)->fp_i;
(*param)->fp_i = tmp;
cnki_pdf(param);
(*param)->fp_i = orig;
fclose(tmp);
if ((*param)->stat > 0)
printf("Conversion ended\n");
return 0; return 0;
} }

View file

@ -232,7 +232,19 @@ cnki_pdf(cnki_t **param)
} }
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Generating '/Catalog' dictionary\n"); printf("Searching for catalog object\n");
int catalog = pdf_get_catalog_id(&pdf);
if (catalog != 0) {
if ((*param)->stat > 0)
printf("catalog object is %d.\n", catalog);
} else {
if ((*param)->stat > 0)
printf("catalog object is missing\n");
if ((*param)->stat > 1)
printf("Generating catalog object\n");
snprintf(buf, 64, snprintf(buf, 64,
"<<\n/Type /Catalog\n/Pages %d 0 R\n", "<<\n/Type /Catalog\n/Pages %d 0 R\n",
@ -250,10 +262,32 @@ cnki_pdf(cnki_t **param)
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL); pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
free(dictionary); if ((*param)->stat > 0)
printf("Generated catalog object\n");
}
if ((*param)->stat > 1)
printf("Searching for xref object\n");
int xref = pdf_get_xref_id(&pdf);
if (xref != 0) {
if ((*param)->stat > 0)
printf("xref object is %d.\n", xref);
if ((*param)->stat > 1)
printf("Deleting xref object\n");
pdf_obj_del(&pdf, xref);
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Generated '/Catalog' dictionary\n"); printf("Deleted xref object\n");
} else {
if ((*param)->stat > 0)
printf("xref object is missing\n");
}
free(dictionary);
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Sorting object(s)\n"); printf("Sorting object(s)\n");
@ -312,7 +346,7 @@ cnki_pdf(cnki_t **param)
pdf_get_count(&pdf), pdf_get_count(&pdf),
ftell((*param)->fp_o)); ftell((*param)->fp_o));
long xref = ftell((*param)->fp_o); long cur_xref = ftell((*param)->fp_o);
if ((*param)->stat > 1) if ((*param)->stat > 1)
printf("Writing cross-reference table\n"); printf("Writing cross-reference table\n");
@ -323,7 +357,7 @@ cnki_pdf(cnki_t **param)
} else { } else {
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Cross-reference table %ld byte(s) written\n", printf("Cross-reference table %ld byte(s) written\n",
ftell((*param)->fp_o) - xref); ftell((*param)->fp_o) - cur_xref);
} }
if ((*param)->stat > 1) if ((*param)->stat > 1)
@ -332,7 +366,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 0) if ((*param)->stat > 0)
cur = ftell((*param)->fp_o); cur = ftell((*param)->fp_o);
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) { if (pdf_dump_trailer(&pdf, &(*param)->fp_o, cur_xref) != 0) {
if ((*param)->stat > 0) if ((*param)->stat > 0)
printf("Trailer not written\n"); printf("Trailer not written\n");
} else { } else {

View file

@ -54,6 +54,7 @@ int pdf_get_size(pdf_object_t **pdf);
int pdf_get_free_id(pdf_object_t **pdf); int pdf_get_free_id(pdf_object_t **pdf);
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count); int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
int pdf_get_catalog_id(pdf_object_t **pdf); int pdf_get_catalog_id(pdf_object_t **pdf);
int pdf_get_xref_id(pdf_object_t **pdf);
int pdf_get_parent_id(pdf_object_t **pdf, int **id); int pdf_get_parent_id(pdf_object_t **pdf, int **id);
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid); int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
int pdf_get_kid_count(pdf_object_t **pdf, int id); int pdf_get_kid_count(pdf_object_t **pdf, int id);

View file

@ -161,6 +161,27 @@ pdf_get_catalog_id(pdf_object_t **pdf)
return catalog_id; return catalog_id;
} }
/*
 * Look up the ID of the object whose dictionary mentions "/XRef".
 *
 * Every node after the list head is examined; a node matches when its
 * dictionary is present and contains the text "/XRef".  If more than
 * one node matches, the ID of the last match in list order is returned.
 *
 * pdf: address of the list head pointer.
 *
 * Returns the matching object ID, or 0 when nothing matches or when
 * there is no list to search.
 */
int
pdf_get_xref_id(pdf_object_t **pdf)
{
	/*
	 * 0 is the "not found" value callers compare against.  Returning 1
	 * here would be indistinguishable from a real object whose ID is 1
	 * and could lead a caller to act on (e.g. delete) that object.
	 */
	if (pdf == NULL || *pdf == NULL)
		return 0;

	int xref_id = 0;

	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
		/* Nodes without a dictionary cannot match. */
		if (ptr->dictionary == NULL)
			continue;

		/* No early exit on purpose: the last match wins. */
		if (strstr(ptr->dictionary, "/XRef") != NULL)
			xref_id = ptr->id;
	}

	return xref_id;
}
int int
pdf_get_parent_id(pdf_object_t **pdf, int **id) pdf_get_parent_id(pdf_object_t **pdf, int **id)
{ {

View file

@ -154,10 +154,16 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL && if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) { (tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
/* A dictionary object may have nested dictionary */ /*
* A dictionary object may have nested dictionary,
* but it should not be in a stream
*/
while ((tmp = _memmem_whitespace(tail + 2, while ((tmp = _memmem_whitespace(tail + 2,
ptr->size - (tail - buf) - 2, ptr->size - (tail - buf) - 2,
">>", 2)) != NULL) ">>", 2)) != NULL &&
memmem(tail + 2,
ptr->size - (tail - buf) - 2,
"stream\r\n", 8) == NULL)
tail = tmp; tail = tmp;
ptr->dictionary_size = tail - head + 2; ptr->dictionary_size = tail - head + 2;

View file

@ -27,10 +27,10 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
fprintf(*fp, "%d 0 obj\n", ptr->id); fprintf(*fp, "%d 0 obj\n", ptr->id);
if (ptr->dictionary != NULL) { if (ptr->dictionary != NULL) {
fputs(ptr->dictionary, *fp); fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
fputs("\n", *fp); fputs("\n", *fp);
} else if (ptr->object != NULL) { } else if (ptr->object != NULL) {
fputs(ptr->object, *fp); fwrite(ptr->object, ptr->object_size, 1, *fp);
fputs("\n", *fp); fputs("\n", *fp);
} else if (ptr->stream == NULL) { } else if (ptr->stream == NULL) {
fputs("null\n", *fp); fputs("null\n", *fp);