Produce PDF directly from KDH.
This commit is contained in:
parent
98691d4203
commit
1f62c53da6
7 changed files with 103 additions and 28 deletions
|
@ -6,15 +6,11 @@ Melon: Converter that produces PDF from CNKI proprietary formats
|
|||
Development
|
||||
-----------
|
||||
|
||||
Currently, PDF, CAJ, and KDH can be converted. Please report
|
||||
Currently, CAJ and KDH can be converted. Please report
|
||||
any failures with a sample that can reproduce the behaviour.
|
||||
|
||||
HN support is being worked on.
|
||||
|
||||
KDH is essentially an invalid PDF file xor'ed with a predetermined key.
|
||||
You may want to convert the decrypted KDH to valid PDF, although some
|
||||
PDF readers can display the invalid PDF.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
|
|
|
@ -27,6 +27,11 @@ cnki_kdh(cnki_t **param)
|
|||
|
||||
char buf[(*param)->size_buf];
|
||||
|
||||
FILE *tmp = tmpfile();
|
||||
|
||||
if (tmp == NULL)
|
||||
return 1;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
||||
|
||||
|
@ -35,15 +40,27 @@ cnki_kdh(cnki_t **param)
|
|||
key_cur++;
|
||||
}
|
||||
|
||||
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
|
||||
fwrite(buf, (*param)->size_buf, 1, tmp);
|
||||
|
||||
if (ftell((*param)->fp_i) == size)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Decryption ended total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
||||
|
||||
fseek(tmp, 0, SEEK_SET);
|
||||
|
||||
FILE *orig = (*param)->fp_i;
|
||||
(*param)->fp_i = tmp;
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
(*param)->fp_i = orig;
|
||||
fclose(tmp);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -232,29 +232,63 @@ cnki_pdf(cnki_t **param)
|
|||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating '/Catalog' dictionary\n");
|
||||
printf("Searching for catalog object\n");
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||
root);
|
||||
strcat(dictionary, buf);
|
||||
int catalog = pdf_get_catalog_id(&pdf);
|
||||
|
||||
if (catalog != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("catalog object is %d.\n", catalog);
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("catalog object is missing\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating catalog object\n");
|
||||
|
||||
if (ids != NULL) {
|
||||
snprintf(buf, 64,
|
||||
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
||||
ids[0]);
|
||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||
root);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (ids != NULL) {
|
||||
snprintf(buf, 64,
|
||||
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
||||
ids[0]);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>\n");
|
||||
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated catalog object\n");
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>\n");
|
||||
if ((*param)->stat > 1)
|
||||
printf("Searching for xref object\n");
|
||||
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
||||
int xref = pdf_get_xref_id(&pdf);
|
||||
|
||||
if (xref != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("xref object is %d.\n", xref);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Deleting xref object\n");
|
||||
|
||||
pdf_obj_del(&pdf, xref);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Deleted xref object\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("xref object is missing\n");
|
||||
}
|
||||
|
||||
free(dictionary);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated '/Catalog' dictionary\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Sorting object(s)\n");
|
||||
|
||||
|
@ -312,7 +346,7 @@ cnki_pdf(cnki_t **param)
|
|||
pdf_get_count(&pdf),
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
long xref = ftell((*param)->fp_o);
|
||||
long cur_xref = ftell((*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing cross-reference table\n");
|
||||
|
@ -323,7 +357,7 @@ cnki_pdf(cnki_t **param)
|
|||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - xref);
|
||||
ftell((*param)->fp_o) - cur_xref);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
|
@ -332,7 +366,7 @@ cnki_pdf(cnki_t **param)
|
|||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
|
||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, cur_xref) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer not written\n");
|
||||
} else {
|
||||
|
|
|
@ -54,6 +54,7 @@ int pdf_get_size(pdf_object_t **pdf);
|
|||
int pdf_get_free_id(pdf_object_t **pdf);
|
||||
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
|
||||
int pdf_get_catalog_id(pdf_object_t **pdf);
|
||||
int pdf_get_xref_id(pdf_object_t **pdf);
|
||||
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
|
||||
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
|
||||
int pdf_get_kid_count(pdf_object_t **pdf, int id);
|
||||
|
|
|
@ -161,6 +161,27 @@ pdf_get_catalog_id(pdf_object_t **pdf)
|
|||
return catalog_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_xref_id(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int xref_id = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, "/XRef") != NULL)
|
||||
xref_id = ptr->id;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return xref_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
||||
{
|
||||
|
|
|
@ -154,10 +154,16 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
|||
|
||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
||||
/* A dictionary object may have nested dictionary */
|
||||
/*
|
||||
* A dictionary object may have nested dictionary,
|
||||
* but it should not be in a stream
|
||||
*/
|
||||
while ((tmp = _memmem_whitespace(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL)
|
||||
">>", 2)) != NULL &&
|
||||
memmem(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
"stream\r\n", 8) == NULL)
|
||||
tail = tmp;
|
||||
|
||||
ptr->dictionary_size = tail - head + 2;
|
||||
|
|
|
@ -27,10 +27,10 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
|||
fprintf(*fp, "%d 0 obj\n", ptr->id);
|
||||
|
||||
if (ptr->dictionary != NULL) {
|
||||
fputs(ptr->dictionary, *fp);
|
||||
fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
|
||||
fputs("\n", *fp);
|
||||
} else if (ptr->object != NULL) {
|
||||
fputs(ptr->object, *fp);
|
||||
fwrite(ptr->object, ptr->object_size, 1, *fp);
|
||||
fputs("\n", *fp);
|
||||
} else if (ptr->stream == NULL) {
|
||||
fputs("null\n", *fp);
|
||||
|
|
Loading…
Reference in a new issue