Produce PDF directly from KDH.
This commit is contained in:
parent
98691d4203
commit
1f62c53da6
7 changed files with 103 additions and 28 deletions
|
@ -6,15 +6,11 @@ Melon: Converter that produces PDF from CNKI proprietary formats
|
||||||
Development
|
Development
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
Currently, PDF, CAJ, and KDH can be converted. Please report
|
Currently, CAJ and KDH can be converted. Please report
|
||||||
any failures with a sample that can reproduce the behaviour.
|
any failures with a sample that can reproduce the behaviour.
|
||||||
|
|
||||||
HN support is being worked on.
|
HN support is being worked on.
|
||||||
|
|
||||||
KDH is essentially an invalid PDF file xor'ed with a predetermined key.
|
|
||||||
You may want to convert the decrypted KDH to valid PDF, although some
|
|
||||||
PDF readers can display the invalid PDF.
|
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,11 @@ cnki_kdh(cnki_t **param)
|
||||||
|
|
||||||
char buf[(*param)->size_buf];
|
char buf[(*param)->size_buf];
|
||||||
|
|
||||||
|
FILE *tmp = tmpfile();
|
||||||
|
|
||||||
|
if (tmp == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
||||||
|
|
||||||
|
@ -35,15 +40,27 @@ cnki_kdh(cnki_t **param)
|
||||||
key_cur++;
|
key_cur++;
|
||||||
}
|
}
|
||||||
|
|
||||||
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
|
fwrite(buf, (*param)->size_buf, 1, tmp);
|
||||||
|
|
||||||
if (ftell((*param)->fp_i) == size)
|
if (ftell((*param)->fp_i) == size)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Decryption ended total %ld byte(s) written\n",
|
printf("Decrypted %ld byte(s)\n", ftell(tmp));
|
||||||
ftell((*param)->fp_o));
|
|
||||||
|
fseek(tmp, 0, SEEK_SET);
|
||||||
|
|
||||||
|
FILE *orig = (*param)->fp_i;
|
||||||
|
(*param)->fp_i = tmp;
|
||||||
|
|
||||||
|
cnki_pdf(param);
|
||||||
|
|
||||||
|
(*param)->fp_i = orig;
|
||||||
|
fclose(tmp);
|
||||||
|
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("Conversion ended\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,7 +232,19 @@ cnki_pdf(cnki_t **param)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Generating '/Catalog' dictionary\n");
|
printf("Searching for catalog object\n");
|
||||||
|
|
||||||
|
int catalog = pdf_get_catalog_id(&pdf);
|
||||||
|
|
||||||
|
if (catalog != 0) {
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("catalog object is %d.\n", catalog);
|
||||||
|
} else {
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("catalog object is missing\n");
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Generating catalog object\n");
|
||||||
|
|
||||||
snprintf(buf, 64,
|
snprintf(buf, 64,
|
||||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||||
|
@ -250,10 +262,32 @@ cnki_pdf(cnki_t **param)
|
||||||
|
|
||||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
||||||
|
|
||||||
free(dictionary);
|
if ((*param)->stat > 0)
|
||||||
|
printf("Generated catalog object\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Searching for xref object\n");
|
||||||
|
|
||||||
|
int xref = pdf_get_xref_id(&pdf);
|
||||||
|
|
||||||
|
if (xref != 0) {
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("xref object is %d.\n", xref);
|
||||||
|
|
||||||
|
if ((*param)->stat > 1)
|
||||||
|
printf("Deleting xref object\n");
|
||||||
|
|
||||||
|
pdf_obj_del(&pdf, xref);
|
||||||
|
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Generated '/Catalog' dictionary\n");
|
printf("Deleted xref object\n");
|
||||||
|
} else {
|
||||||
|
if ((*param)->stat > 0)
|
||||||
|
printf("xref object is missing\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
free(dictionary);
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Sorting object(s)\n");
|
printf("Sorting object(s)\n");
|
||||||
|
@ -312,7 +346,7 @@ cnki_pdf(cnki_t **param)
|
||||||
pdf_get_count(&pdf),
|
pdf_get_count(&pdf),
|
||||||
ftell((*param)->fp_o));
|
ftell((*param)->fp_o));
|
||||||
|
|
||||||
long xref = ftell((*param)->fp_o);
|
long cur_xref = ftell((*param)->fp_o);
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
printf("Writing cross-reference table\n");
|
printf("Writing cross-reference table\n");
|
||||||
|
@ -323,7 +357,7 @@ cnki_pdf(cnki_t **param)
|
||||||
} else {
|
} else {
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Cross-reference table %ld byte(s) written\n",
|
printf("Cross-reference table %ld byte(s) written\n",
|
||||||
ftell((*param)->fp_o) - xref);
|
ftell((*param)->fp_o) - cur_xref);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((*param)->stat > 1)
|
if ((*param)->stat > 1)
|
||||||
|
@ -332,7 +366,7 @@ cnki_pdf(cnki_t **param)
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
cur = ftell((*param)->fp_o);
|
cur = ftell((*param)->fp_o);
|
||||||
|
|
||||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
|
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, cur_xref) != 0) {
|
||||||
if ((*param)->stat > 0)
|
if ((*param)->stat > 0)
|
||||||
printf("Trailer not written\n");
|
printf("Trailer not written\n");
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -54,6 +54,7 @@ int pdf_get_size(pdf_object_t **pdf);
|
||||||
int pdf_get_free_id(pdf_object_t **pdf);
|
int pdf_get_free_id(pdf_object_t **pdf);
|
||||||
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
|
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
|
||||||
int pdf_get_catalog_id(pdf_object_t **pdf);
|
int pdf_get_catalog_id(pdf_object_t **pdf);
|
||||||
|
int pdf_get_xref_id(pdf_object_t **pdf);
|
||||||
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
|
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
|
||||||
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
|
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
|
||||||
int pdf_get_kid_count(pdf_object_t **pdf, int id);
|
int pdf_get_kid_count(pdf_object_t **pdf, int id);
|
||||||
|
|
|
@ -161,6 +161,27 @@ pdf_get_catalog_id(pdf_object_t **pdf)
|
||||||
return catalog_id;
|
return catalog_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
pdf_get_xref_id(pdf_object_t **pdf)
|
||||||
|
{
|
||||||
|
if (*pdf == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
int xref_id = 0;
|
||||||
|
|
||||||
|
pdf_object_t *ptr = (*pdf)->next;
|
||||||
|
|
||||||
|
while (ptr != NULL) {
|
||||||
|
if (ptr->dictionary != NULL &&
|
||||||
|
strstr(ptr->dictionary, "/XRef") != NULL)
|
||||||
|
xref_id = ptr->id;
|
||||||
|
|
||||||
|
ptr = ptr->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return xref_id;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
||||||
{
|
{
|
||||||
|
|
|
@ -154,10 +154,16 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||||
|
|
||||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
||||||
/* A dictionary object may have nested dictionary */
|
/*
|
||||||
|
* A dictionary object may have nested dictionary,
|
||||||
|
* but it should not be in a stream
|
||||||
|
*/
|
||||||
while ((tmp = _memmem_whitespace(tail + 2,
|
while ((tmp = _memmem_whitespace(tail + 2,
|
||||||
ptr->size - (tail - buf) - 2,
|
ptr->size - (tail - buf) - 2,
|
||||||
">>", 2)) != NULL)
|
">>", 2)) != NULL &&
|
||||||
|
memmem(tail + 2,
|
||||||
|
ptr->size - (tail - buf) - 2,
|
||||||
|
"stream\r\n", 8) == NULL)
|
||||||
tail = tmp;
|
tail = tmp;
|
||||||
|
|
||||||
ptr->dictionary_size = tail - head + 2;
|
ptr->dictionary_size = tail - head + 2;
|
||||||
|
|
|
@ -27,10 +27,10 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
||||||
fprintf(*fp, "%d 0 obj\n", ptr->id);
|
fprintf(*fp, "%d 0 obj\n", ptr->id);
|
||||||
|
|
||||||
if (ptr->dictionary != NULL) {
|
if (ptr->dictionary != NULL) {
|
||||||
fputs(ptr->dictionary, *fp);
|
fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
|
||||||
fputs("\n", *fp);
|
fputs("\n", *fp);
|
||||||
} else if (ptr->object != NULL) {
|
} else if (ptr->object != NULL) {
|
||||||
fputs(ptr->object, *fp);
|
fwrite(ptr->object, ptr->object_size, 1, *fp);
|
||||||
fputs("\n", *fp);
|
fputs("\n", *fp);
|
||||||
} else if (ptr->stream == NULL) {
|
} else if (ptr->stream == NULL) {
|
||||||
fputs("null\n", *fp);
|
fputs("null\n", *fp);
|
||||||
|
|
Loading…
Reference in a new issue