Initial commit.
This commit is contained in:
commit
12ecdd7159
25 changed files with 2563 additions and 0 deletions
4
CHANGE.md
Normal file
4
CHANGE.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
0.1.0 (2020-04-08)
|
||||
==================
|
||||
|
||||
* Initial release
|
202
COPYING
Normal file
202
COPYING
Normal file
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
40
README.md
Normal file
40
README.md
Normal file
|
@ -0,0 +1,40 @@
|
|||
Melon
|
||||
=====
|
||||
|
||||
Melon: Converter that produces PDF from CNKI proprietary formats
|
||||
|
||||
Development
|
||||
-----------
|
||||
|
||||
Currently, PDF, CAJ, and KDH can be converted. Please report
|
||||
any failures with a sample that can reproduce the behaviour.
|
||||
|
||||
KDH is essentially an invalid PDF file xor'ed with a predetermined key.
|
||||
You may want to convert the decrypted KDH to valid PDF, although some
|
||||
PDF readers can display the invalid PDF.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
`make`
|
||||
|
||||
Optionally, `make install`
|
||||
|
||||
`melon -o OUTPUT INPUT`
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
-o, --output
|
||||
Specify output file
|
||||
|
||||
-b, --buffer
|
||||
Set buffer size (default 512k)
|
||||
|
||||
-v, --verbose
|
||||
Print more information (twice for even more)
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
||||
This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
|
28
src/GNUmakefile
Normal file
28
src/GNUmakefile
Normal file
|
@ -0,0 +1,28 @@
|
|||
#
|
||||
# Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# Every C file in this directory is compiled and linked into melon.
src != ls *.c
obj = ${src:.c=.o}

PREFIX = /usr/local

CFLAGS = -O3 -march=native -pipe -Wall
LDFLAGS = -Wl,-O3 -Wl,--as-needed
# Libraries are kept out of LDFLAGS so they can follow the objects on the
# link line; the linker resolves symbols left to right, so a library named
# before the objects that need it may be skipped.
LDLIBS = -lcrypto

all: ${obj}
	${CC} ${LDFLAGS} -o melon $^ ${LDLIBS}

clean:
	rm -f melon ${obj}

install:
	install -d ${PREFIX}/bin
	install melon ${PREFIX}/bin/

deinstall:
	rm -f ${PREFIX}/bin/melon

.PHONY: all clean install deinstall
|
28
src/Makefile
Normal file
28
src/Makefile
Normal file
|
@ -0,0 +1,28 @@
|
|||
#
|
||||
# Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# Every C file in this directory is compiled and linked into melon.
src != ls *.c
obj = ${src:.c=.o}

PREFIX = /usr/local

CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
LDFLAGS = -Wl,-O3 -Wl,--as-needed
# Libraries are kept out of LDFLAGS so they can follow the objects on the
# link line; the linker resolves symbols left to right, so a library named
# before the objects that need it may be skipped.
LDLIBS = -lcrypto

all: ${obj}
	${CC} ${LDFLAGS} -o melon $> ${LDLIBS}

clean:
	rm -f melon ${obj}

install:
	install -d ${PREFIX}/bin
	install melon ${PREFIX}/bin/

deinstall:
	rm -f ${PREFIX}/bin/melon

.PHONY: all clean install deinstall
|
168
src/cnki.c
Normal file
168
src/cnki.c
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_create(cnki_t **param)
|
||||
{
|
||||
if (*param != NULL)
|
||||
return 1;
|
||||
|
||||
*param = malloc(sizeof(cnki_t));
|
||||
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
(*param)->stat = 0;
|
||||
(*param)->size_buf = 524288;
|
||||
(*param)->fp_i = NULL;
|
||||
(*param)->fp_o = NULL;
|
||||
|
||||
(*param)->file_stat = malloc(sizeof(file_stat_t));
|
||||
|
||||
if ((*param)->file_stat== NULL)
|
||||
return 1;
|
||||
|
||||
memset((*param)->file_stat, 0, sizeof(file_stat_t));
|
||||
|
||||
(*param)->object_outline = NULL;
|
||||
(*param)->object_nh = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
cnki_destroy(cnki_t **param)
|
||||
{
|
||||
if (*param != NULL) {
|
||||
if ((*param)->file_stat != NULL)
|
||||
free((*param)->file_stat);
|
||||
if ((*param)->object_outline != NULL)
|
||||
free((*param)->object_outline);
|
||||
if ((*param)->object_nh != NULL)
|
||||
free((*param)->object_nh);
|
||||
free(*param);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
cnki_info(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading file header at %x\n", ADDRESS_HEAD);
|
||||
|
||||
int addr[2];
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_HEAD, SEEK_SET);
|
||||
fread((*param)->file_stat->type, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("File type is '%s'\n", (*param)->file_stat->type);
|
||||
|
||||
if (strcmp((*param)->file_stat->type, "%PDF") == 0) {
|
||||
return 0;
|
||||
} else if (strcmp((*param)->file_stat->type, "CAJ") == 0) {
|
||||
addr[0] = ADDRESS_CAJ_PAGE;
|
||||
addr[1] = ADDRESS_CAJ_OUTLINE;
|
||||
} else if (strcmp((*param)->file_stat->type, "HN") == 0) {
|
||||
addr[0] = ADDRESS_HN_PAGE;
|
||||
addr[1] = ADDRESS_HN_OUTLINE;
|
||||
} else if (strcmp((*param)->file_stat->type, "KDH ") == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading page count at %x\n", addr[0]);
|
||||
|
||||
fseek((*param)->fp_i, addr[0], SEEK_SET);
|
||||
fread(&(*param)->file_stat->page, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Advised %d page(s)\n",
|
||||
(*param)->file_stat->page);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading outline count at %x\n", addr[1]);
|
||||
|
||||
fseek((*param)->fp_i, addr[1], SEEK_SET);
|
||||
fread(&(*param)->file_stat->outline, 4, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Advised %d outline(s)\n",
|
||||
(*param)->file_stat->outline);
|
||||
|
||||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading outline(s)\n");
|
||||
printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
|
||||
"title",
|
||||
"hierarchy",
|
||||
"page",
|
||||
"text",
|
||||
"depth");
|
||||
}
|
||||
|
||||
(*param)->object_outline = malloc(sizeof(object_outline_t));
|
||||
|
||||
if ((*param)->object_outline == NULL)
|
||||
return 1;
|
||||
|
||||
object_outline_t *ptr = (*param)->object_outline;
|
||||
for (int i = 0; i < (*param)->file_stat->outline; i++) {
|
||||
fread(ptr->title, 256, 1, (*param)->fp_i);
|
||||
fread(ptr->hierarchy, 24, 1, (*param)->fp_i);
|
||||
fread(ptr->page, 12, 1, (*param)->fp_i);
|
||||
fread(ptr->text, 12, 1, (*param)->fp_i);
|
||||
fread(&ptr->depth, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->next = NULL;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t");
|
||||
for (int j = 1; j <= 256; j++) {
|
||||
printf("%02x", (unsigned char) ptr->title[j - 1]);
|
||||
|
||||
if (j % 8 == 0 && ptr->title[j] == '\0')
|
||||
break;
|
||||
|
||||
if (j % 8 == 0)
|
||||
printf("\n\t");
|
||||
else if (j % 2 == 0)
|
||||
printf(" ");
|
||||
}
|
||||
printf("\t%-24s\t%12s\t%12s\t%5d\n",
|
||||
ptr->hierarchy,
|
||||
ptr->page,
|
||||
ptr->text,
|
||||
ptr->depth);
|
||||
}
|
||||
|
||||
if (i < (*param)->file_stat->outline - 1) {
|
||||
ptr->next = malloc(sizeof(object_outline_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Loaded %d outline(s)\n",
|
||||
(*param)->file_stat->outline);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
86
src/cnki.h
Normal file
86
src/cnki.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef CNKI_H
#define CNKI_H

#include <stdint.h>
#include <stdio.h>

/* Offset of the four file type bytes */
#define ADDRESS_HEAD		0x0000

/* 'CAJ': offsets of the page count, outline count and body pointer */
#define ADDRESS_CAJ_PAGE	0x0010
#define ADDRESS_CAJ_OUTLINE	0x0110
#define ADDRESS_CAJ_BODY	0x0014

/* 'HN': offsets of the page count and outline count */
#define ADDRESS_HN_PAGE		0x0090
#define ADDRESS_HN_OUTLINE	0x0158

/* 'KDH': offset at which the xor'ed data starts */
#define ADDRESS_KDH_BODY	0x00fe

/* Repeating xor key applied to the 'KDH' body, and its length */
#define KEY_KDH			"FZHMEI"
#define KEY_KDH_LENGTH		6

/* Header values read by cnki_info() */
typedef struct _file_stat_t {
	char type[4];		/* raw type bytes, not NUL-terminated */
	int32_t page;		/* advised page count */
	int32_t outline;	/* advised outline count */
} file_stat_t;

/*
 * One outline record. The fields up to and including depth are read
 * from the file in this order with exactly these sizes; the character
 * fields are fixed-width and may lack a terminating NUL.
 */
typedef struct _object_outline_t {
	char title[256]; /* Starting at file_stat_t->outline + 4 */
	char hierarchy[24];
	char page[12];
	char text[12];
	int32_t depth;
	struct _object_outline_t *next;	/* next record, NULL at the end */
} object_outline_t;

/*
 * Node of the tree built by cnki_outline_tree(). The root node has
 * item == NULL.
 */
typedef struct _object_outline_tree_t {
	int id;					/* taken from the ids array */
	struct _object_outline_t *item;		/* borrowed outline record */
	struct _object_outline_tree_t *up;	/* node this one hangs off */
	struct _object_outline_tree_t *left;	/* next entry, same depth */
	struct _object_outline_tree_t *right;	/* first entry, depth + 1 */
} object_outline_tree_t;

/* Values of object_nh_t->image_format */
typedef enum _nh_code {
	CCITTFAX,
	DCT_0,
	DCT_1,
	JBIG2,
	JPX
} nh_code;

/*
 * One page record of an 'HN' file. address, size, page and zero come
 * from the page table; text, image_* and image are filled in from the
 * data found at address.
 */
typedef struct _object_nh_t {
	int32_t address; /* Starting at end of object_outline_t */
	int32_t size;		/* size of the text block in bytes */
	int16_t page[2];
	int32_t zero[2];
	char *text;		/* size bytes read from address */
	int32_t image_format; /* nh_code */
	int32_t image_address;
	int32_t image_size;
	char *image;		/* image_size bytes read from image_address */
	struct _object_nh_t *next;	/* next page, NULL at the end */
} object_nh_t;

/* Conversion state shared by all readers and writers */
typedef struct _cnki_t {
	int stat;		/* verbosity: > 0 progress, > 1 details */
	int size_buf;		/* I/O buffer size in bytes */
	FILE *fp_i;		/* input stream */
	FILE *fp_o;		/* output stream */
	file_stat_t *file_stat;
	object_outline_t *object_outline;	/* head of the outline list */
	object_nh_t *object_nh;			/* head of the page list */
} cnki_t;

/* cnki_pdf.c */
int cnki_pdf(cnki_t **param);

/* cnki_outline_tree.c */
int cnki_outline_tree(object_outline_tree_t **outline_tree,
	object_outline_t **outline, int *ids);

/* cnki_xml.c */
int cnki_xml(char **xml, FILE **fp);

#endif /* CNKI_H */
|
40
src/cnki_caj.c
Normal file
40
src/cnki_caj.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_caj(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'CAJ' conversion\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
|
||||
|
||||
int addr;
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_CAJ_BODY, SEEK_SET);
|
||||
fread(&addr, 4, 1, (*param)->fp_i);
|
||||
fseek((*param)->fp_i, addr, SEEK_SET);
|
||||
fread(&addr, 4, 1, (*param)->fp_i);
|
||||
fseek((*param)->fp_i, addr, SEEK_SET);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Advised document body address is %x\n", addr);
|
||||
|
||||
cnki_pdf(param);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
return 0;
|
||||
}
|
49
src/cnki_kdh.c
Normal file
49
src/cnki_kdh.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>

#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_kdh(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'KDH' decryption\n");
|
||||
|
||||
fseek((*param)->fp_i, 0, SEEK_END);
|
||||
|
||||
long size = ftell((*param)->fp_i);
|
||||
|
||||
fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET);
|
||||
|
||||
const char key[] = KEY_KDH;
|
||||
const int key_len = KEY_KDH_LENGTH;
|
||||
long key_cur = 0;
|
||||
|
||||
char buf[(*param)->size_buf];
|
||||
|
||||
for (;;) {
|
||||
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
|
||||
|
||||
for (int i = 0; i < (*param)->size_buf; i++) {
|
||||
buf[i] ^= key[key_cur % key_len];
|
||||
key_cur++;
|
||||
}
|
||||
|
||||
fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
|
||||
|
||||
if (ftell((*param)->fp_i) == size)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Decryption ended total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
return 0;
|
||||
}
|
110
src/cnki_nh.c
Normal file
110
src/cnki_nh.c
Normal file
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_nh(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin 'HN' conversion\n");
|
||||
|
||||
if ((*param)->file_stat->page > 0)
|
||||
(*param)->object_nh = malloc(sizeof(object_nh_t));
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((*param)->object_nh == NULL)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("Loading page(s)\n");
|
||||
printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
|
||||
"address",
|
||||
"text",
|
||||
"page",
|
||||
"zero",
|
||||
"code",
|
||||
"address",
|
||||
"image");
|
||||
}
|
||||
|
||||
object_nh_t *ptr = (*param)->object_nh;
|
||||
for (int i = 0; i < (*param)->file_stat->page; i++) {
|
||||
fread(&ptr->address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->size, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->page, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->zero, 8, 1, (*param)->fp_i);
|
||||
|
||||
ptr->text = NULL;
|
||||
ptr->image_format = -1;
|
||||
ptr->image_address = 0;
|
||||
ptr->image_size = 0;
|
||||
ptr->image = NULL;
|
||||
ptr->next = NULL;
|
||||
|
||||
if (i < (*param)->file_stat->page - 1) {
|
||||
ptr->next = malloc(sizeof(object_nh_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr = (*param)->object_nh;
|
||||
while (ptr != NULL) {
|
||||
ptr->text = malloc(ptr->size);
|
||||
|
||||
if (ptr->text == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->address, SEEK_SET);
|
||||
fread(ptr->text, ptr->size, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_format, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_address, 4, 1, (*param)->fp_i);
|
||||
fread(&ptr->image_size, 4, 1, (*param)->fp_i);
|
||||
|
||||
ptr->image = malloc(ptr->image_size);
|
||||
|
||||
if (ptr->image == NULL)
|
||||
return 1;
|
||||
|
||||
fseek((*param)->fp_i, ptr->image_address, SEEK_SET);
|
||||
fread(ptr->image, ptr->image_size, 1, (*param)->fp_i);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->page[0],
|
||||
ptr->page[1],
|
||||
ptr->zero[0],
|
||||
ptr->zero[1],
|
||||
ptr->image_format,
|
||||
ptr->image_address,
|
||||
ptr->image_size);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Loaded %d page(s)\n", (*param)->file_stat->page);
|
||||
|
||||
/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Conversion ended\n");
|
||||
|
||||
/* TODO: Finish me please :) */
|
||||
return 1;
|
||||
}
|
73
src/cnki_outline_tree.c
Normal file
73
src/cnki_outline_tree.c
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
int
|
||||
cnki_outline_tree(object_outline_tree_t **outline_tree,
|
||||
object_outline_t **outline, int *ids)
|
||||
{
|
||||
if (*outline_tree != NULL || *outline == NULL)
|
||||
return 1;
|
||||
|
||||
int pos = 0;
|
||||
|
||||
*outline_tree = malloc(sizeof(object_outline_tree_t));
|
||||
|
||||
if (*outline_tree == NULL)
|
||||
return 1;
|
||||
|
||||
object_outline_tree_t *tree = *outline_tree;
|
||||
|
||||
tree->id = ids[pos++];
|
||||
tree->item = NULL;
|
||||
tree->up = NULL;
|
||||
tree->left = NULL;
|
||||
tree->right = NULL;
|
||||
|
||||
object_outline_t *ptr = *outline;
|
||||
while (ptr != NULL) {
|
||||
if (tree->item == NULL ||
|
||||
ptr->depth == tree->item->depth) {
|
||||
while (tree->left != NULL)
|
||||
tree = tree->left;
|
||||
|
||||
tree->left = malloc(sizeof(object_outline_tree_t));
|
||||
|
||||
if (tree->left == NULL)
|
||||
return 1;
|
||||
|
||||
tree->left->id = ids[pos++];
|
||||
tree->left->item = ptr;
|
||||
tree->left->up = tree;
|
||||
tree->left->left = NULL;
|
||||
tree->left->right = NULL;
|
||||
|
||||
tree = tree->left;
|
||||
} else if (ptr->depth == tree->item->depth + 1) {
|
||||
tree->right = malloc(sizeof(object_outline_tree_t));
|
||||
|
||||
if (tree->right == NULL)
|
||||
return 1;
|
||||
|
||||
tree->right->id = ids[pos++];
|
||||
tree->right->item = ptr;
|
||||
tree->right->up = tree;
|
||||
tree->right->left = NULL;
|
||||
tree->right->right = NULL;
|
||||
|
||||
tree = tree->right;
|
||||
} else {
|
||||
tree = tree->up;
|
||||
continue;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
351
src/cnki_pdf.c
Normal file
351
src/cnki_pdf.c
Normal file
|
@ -0,0 +1,351 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki.h"
|
||||
#include "pdf.h"
|
||||
#include "pdf_cnki.h"
|
||||
|
||||
int
|
||||
cnki_pdf(cnki_t **param)
|
||||
{
|
||||
if (*param == NULL)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *pdf = NULL;
|
||||
|
||||
if (pdf_obj_create(&pdf) != 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Begin processing PDF\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Loading object(s)\n");
|
||||
|
||||
if (pdf_load(&pdf, &(*param)->fp_i, (*param)->size_buf) != 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"address",
|
||||
"size",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Loaded %d object(s)\n",
|
||||
pdf_get_count(&pdf));
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Searching for parent object(s)\n");
|
||||
|
||||
int *parent = NULL;
|
||||
pdf_get_parent_id(&pdf, &parent);
|
||||
|
||||
if (parent[0] == 0)
|
||||
return 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Discovered %d parent object(s)\n", parent[0]);
|
||||
|
||||
char buf[64];
|
||||
|
||||
int parent_missing[parent[0]];
|
||||
int *kid;
|
||||
int dictionary_size;
|
||||
char *dictionary;
|
||||
|
||||
for (int i = 1; i <= parent[0]; i++) {
|
||||
if ((*param)->stat > 1)
|
||||
printf("Searching for object %d\n", parent[i]);
|
||||
|
||||
kid = NULL;
|
||||
pdf_get_kid_id(&pdf, parent[i], &kid);
|
||||
|
||||
if (kid[0] != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Object is missing\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating object\n");
|
||||
|
||||
dictionary_size = 64 + 12 * kid[0];
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Pages\n/Kids [");
|
||||
strcat(dictionary, buf);
|
||||
for (int j = 1; j <= kid[0]; j++) {
|
||||
snprintf(buf, 64,
|
||||
"%d 0 R",
|
||||
kid[j]);
|
||||
strcat(dictionary, buf);
|
||||
if (j < kid[0])
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
snprintf(buf, 64,
|
||||
"]\n/Count %d\n>>\n",
|
||||
pdf_get_kid_count(&pdf, parent[i]));
|
||||
strcat(dictionary, buf);
|
||||
|
||||
pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
|
||||
|
||||
parent_missing[i - 1] = 1;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated object for %d child(ren)\n",
|
||||
kid[0]);
|
||||
|
||||
free(dictionary);
|
||||
} else {
|
||||
parent_missing[i - 1] = 0;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Object exists\n");
|
||||
}
|
||||
|
||||
free(kid);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Searching for root object\n");
|
||||
|
||||
dictionary_size = 128;
|
||||
dictionary = malloc(dictionary_size);
|
||||
|
||||
if (dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
int root = 0;
|
||||
|
||||
int root_kid = 0;
|
||||
for (int i = 0; i < parent[0]; i++)
|
||||
if (parent_missing[i])
|
||||
root_kid++;
|
||||
|
||||
if (root_kid <= 1) {
|
||||
if (root_kid == 0) {
|
||||
for (int i = 1; i <= parent[0]; i++)
|
||||
if (root == 0 || root < parent[i])
|
||||
root = parent[i];
|
||||
} else {
|
||||
for (int i = 0; i < parent[0]; i++)
|
||||
if (parent_missing[i])
|
||||
root = i;
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Root object is %d.\n",
|
||||
root);
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Root object is missing\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating root object\n");
|
||||
|
||||
root = pdf_get_free_id(&pdf);
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Pages\n/Kids ");
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (parent[0] > 1)
|
||||
strcat(dictionary, "[");
|
||||
|
||||
for (int i = 0; i < parent[0]; i++) {
|
||||
if (parent_missing[i]) {
|
||||
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
|
||||
strcat(dictionary, buf);
|
||||
if (i < root_kid)
|
||||
strcat(dictionary, " ");
|
||||
}
|
||||
}
|
||||
|
||||
if (parent[0] > 1)
|
||||
strcat(dictionary, "]");
|
||||
|
||||
strcat(dictionary, "\n");
|
||||
|
||||
snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
strcat(dictionary, ">>\n");
|
||||
|
||||
pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
|
||||
|
||||
memset(dictionary, 0, dictionary_size);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated root object %d.\n",
|
||||
root);
|
||||
}
|
||||
|
||||
int *ids = NULL;
|
||||
|
||||
if ((*param)->file_stat->outline > 0) {
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating outline object(s)\n\t%8s\n", "id");
|
||||
|
||||
pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
|
||||
int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
|
||||
printf("\t%8d\n", ids[i]);
|
||||
|
||||
if ((*param)->stat > 0) {
|
||||
if (outline != 0)
|
||||
printf("No outline information\n");
|
||||
else
|
||||
printf("Generated %d outline object(s)\n",
|
||||
(*param)->file_stat->outline + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Generating '/Catalog' dictionary\n");
|
||||
|
||||
snprintf(buf, 64,
|
||||
"<<\n/Type /Catalog\n/Pages %d 0 R\n",
|
||||
root);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (ids != NULL) {
|
||||
snprintf(buf, 64,
|
||||
"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
|
||||
ids[0]);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, ">>\n");
|
||||
|
||||
pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
|
||||
|
||||
free(dictionary);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Generated '/Catalog' dictionary\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Sorting object(s)\n");
|
||||
|
||||
pdf_obj_sort(&pdf);
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Sorted object(s)\n");
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing header\n");
|
||||
|
||||
long cur = 0;
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
|
||||
fprintf(stderr, "Header not written\n");
|
||||
return 1;
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Header %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing object(s)\n");
|
||||
|
||||
pdf_dump_obj(&pdf, &(*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1) {
|
||||
printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
|
||||
"address",
|
||||
"size",
|
||||
"id",
|
||||
"object",
|
||||
"dictionary",
|
||||
"stream");
|
||||
|
||||
pdf_object_t *ptr = pdf->next;
|
||||
while (ptr != NULL) {
|
||||
printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
|
||||
ptr->address,
|
||||
ptr->size,
|
||||
ptr->id,
|
||||
ptr->object_size,
|
||||
ptr->dictionary_size,
|
||||
ptr->stream_size);
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("%d object(s) %ld byte(s) written\n",
|
||||
pdf_get_count(&pdf),
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
long xref = ftell((*param)->fp_o);
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing cross-reference table\n");
|
||||
|
||||
if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Cross-reference table %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - xref);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 1)
|
||||
printf("Writing trailer\n");
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
cur = ftell((*param)->fp_o);
|
||||
|
||||
if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer not written\n");
|
||||
} else {
|
||||
if ((*param)->stat > 0)
|
||||
printf("Trailer %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o) - cur);
|
||||
}
|
||||
|
||||
if ((*param)->stat > 0)
|
||||
printf("Total %ld byte(s) written\n",
|
||||
ftell((*param)->fp_o));
|
||||
|
||||
pdf_obj_destroy(&pdf);
|
||||
|
||||
return 0;
|
||||
}
|
14
src/cnki_xml.c
Normal file
14
src/cnki_xml.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Placeholder for XML metadata extraction.  Neither argument is
 * touched; the function always reports failure by returning 1.
 */
int
cnki_xml(char **xml, FILE **fp)
{
	/* TODO: Extract XML and embed into `/Metadata' */
	(void) xml;
	(void) fp;

	return 1;
}
|
21
src/extern.h
Normal file
21
src/extern.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "cnki.h"
|
||||
|
||||
/* cnki.c */
|
||||
int cnki_create(cnki_t **param);
|
||||
void cnki_destroy(cnki_t **param);
|
||||
int cnki_info(cnki_t **param);
|
||||
|
||||
/* cnki_caj.c */
|
||||
int cnki_caj(cnki_t **param);
|
||||
|
||||
/* cnki_nh.c */
|
||||
int cnki_nh(cnki_t **param);
|
||||
|
||||
/* cnki_kdh.c */
|
||||
int cnki_kdh(cnki_t **param);
|
70
src/iconv.c
Normal file
70
src/iconv.c
Normal file
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
/* So, why would anyone use something other than UTF-8? */
|
||||
/*
 * Convert the NUL-terminated string `src' from encoding `src_code' to
 * encoding `dst_code'.
 *
 * On entry *size is the capacity in bytes of the scratch buffer that
 * receives the converted text.  The source terminator is converted
 * together with the text; the last two bytes of the output are then
 * dropped, so for a two-byte-per-unit target such as UTF-16 the result
 * carries no terminator.
 *
 * On success 0 is returned, *dst points to a malloc'ed copy of the
 * result (the caller frees it) and *size holds its length in bytes.
 * On failure 1 is returned and neither *dst nor *size is modified.
 */
int
strconv(char **dst,
    const char * restrict dst_code,
    const char * restrict src,
    const char * restrict src_code,
    int *size)
{
	if (dst == NULL || dst_code == NULL || src == NULL ||
	    src_code == NULL || size == NULL || *size <= 0)
		return 1;

	int ret = 1;

	size_t dst_left = (size_t) *size;
	size_t src_left = strlen(src) + 1;

	char *dst_buf = malloc(dst_left);
	char *src_buf = malloc(src_left);
	char *out = NULL;

	/* iconv() advances these cursors; the buffers keep their start */
	char *dst_pos = dst_buf;
	char *src_pos = src_buf;

	size_t written;
	size_t out_len = 0;

	iconv_t cd = (iconv_t) -1;

	if (dst_buf == NULL || src_buf == NULL)
		goto done;

	memcpy(src_buf, src, src_left);

	cd = iconv_open(dst_code, src_code);

	if (cd == (iconv_t) -1)
		goto done;

	if (iconv(cd, &src_pos, &src_left,
	    &dst_pos, &dst_left) == (size_t) -1)
		goto done;

	written = (size_t) *size - dst_left;

	/* The converted terminator must be there to be dropped */
	if (written < 2)
		goto done;

	/* Not including NULL */
	out_len = written - 2;

	/* malloc(0) may legitimately return NULL, so never ask for it */
	out = malloc(out_len > 0 ? out_len : 1);

	if (out == NULL)
		goto done;

	memcpy(out, dst_buf, out_len);
	ret = 0;

done:
	/* Release the descriptor on every path, not only on success */
	if (cd != (iconv_t) -1 && iconv_close(cd) != 0)
		ret = 1;

	if (ret == 0) {
		*dst = out;
		*size = (int) out_len;
	} else {
		free(out);
	}

	free(dst_buf);
	free(src_buf);

	return ret;
}
|
12
src/iconv.h
Normal file
12
src/iconv.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int
|
||||
strconv(char **dst,
|
||||
const char * restrict dst_code,
|
||||
const char * restrict src,
|
||||
const char * restrict src_code,
|
||||
int *size);
|
127
src/melon.c
Normal file
127
src/melon.c
Normal file
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include "extern.h"
|
||||
#include "version.h"
|
||||
|
||||
int
|
||||
main(int argc, char **argv, char **envp)
|
||||
{
|
||||
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
|
||||
printf("Copyright (c) 2020, yzrh <yzrh@tuta.io>\n\n");
|
||||
|
||||
cnki_t *param = NULL;
|
||||
|
||||
if (cnki_create(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int c;
|
||||
|
||||
for (;;) {
|
||||
static struct option long_options[] = {
|
||||
{"output", required_argument, 0, 'o'},
|
||||
{"buffer", required_argument, 0, 'b'},
|
||||
{"verbose", no_argument, 0, 'v'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
int option_index = 0;
|
||||
|
||||
c = getopt_long(argc, argv, "o:b:v",
|
||||
long_options, &option_index);
|
||||
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
switch (c) {
|
||||
case 'o':
|
||||
if ((param->fp_o = fopen(optarg, "w")) == NULL) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
param->size_buf = atoi(optarg);
|
||||
break;
|
||||
case 'v':
|
||||
param->stat += 1;
|
||||
break;
|
||||
case '?':
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (argc - optind == 1) {
|
||||
if (param->fp_o == NULL) {
|
||||
if (param->stat == 0) {
|
||||
param->fp_o = stdout;
|
||||
} else {
|
||||
fprintf(stderr, "%s: --verbose ", argv[0]);
|
||||
fprintf(stderr, "must not be set ");
|
||||
fprintf(stderr, "when using stdout\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
if ((param->fp_i = fopen(argv[optind], "r")) == NULL) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
cnki_info(¶m);
|
||||
|
||||
if (strcmp(param->file_stat->type, "%PDF") == 0) {
|
||||
if (cnki_pdf(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "CAJ") == 0) {
|
||||
if (cnki_caj(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "HN") == 0) {
|
||||
if (cnki_nh(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else if (strcmp(param->file_stat->type, "KDH ") == 0) {
|
||||
if (cnki_kdh(¶m) != 0) {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "%s: %s\n", argv[0],
|
||||
"Invalid file");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
fclose(param->fp_i);
|
||||
fclose(param->fp_o);
|
||||
} else {
|
||||
fprintf(stderr, "Usage: %s ", argv[0]);
|
||||
fprintf(stderr, "[--output --buffer --verbose] file\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
cnki_destroy(¶m);
|
||||
}
|
228
src/pdf.c
Normal file
228
src/pdf.c
Normal file
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pdf.h"
|
||||
|
||||
static int
|
||||
_min_id(pdf_object_t **pdf)
|
||||
{
|
||||
int min = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (min == 0 || ptr->id < min)
|
||||
min = ptr->id;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return min;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_create(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf != NULL)
|
||||
return 1;
|
||||
|
||||
*pdf = malloc(sizeof(pdf_object_t));
|
||||
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
(*pdf)->address = 0;
|
||||
(*pdf)->size = 0;
|
||||
(*pdf)->id = 0;
|
||||
(*pdf)->object_size = 0;
|
||||
(*pdf)->object = NULL;
|
||||
(*pdf)->dictionary_size = 0;
|
||||
(*pdf)->dictionary = NULL;
|
||||
(*pdf)->stream_size = 0;
|
||||
(*pdf)->stream= NULL;
|
||||
(*pdf)->next = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
pdf_obj_destroy(pdf_object_t **pdf)
|
||||
{
|
||||
pdf_object_t *ptr;
|
||||
while ((ptr = *pdf) != NULL) {
|
||||
*pdf = (*pdf)->next;
|
||||
free(ptr->object);
|
||||
free(ptr->dictionary);
|
||||
free(ptr->stream);
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
{
|
||||
if (*pdf != NULL || id <= 0 ||
|
||||
(object != NULL && dictionary != NULL))
|
||||
return 1;
|
||||
|
||||
*pdf = malloc(sizeof(pdf_object_t));
|
||||
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
(*pdf)->address = 0;
|
||||
(*pdf)->size = 0;
|
||||
|
||||
(*pdf)->id = id;
|
||||
|
||||
if (dictionary != NULL) {
|
||||
(*pdf)->dictionary_size = strlen(dictionary) + 1;
|
||||
(*pdf)->dictionary = malloc((*pdf)->dictionary_size);
|
||||
|
||||
if ((*pdf)->dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
strncpy((*pdf)->dictionary, dictionary, (*pdf)->dictionary_size);
|
||||
|
||||
(*pdf)->object_size = 0;
|
||||
(*pdf)->object = NULL;
|
||||
} else if (object != NULL) {
|
||||
(*pdf)->object_size = strlen(object) + 1;
|
||||
(*pdf)->object = malloc((*pdf)->object_size);
|
||||
|
||||
if ((*pdf)->object == NULL)
|
||||
return 1;
|
||||
|
||||
strncpy((*pdf)->object, object, (*pdf)->object_size);
|
||||
|
||||
(*pdf)->dictionary_size = 0;
|
||||
(*pdf)->dictionary = NULL;
|
||||
} else {
|
||||
(*pdf)->object_size = 0;
|
||||
(*pdf)->object = NULL;
|
||||
(*pdf)->dictionary_size = 0;
|
||||
(*pdf)->dictionary = NULL;
|
||||
}
|
||||
|
||||
if (stream != NULL) {
|
||||
(*pdf)->stream_size = sizeof(stream);
|
||||
(*pdf)->stream = malloc((*pdf)->stream_size);
|
||||
|
||||
if ((*pdf)->stream == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy((*pdf)->stream, stream, (*pdf)->stream_size);
|
||||
} else {
|
||||
(*pdf)->stream_size = 0;
|
||||
(*pdf)->stream = NULL;
|
||||
}
|
||||
|
||||
(*pdf)->next = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_del(pdf_object_t **pdf, int id)
|
||||
{
|
||||
if (*pdf == NULL || id <= 0)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL) {
|
||||
if (ptr->next->id == id) {
|
||||
ptr->next = ptr->next->next;
|
||||
break;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
if (id <= 0)
|
||||
id = pdf_get_free_id(pdf);
|
||||
|
||||
pdf_object_t *ptr = NULL;
|
||||
|
||||
if (pdf_obj_add(&ptr, id, object, dictionary, stream) != 0) {
|
||||
free(ptr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
ptr->next = (*pdf)->next;
|
||||
(*pdf)->next = ptr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
if (id <= 0)
|
||||
id = pdf_get_free_id(pdf);
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
if (pdf_obj_add(&ptr->next, id, object, dictionary, stream) != 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_obj_sort(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int id;
|
||||
pdf_object_t *tmp;
|
||||
pdf_object_t *ptr;
|
||||
|
||||
ptr = *pdf;
|
||||
while (ptr->next != NULL) {
|
||||
id = _min_id(&ptr->next);
|
||||
|
||||
if (id == 0)
|
||||
return 1;
|
||||
|
||||
if (id < ptr->next->id) {
|
||||
pdf_get_obj(&ptr->next, id, &tmp);
|
||||
pdf_obj_del(&ptr->next, id);
|
||||
|
||||
tmp->next = ptr->next;
|
||||
ptr->next = tmp;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
61
src/pdf.h
Normal file
61
src/pdf.h
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * One node of the singly linked object list.  The first node of a
 * list is a head created by pdf_obj_create(); objects hang off its
 * `next' pointer.
 */
typedef struct _pdf_object_t {
	int address;		/* file offset the parser reads the body from */
	int size;		/* number of bytes the parser reads at `address' */
	int id;			/* object number */
	int object_size;	/* byte count recorded for `object' */
	char *object;		/* object text, or NULL */
	int dictionary_size;	/* byte count recorded for `dictionary' */
	char *dictionary;	/* dictionary text, or NULL */
	int stream_size;	/* byte count recorded for `stream' */
	char *stream;		/* stream data, or NULL */
	struct _pdf_object_t *next;	/* following node, NULL at the end */
} pdf_object_t;
|
||||
|
||||
/* pdf.c */
|
||||
/* TODO: Rewrite object dictionary */
|
||||
/* TODO: Compact object id */
|
||||
/* TODO: `mutool clean -gggsz' */
|
||||
int pdf_obj_create(pdf_object_t **pdf);
|
||||
void pdf_obj_destroy(pdf_object_t **pdf);
|
||||
int pdf_obj_add(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
int pdf_obj_del(pdf_object_t **pdf, int id);
|
||||
int pdf_obj_prepend(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
int pdf_obj_append(pdf_object_t **pdf, int id,
|
||||
const char * restrict object,
|
||||
const char * restrict dictionary,
|
||||
const char * restrict stream);
|
||||
int pdf_obj_sort(pdf_object_t **pdf);
|
||||
|
||||
/* pdf_parser.c */
|
||||
int pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf);
|
||||
|
||||
/* pdf_writer.c */
|
||||
int pdf_dump_obj(pdf_object_t **pdf, FILE **fp);
|
||||
int pdf_dump_header(pdf_object_t **pdf, FILE **fp);
|
||||
int pdf_dump_xref(pdf_object_t **pdf, FILE **fp);
|
||||
int pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref);
|
||||
|
||||
/* pdf_get.c */
|
||||
int pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj);
|
||||
int pdf_get_count(pdf_object_t **pdf);
|
||||
int pdf_get_size(pdf_object_t **pdf);
|
||||
int pdf_get_free_id(pdf_object_t **pdf);
|
||||
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
|
||||
int pdf_get_catalog_id(pdf_object_t **pdf);
|
||||
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
|
||||
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
|
||||
int pdf_get_kid_count(pdf_object_t **pdf, int id);
|
134
src/pdf_cnki.c
Normal file
134
src/pdf_cnki.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cnki.h"
|
||||
#include "iconv.h"
|
||||
#include "pdf.h"
|
||||
|
||||
/*
|
||||
* It will write first, list, and count to *stat
|
||||
* so that when called recursively, it knows
|
||||
* what to do
|
||||
*/
|
||||
static int
|
||||
_outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int **stat)
|
||||
{
|
||||
*stat = malloc(3 * sizeof(int));
|
||||
|
||||
if (*stat == NULL)
|
||||
return 1;
|
||||
|
||||
int size;
|
||||
char *str;
|
||||
|
||||
int *ret;
|
||||
|
||||
char buf[64];
|
||||
char dictionary[1024];
|
||||
|
||||
object_outline_tree_t *ptr = *outline_tree;
|
||||
|
||||
(*stat)[0] = ptr->id;
|
||||
(*stat)[2] = 0;
|
||||
|
||||
while (ptr != NULL) {
|
||||
memset(dictionary, 0, 1024);
|
||||
|
||||
strcat(dictionary, "<<\n");
|
||||
|
||||
size = 512;
|
||||
str = NULL;
|
||||
|
||||
if (strconv(&str, "UTF-16BE",
|
||||
ptr->item->title, "GB18030",
|
||||
&size) == 0) {
|
||||
strcat(dictionary, "/Title <feff");
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
snprintf(buf, 64, "%02x", (unsigned char) str[i]);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
strcat(dictionary, ">\n");
|
||||
}
|
||||
|
||||
free(str);
|
||||
|
||||
snprintf(buf, 64, "/Parent %d 0 R\n", id);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
if (ptr->up != NULL && ptr->up->id != id) {
|
||||
snprintf(buf, 64, "/Prev %d 0 R\n", ptr->up->id);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
if (ptr->left != NULL) {
|
||||
snprintf(buf, 64, "/Next %d 0 R\n", ptr->left->id);
|
||||
strcat(dictionary, buf);
|
||||
}
|
||||
|
||||
if (ptr->right != NULL) {
|
||||
_outline(pdf, &ptr->right, ptr->id, &ret);
|
||||
|
||||
snprintf(buf, 64, "/First %d 0 R\n", ret[0]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Last %d 0 R\n", ret[1]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
snprintf(buf, 64, "/Count -%d\n", ret[2]);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
free(ret);
|
||||
}
|
||||
|
||||
/* Page starts from 0 */
|
||||
snprintf(buf, 64, "/Dest [%d /XYZ null null null]\n>>\n",
|
||||
atoi(ptr->item->page) - 1);
|
||||
strcat(dictionary, buf);
|
||||
|
||||
pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
|
||||
|
||||
if (ptr->left == NULL)
|
||||
(*stat)[1] = ptr->id;
|
||||
|
||||
(*stat)[2]++;
|
||||
|
||||
ptr = ptr->left;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
|
||||
{
|
||||
if (*pdf == NULL || *outline == NULL || *ids == NULL)
|
||||
return 1;
|
||||
|
||||
object_outline_tree_t *outline_tree = NULL;
|
||||
cnki_outline_tree(&outline_tree, outline, *ids);
|
||||
|
||||
char buf[128];
|
||||
int *ret;
|
||||
|
||||
_outline(pdf, &outline_tree->left, outline_tree->id, &ret);
|
||||
|
||||
free(outline_tree);
|
||||
|
||||
snprintf(buf, 128,
|
||||
"<<\n/Type Outlines\n/First %d 0 R\n/Last %d 0 R\n/Count %d\n>>\n",
|
||||
ret[0], ret[1], ret[2]);
|
||||
|
||||
free(ret);
|
||||
|
||||
pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
7
src/pdf_cnki.h
Normal file
7
src/pdf_cnki.h
Normal file
|
@ -0,0 +1,7 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
int pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids);
|
296
src/pdf_get.c
Normal file
296
src/pdf_get.c
Normal file
|
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pdf.h"
|
||||
|
||||
/*
 * Test whether `id' occurs in a counted list: ids[0] holds the number
 * of entries and the entries themselves are ids[1] .. ids[ids[0]].
 * Returns 1 if found, 0 otherwise.
 */
static int
_id_in(int id, int *ids)
{
	int remaining = ids[0];
	const int *entry = ids + 1;

	while (remaining-- > 0) {
		if (*entry++ == id)
			return 1;
	}

	return 0;
}
|
||||
|
||||
int
|
||||
pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj)
|
||||
{
|
||||
if (*pdf == NULL || id <= 0)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL) {
|
||||
if (ptr->next->id == id) {
|
||||
*obj = ptr->next;
|
||||
return 0;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_count(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int count = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
count++;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_size(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int size = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
size += ptr->size;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_free_id(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int free_id = 0;
|
||||
|
||||
pdf_object_t *ptr;
|
||||
|
||||
int id = 0;
|
||||
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
id = i;
|
||||
break;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (i != id) {
|
||||
free_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return free_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
|
||||
{
|
||||
if (*pdf == NULL || *ids != NULL || count <= 0)
|
||||
return 1;
|
||||
|
||||
*ids = malloc(count * sizeof(int));
|
||||
|
||||
if (*ids == NULL)
|
||||
return 1;
|
||||
|
||||
int pos = 0;
|
||||
int id = 0;
|
||||
|
||||
pdf_object_t *ptr;
|
||||
for (int i = 1; i < 99999999; i++) {
|
||||
ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == i) {
|
||||
id = i;
|
||||
break;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (i != id) {
|
||||
(*ids)[pos] = i;
|
||||
|
||||
if (pos == count)
|
||||
return 0;
|
||||
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_catalog_id(pdf_object_t **pdf)
|
||||
{
|
||||
if (*pdf == NULL)
|
||||
return 1;
|
||||
|
||||
int catalog_id = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, "/Catalog") != NULL)
|
||||
catalog_id = ptr->id;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return catalog_id;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_parent_id(pdf_object_t **pdf, int **id)
|
||||
{
|
||||
if (*pdf == NULL || *id != NULL)
|
||||
return 1;
|
||||
|
||||
int id_size = 1;
|
||||
*id = malloc(sizeof(int));
|
||||
|
||||
if (*id == NULL)
|
||||
return 1;
|
||||
|
||||
(*id)[0] = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char *head;
|
||||
char *tail;
|
||||
|
||||
char str[8];
|
||||
int str_val;
|
||||
|
||||
int *ret;
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
(head = strstr(ptr->dictionary, "/Parent ")) != NULL &&
|
||||
(tail = strchr(head + 8, ' ')) != NULL) {
|
||||
memset(str, 0, 8);
|
||||
strncpy(str, head + 8, (tail - head) - 8);
|
||||
str_val = atoi(str);
|
||||
|
||||
if (!_id_in(str_val, *id)) {
|
||||
ret = realloc(*id, ++id_size * sizeof(int));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
else
|
||||
*id = ret;
|
||||
|
||||
(*id)[0]++;
|
||||
(*id)[id_size - 1] = str_val;
|
||||
}
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid)
|
||||
{
|
||||
if (*pdf == NULL || *kid != NULL)
|
||||
return 1;
|
||||
|
||||
int kid_size = 1;
|
||||
*kid = malloc(sizeof(int));
|
||||
|
||||
if (*kid == NULL)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char str[32];
|
||||
int *ret;
|
||||
|
||||
snprintf(str, 32, "/Parent %d 0 R", id);
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->id == id) {
|
||||
(*kid)[0] = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, str) != NULL) {
|
||||
ret = realloc(*kid, ++kid_size * sizeof(int));
|
||||
|
||||
if (ret == NULL)
|
||||
return 1;
|
||||
else
|
||||
*kid = ret;
|
||||
|
||||
(*kid)[kid_size - 1] = ptr->id;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
(*kid)[0] = kid_size - 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_get_kid_count(pdf_object_t **pdf, int id)
|
||||
{
|
||||
if (*pdf == NULL || id <= 0)
|
||||
return 1;
|
||||
|
||||
int count = 0;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char id_str[32];
|
||||
char *pos;
|
||||
|
||||
char str[8];
|
||||
int str_val;
|
||||
|
||||
snprintf(id_str, 32, "/Parent %d 0 R", id);
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->dictionary != NULL &&
|
||||
strstr(ptr->dictionary, id_str) != NULL &&
|
||||
(pos = strstr(ptr->dictionary, "/Count ")) != NULL) {
|
||||
for (int i = 8; i >= 0; i--) {
|
||||
if (i + 7 <= ptr->dictionary_size - (pos - ptr->dictionary) &&
|
||||
pos[i + 7] >= '0' && pos[i + 7] <= '9') {
|
||||
memset(str, 0, 8);
|
||||
strncpy(str, pos + 7, i + 1);
|
||||
str_val = atoi(str);
|
||||
count += str_val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
216
src/pdf_parser.c
Normal file
216
src/pdf_parser.c
Normal file
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#endif /* __linux__ */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pdf.h"
|
||||
|
||||
/*
 * Find the earliest occurrence of the s1 bytes at p1 inside the s0
 * bytes at p0 that is immediately followed, still within those s0
 * bytes, by one of '\r', '\n', '\f', '\t', '\0' or ' '.
 *
 * Returns a pointer to the start of that occurrence, or NULL if there
 * is none.
 */
static void *
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
{
	const unsigned char *hay = p0;

	/* The match plus its trailing byte must fit into the haystack */
	if (s0 <= s1)
		return NULL;

	for (size_t off = 0; off + s1 < s0; off++) {
		if (memcmp(hay + off, p1, s1) != 0)
			continue;

		switch (hay[off + s1]) {
		case '\r':
		case '\n':
		case '\f':
		case '\t':
		case '\0':
		case ' ':
			return (void *) (hay + off);
		default:
			break;
		}
	}

	return NULL;
}
|
||||
|
||||
static int
|
||||
_locate(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||
{
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
char buf[size_buf];
|
||||
|
||||
long cur = ftell(*fp);
|
||||
long end;
|
||||
|
||||
fseek(*fp, 0, SEEK_END);
|
||||
end = ftell(*fp);
|
||||
fseek(*fp, cur, SEEK_SET);
|
||||
|
||||
int head = 0;
|
||||
int tail = 0;
|
||||
char *pos;
|
||||
char *tmp;
|
||||
|
||||
for (;;) {
|
||||
fread(buf, size_buf, 1, *fp);
|
||||
|
||||
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
|
||||
head = cur + (pos - buf) + 7;
|
||||
|
||||
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
|
||||
/* We need to check if it is the object stored in stream */
|
||||
while (memcmp(pos + 7,
|
||||
"\r\nendstream", 11) == 0 &&
|
||||
(tmp = _memmem_whitespace(pos + 6,
|
||||
size_buf - (pos - buf) - 6,
|
||||
"endobj", 6)) != NULL)
|
||||
pos = tmp;
|
||||
|
||||
if (pos - buf < size_buf - 7)
|
||||
tail = cur + (pos - buf);
|
||||
}
|
||||
|
||||
if (tail > head) {
|
||||
if (ptr->next == NULL) {
|
||||
ptr->next = malloc(sizeof(pdf_object_t));
|
||||
|
||||
if (ptr->next == NULL)
|
||||
return 1;
|
||||
|
||||
ptr->next->id = 0;
|
||||
ptr->next->object_size = 0;
|
||||
ptr->next->object = NULL;
|
||||
ptr->next->dictionary_size = 0;
|
||||
ptr->next->dictionary = NULL;
|
||||
ptr->next->stream_size = 0;
|
||||
ptr->next->stream = NULL;
|
||||
ptr->next->next = NULL;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
ptr->address = head;
|
||||
ptr->size = tail - head;
|
||||
|
||||
fseek(*fp, tail + 6, SEEK_SET);
|
||||
head = tail = 0;
|
||||
} else {
|
||||
fseek(*fp, -6, SEEK_CUR);
|
||||
}
|
||||
|
||||
if ((cur = ftell(*fp)) + 6 >= end)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL || size_buf < 7)
|
||||
return 1;
|
||||
|
||||
if (_locate(pdf, fp, size_buf) != 0)
|
||||
return 1;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
|
||||
char *buf;
|
||||
char *head;
|
||||
char *tail;
|
||||
char *tmp;
|
||||
|
||||
while (ptr != NULL) {
|
||||
buf = malloc(ptr->size);
|
||||
|
||||
if (buf == NULL)
|
||||
return 1;
|
||||
|
||||
memset(buf, 0, ptr->size);
|
||||
|
||||
fseek(*fp, ptr->address - 12, SEEK_SET);
|
||||
fread(buf, 8, 1, *fp);
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (buf[i] >= '0' && buf[i] <= '9') {
|
||||
ptr->id = atoi(buf + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fseek(*fp, ptr->address, SEEK_SET);
|
||||
fread(buf, ptr->size, 1, *fp);
|
||||
|
||||
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
|
||||
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
|
||||
/* A dictionary object may have nested dictionary */
|
||||
while ((tmp = _memmem_whitespace(tail + 2,
|
||||
ptr->size - (tail - buf) - 2,
|
||||
">>", 2)) != NULL)
|
||||
tail = tmp;
|
||||
|
||||
ptr->dictionary_size = tail - head + 2;
|
||||
ptr->dictionary = malloc(ptr->dictionary_size + 1);
|
||||
|
||||
if (ptr->dictionary == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
|
||||
memcpy(ptr->dictionary, head, ptr->dictionary_size);
|
||||
|
||||
if ((head = memmem(tail,
|
||||
ptr->size - (tail - buf),
|
||||
"stream\r\n", 8)) != NULL &&
|
||||
(tail = _memmem_whitespace(head,
|
||||
ptr->size - (head - buf),
|
||||
"endstream", 9)) != NULL) {
|
||||
/*
|
||||
* An object may contain a stream that
|
||||
* contains another object that
|
||||
* contains another stream
|
||||
*/
|
||||
while (_memmem_whitespace(tail,
|
||||
ptr->size - (tail - buf),
|
||||
"endobj", 6) != NULL &&
|
||||
(tmp = _memmem_whitespace(tail + 9,
|
||||
ptr->size - (tail - buf) - 9,
|
||||
"endstream", 9)) != NULL)
|
||||
tail = tmp;
|
||||
|
||||
ptr->stream_size = (tail - head) - 8;
|
||||
ptr->stream = malloc(ptr->stream_size);
|
||||
|
||||
if (ptr->stream == NULL)
|
||||
return 1;
|
||||
|
||||
memcpy(ptr->stream, head + 8, ptr->stream_size);
|
||||
}
|
||||
} else {
|
||||
ptr->object_size = ptr->size;
|
||||
ptr->object = malloc(ptr->object_size + 1);
|
||||
|
||||
if (ptr->object == NULL)
|
||||
return 1;
|
||||
|
||||
memset(ptr->object, 0, ptr->object_size + 1);
|
||||
memcpy(ptr->object, buf, ptr->object_size);
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
188
src/pdf_writer.c
Normal file
188
src/pdf_writer.c
Normal file
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "pdf.h"
|
||||
|
||||
int
|
||||
pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
return 1;
|
||||
|
||||
long cur;
|
||||
|
||||
pdf_object_t *ptr = (*pdf)->next;
|
||||
while (ptr != NULL) {
|
||||
ptr->address = cur = ftell(*fp);
|
||||
|
||||
fprintf(*fp, "%d 0 obj\n", ptr->id);
|
||||
|
||||
if (ptr->dictionary != NULL)
|
||||
fputs(ptr->dictionary, *fp);
|
||||
else if (ptr->object != NULL)
|
||||
fputs(ptr->object, *fp);
|
||||
else if (ptr->stream == NULL)
|
||||
fputs("null\n", *fp);
|
||||
|
||||
if (ptr->stream != NULL) {
|
||||
fputs("stream\r\n", *fp);
|
||||
fwrite(ptr->stream, ptr->stream_size, 1, *fp);
|
||||
fputs("endstream\n", *fp);
|
||||
}
|
||||
|
||||
fputs("endobj\n", *fp);
|
||||
|
||||
ptr->size = ftell(*fp) - cur;
|
||||
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_dump_header(pdf_object_t **pdf, FILE **fp)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
return 1;
|
||||
|
||||
fputs("%PDF-1.7\n", *fp);
|
||||
|
||||
const unsigned char bin[4] = {
|
||||
0xf6,
|
||||
0xe4,
|
||||
0xfc,
|
||||
0xdf,
|
||||
};
|
||||
|
||||
fputs("%", *fp);
|
||||
fwrite(bin, 4, 1, *fp);
|
||||
fputs("\n", *fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_dump_xref(pdf_object_t **pdf, FILE **fp)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
return 1;
|
||||
|
||||
fputs("xref\n", *fp);
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
|
||||
pdf_object_t *start = ptr;
|
||||
int count = 1;
|
||||
|
||||
while (ptr != NULL) {
|
||||
if (ptr->next == NULL ||
|
||||
(ptr->next != NULL && ptr->next->id != ptr->id + 1)) {
|
||||
fprintf(*fp, "%d %d\n", start->id, count);
|
||||
|
||||
for (; count > 0; count--) {
|
||||
fprintf(*fp, "%010d %05d %s\r\n",
|
||||
start->address,
|
||||
start->address > 0 ? 0 : 65535,
|
||||
start->size > 0 ? "n" : "f");
|
||||
start = start->next;
|
||||
}
|
||||
|
||||
if (ptr->next != NULL)
|
||||
start = ptr->next;
|
||||
}
|
||||
|
||||
ptr = ptr->next;
|
||||
count++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
|
||||
{
|
||||
if (*pdf == NULL || *fp == NULL)
|
||||
return 1;
|
||||
|
||||
fputs("trailer\n", *fp);
|
||||
|
||||
fputs("<<\n", *fp);
|
||||
|
||||
/*
|
||||
* File identifiers should be generated using
|
||||
* (a) Current time
|
||||
* (b) File path
|
||||
* (c) Size of file
|
||||
* (d) Values of all entries in the
|
||||
* file's document information dictionary
|
||||
*
|
||||
* It is recommended to be computed according to RFC 1321
|
||||
*/
|
||||
|
||||
time_t timestamp = time(NULL);
|
||||
int size = pdf_get_size(pdf);
|
||||
|
||||
int buf_size;
|
||||
char buf[64];
|
||||
|
||||
buf_size = snprintf(buf, 64, "%lx%x", timestamp, size);
|
||||
|
||||
unsigned char str[64];
|
||||
memcpy(str, buf, 64);
|
||||
|
||||
unsigned char fid[MD5_DIGEST_LENGTH];
|
||||
MD5(str, buf_size, fid);
|
||||
|
||||
pdf_object_t *ptr = *pdf;
|
||||
while (ptr->next != NULL)
|
||||
ptr = ptr->next;
|
||||
|
||||
/*
|
||||
* TODO: Document information dictionary
|
||||
* `"/Producer (Melon)"'
|
||||
* `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
|
||||
*
|
||||
* Trailer dictionary
|
||||
* `"/Info %d 0 R"'
|
||||
*/
|
||||
fprintf(*fp,
|
||||
"/Size %d\n/Root %d 0 R\n",
|
||||
ptr->id + 1,
|
||||
pdf_get_catalog_id(pdf));
|
||||
|
||||
fputs("/ID [", *fp);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
fputs("<", *fp);
|
||||
|
||||
for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
|
||||
fprintf(*fp, "%02x", fid[j]);
|
||||
|
||||
fputs(">", *fp);
|
||||
|
||||
if (i < 1)
|
||||
fputs(" ", *fp);
|
||||
}
|
||||
|
||||
fputs("]\n", *fp);
|
||||
|
||||
fputs(">>\n", *fp);
|
||||
|
||||
fputs("startxref\n", *fp);
|
||||
|
||||
fprintf(*fp, "%d\n", xref);
|
||||
|
||||
fputs("%%EOF\n", *fp);
|
||||
|
||||
return 0;
|
||||
}
|
10
src/version.h
Normal file
10
src/version.h
Normal file
|
@ -0,0 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2020, yzrh <yzrh@tuta.io>
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
 * Components of the program version, kept as string literals.
 * The values correspond to the 0.1.0 entry of the change log.
 * NOTE(review): how the components are joined is decided by the
 * code using them, which is not visible here; confirm there.
 */
#define VERSION "0"
|
||||
#define RELEASE "1"
|
||||
#define PATCH "0"
|
||||
#define EXTRA ""
|
Loading…
Reference in a new issue