Handle invalid PDF object.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-04 17:19:06 +00:00
parent 8cd8a8fbba
commit c2afbb3cbc

View file

@@ -79,8 +79,25 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
memset(buf + end - cur, 0, size_buf - end + cur); memset(buf + end - cur, 0, size_buf - end + cur);
} }
if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL) if (head == 0) {
head = cur + (pos - buf) + 7; /* Hack needed for invalid object */
pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
tmp = memmem(buf, size_buf, " 0 obj", 6);
while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
if (pos != NULL && tmp != NULL) {
if (pos - buf < tmp - buf)
head = cur + (pos - buf) + 7;
else
head = cur + (tmp - buf) + 6;
} else if (pos != NULL) {
head = cur + (pos - buf) + 7;
} else if (tmp != NULL) {
head = cur + (tmp - buf) + 6;
}
}
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) { if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
/* We need to check if it is the object stored in stream */ /* We need to check if it is the object stored in stream */
@@ -156,9 +173,46 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if (buf == NULL) if (buf == NULL)
return 1; return 1;
fseek(*fp, ptr->address - 15, SEEK_SET); fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
/* Handle incomplete object */
head = buf;
while ((tmp = _memmem_whitespace(head,
ptr->size - (head - buf),
" 0 obj", 6)) != NULL)
head = tmp + 7;
/* Hack needed for invalid object */
while ((tmp = memmem(head,
ptr->size - (head - buf),
" 0 obj", 6)) != NULL)
head = tmp + 6;
if (head - buf > 0) {
ptr->address += head - buf;
ptr->size -= head - buf;
tmp = realloc(buf, ptr->size);
if (tmp == NULL)
return 1;
buf = tmp;
fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
}
/* Hack needed for invalid object */
fseek(*fp, ptr->address - 14, SEEK_SET);
fread(str, 8, 1, *fp); fread(str, 8, 1, *fp);
if (str[7] < '0' || str[7] > '9') {
fseek(*fp, ptr->address - 15, SEEK_SET);
fread(str, 8, 1, *fp);
}
for (int i = 7; i >= 0; i--) { for (int i = 7; i >= 0; i--) {
if (str[i] < '0' || str[i] > '9') { if (str[i] < '0' || str[i] > '9') {
if (i < 7) if (i < 7)
@@ -170,11 +224,10 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
} }
} }
fseek(*fp, ptr->address, SEEK_SET);
fread(buf, ptr->size, 1, *fp);
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL && if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) { ((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
/* Hack needed for invalid object */
(tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
/* /*
* A dictionary object may have nested dictionary, * A dictionary object may have nested dictionary,
* but it should not be in a stream * but it should not be in a stream
@@ -187,6 +240,15 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
"stream\r\n", 8) == NULL) "stream\r\n", 8) == NULL)
tail = tmp; tail = tmp;
/* Hack needed for invalid object */
while ((tmp = memmem(tail + 2,
ptr->size - (tail - buf) - 2,
">>", 2)) != NULL &&
memmem(tail + 2,
(tmp - tail) - 2,
"stream\r\n", 8) == NULL)
tail = tmp;
ptr->dictionary_size = tail - head + 2; ptr->dictionary_size = tail - head + 2;
ptr->dictionary = malloc(ptr->dictionary_size + 1); ptr->dictionary = malloc(ptr->dictionary_size + 1);
@@ -226,27 +288,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
free(buf); free(buf);
} else { } else {
/* Handle incomplete object */ ptr->object_size = ptr->size;
head = buf; ptr->object = buf;
while ((tmp = _memmem_whitespace(head,
ptr->size - (head - buf),
" 0 obj", 6)) != NULL)
head = tmp + 7;
if (head - buf > 0) {
ptr->object_size = ptr->size - (head - buf);
ptr->object = malloc(ptr->object_size);
if (ptr->object == NULL)
return 1;
memcpy(ptr->object, head, ptr->object_size);
free(buf);
} else {
ptr->object_size = ptr->size;
ptr->object = buf;
}
} }
ptr = ptr->next; ptr = ptr->next;