Fix inconsistent whitespace detection in PDF parser.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-02 23:40:54 +00:00
parent 7d9d658461
commit 4a02b8bfc7
2 changed files with 23 additions and 13 deletions

View file

@@ -8,6 +8,7 @@
* Improve PDF parser. * Improve PDF parser.
* Handle duplicated object in CAJ. * Handle duplicated object in CAJ.
* Handle duplicated image in HN.
* Fix JBIG decoder. * Fix JBIG decoder.
0.2.4 (2022-12-31) 0.2.4 (2022-12-31)

View file

@@ -19,26 +19,35 @@ static void *
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1) _memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
{ {
const char whitespace[6] = { const char whitespace[6] = {
' ', 0x00,
'\r', 0x09,
'\n', 0x0a,
'\f', 0x0c,
'\t', 0x0d,
'\0' 0x20
}; };
char tmp[s1 + 1]; char *ret = NULL;
memcpy(tmp, p1, s1);
char *ret; char str[s1 + 1];
memcpy(str, p1, s1);
size_t tmp_size = 0;
char *tmp;
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
tmp[s1] = whitespace[i]; str[s1] = whitespace[i];
if ((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
return ret; if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
continue;
if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
tmp_size = tmp - (char *) p0;
ret = tmp;
}
} }
return NULL; return ret;
} }
static int static int