Fix inconsistent whitespace detection in PDF parser.

Signed-off-by: yzrh <yzrh@noema.org>
This commit is contained in:
yzrh 2023-01-02 23:40:54 +00:00
parent 7d9d658461
commit 4a02b8bfc7
2 changed files with 23 additions and 13 deletions

View file

@ -8,6 +8,7 @@
* Improve PDF parser.
* Handle duplicated object in CAJ.
* Handle duplicated image in HN.
* Fix JBIG decoder.
0.2.4 (2022-12-31)

View file

@ -19,26 +19,35 @@ static void *
/*
 * Find the first occurrence of the s1-byte pattern p1 inside the s0-byte
 * buffer p0 that is immediately followed by a PDF whitespace byte
 * (NUL, HT, LF, FF, CR or SP).
 *
 * The trailing whitespace byte must itself lie inside p0, so a pattern
 * that ends exactly at the end of the buffer does not match.
 *
 * Returns a pointer to the start of the earliest match, or NULL when
 * there is none.
 */
static void *
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
{
	static const unsigned char whitespace[6] = {
		0x00,
		0x09,
		0x0a,
		0x0c,
		0x0d,
		0x20
	};
	const unsigned char *haystack = p0;

	/* The pattern plus its trailing whitespace byte must fit in p0. */
	if (s1 >= s0)
		return NULL;

	/* Last offset at which pattern + one trailing byte still fits. */
	size_t last = s0 - s1 - 1;

	/*
	 * Scanning left to right returns the earliest match no matter which
	 * whitespace byte terminates it, and needs no scratch copy of p1.
	 */
	for (size_t i = 0; i <= last; i++) {
		/* Cheap test first: the byte after the candidate must be whitespace. */
		if (memchr(whitespace, haystack[i + s1], sizeof whitespace) == NULL)
			continue;

		/* Skip memcmp for an empty pattern so p1 is never dereferenced. */
		if (s1 == 0 || memcmp(haystack + i, p1, s1) == 0)
			return (void *) (haystack + i);
	}

	return NULL;
}
static int