/* -*- mode: C -*-
*
* File: pdf-stm-f-lzw.c
* Date: Wed Aug 15 14:41:18 2007
*
* GNU PDF Library - LZW encoder/decoder filter
*
*/
/* Copyright (C) 2007-2011 Free Software Foundation, Inc. */
/* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
/* Define LZW encoder */
PDF_STM_FILTER_DEFINE (pdf_stm_f_lzwenc_get,
stm_f_lzwenc_init,
stm_f_lzwenc_apply,
stm_f_lzwenc_deinit);
/* Define LZW decoder */
PDF_STM_FILTER_DEFINE (pdf_stm_f_lzwdec_get,
stm_f_lzwdec_init,
stm_f_lzwdec_apply,
stm_f_lzwdec_deinit);
#define LZW_PARAM_EARLY_CHANGE "EarlyChange"
/* -- LZW helper definitions -- */
typedef unsigned lzw_code_t;
#define LZW_DEFAULT_EARLY_CHANGE 1
#define LZW_CODE_SIZE (sizeof (lzw_code_t) << 3)
#define LZW_CACHE_SIZE (sizeof (lzw_code_t) << 3)
#define LZW_MIN_BITSIZE 9
#define LZW_MAX_BITSIZE 12
#define LZW_MAX_DICTSIZE (1 << LZW_MAX_BITSIZE)
#define LZW_NULL_INDEX ~0U
enum lzw_special_codes_e {
LZW_RESET_CODE = 256,
LZW_EOD_CODE,
LZW_FIRST_CODE
};
/* -- LZW code output/input -- */
/*
* Object to read and write codes of variable bitsize in a buffer.
* Warning: using both get and put functions may break the buffer.
*/
struct lzw_buffer_s
{
pdf_buffer_t *buf;
pdf_uchar_t cache [LZW_CACHE_SIZE];
pdf_size_t cache_rp;
pdf_size_t cache_wp;
lzw_code_t valbuf;
lzw_code_t maxval;
int bitsize;
int valbits;
};
static void
lzw_buffer_init (struct lzw_buffer_s *b,
int bitsize)
{
b->buf = NULL;
b->valbuf = 0;
b->valbits = 0;
b->bitsize = bitsize;
b->maxval = (1 << bitsize) - 1;
b->cache_wp = 0;
b->cache_rp = 0;
}
static enum pdf_stm_filter_apply_status_e
lzw_buffer_get_code (struct lzw_buffer_s *b,
pdf_bool_t finish,
lzw_code_t *code) /* out */
{
lzw_code_t r;
while (b->valbits <= LZW_CODE_SIZE - 8 && !finish)
{
if (!pdf_buffer_eob_p (b->buf))
{
b->valbuf |=
(lzw_code_t) b->buf->data [b->buf->rp++]
<< (LZW_CODE_SIZE - 8 - b->valbits);
b->valbits += 8;
}
else
return PDF_STM_FILTER_APPLY_STATUS_NO_INPUT;
}
if (b->valbits < b->bitsize)
return PDF_STM_FILTER_APPLY_STATUS_EOF;
r = b->valbuf >> (LZW_CODE_SIZE - b->bitsize);
b->valbuf <<= b->bitsize;
b->valbits -= b->bitsize;
*code = r;
return PDF_STM_FILTER_APPLY_STATUS_OK;
}
/* Once finished, call with 0 as code value to flush the buffer. */
static void
lzw_buffer_put_code (struct lzw_buffer_s *b,
lzw_code_t code)
{
b->valbuf |= (lzw_code_t) code << (LZW_CODE_SIZE - b->bitsize - b->valbits);
b->valbits += b->bitsize;
while (b->valbits >= 8)
{
if (pdf_buffer_full_p (b->buf))
{
b->cache [b->cache_wp++] =
(pdf_uchar_t) (b->valbuf >> (LZW_CODE_SIZE - 8));
}
else
{
b->buf->data [b->buf->wp++] =
(pdf_uchar_t) (b->valbuf >> (LZW_CODE_SIZE - 8));
}
b->valbuf <<= 8;
b->valbits -= 8;
}
}
static pdf_bool_t
lzw_buffer_flush (struct lzw_buffer_s *b)
{
while (b->cache_rp != b->cache_wp &&
!pdf_buffer_full_p (b->buf))
{
b->buf->data [b->buf->wp++] = b->cache [b->cache_rp++];
}
if (pdf_buffer_full_p (b->buf))
return PDF_FALSE;
b->cache_wp = 0;
b->cache_rp = 0;
return PDF_TRUE;
}
static pdf_bool_t
lzw_buffer_inc_bitsize (struct lzw_buffer_s *b)
{
if (b->bitsize == LZW_MAX_BITSIZE)
return PDF_FALSE;
++b->bitsize;
b->maxval = (1 << b->bitsize) - 1;
return PDF_TRUE;
}
static void
lzw_buffer_set_bitsize (struct lzw_buffer_s *b,
int newsize)
{
b->bitsize = newsize;
b->maxval = (1 << newsize) - 1;
}
/* -- LZW dictionary handler -- */
/*
* The strings are stored in a non balanced ordered binary tree.
*/
struct lzw_string_s
{
lzw_code_t prefix; /* Prefix string code */
pdf_uchar_t suffix; /* Appended character */
lzw_code_t first; /* First string with the same prefix. */
lzw_code_t left; /* Next string with smaller suffix and same prefix. */
lzw_code_t right; /* Next string with greater suffix and same prefix. */
};
struct lzw_dict_s
{
struct lzw_string_s table [LZW_MAX_DICTSIZE];
pdf_size_t size;
};
static void
lzw_string_init (struct lzw_string_s *s)
{
memset (s, 0xFF, sizeof (struct lzw_string_s));
}
static void
lzw_dict_init (struct lzw_dict_s *d)
{
int i;
memset (d->table,
0xFF,
sizeof (struct lzw_string_s) * LZW_MAX_DICTSIZE);
for (i = 0; i < LZW_FIRST_CODE; i++)
{
d->table[i].suffix = (pdf_uchar_t) (i & 0xFF);
}
d->size = LZW_FIRST_CODE;
}
static pdf_bool_t
lzw_dict_add (struct lzw_dict_s *d,
struct lzw_string_s *s)
{
lzw_code_t index;
pdf_bool_t must_add;
if (s->prefix == LZW_NULL_INDEX)
{
s->prefix = s->suffix;
return PDF_FALSE; /* The string is a basic character, found! */
}
index = d->table[s->prefix].first;
if (index == LZW_NULL_INDEX)
{
d->table[s->prefix].first = d->size;
}
else
{
must_add = PDF_FALSE;
while (!must_add)
{
if (s->suffix == d->table[index].suffix)
{
s->prefix = index;
return PDF_FALSE; /* The string is already in the table, found! */
}
else if (s->suffix < d->table[index].suffix)
{
if (d->table[index].left == LZW_NULL_INDEX)
{
d->table[index].left = d->size;
must_add = PDF_TRUE;
}
else
{
index = d->table[index].left;
}
}
else
{
if (d->table[index].right == LZW_NULL_INDEX)
{
d->table[index].right = d->size;
must_add = PDF_TRUE;
}
else
{
index = d->table[index].right;
}
}
}
}
d->table[d->size++] = *s;
return PDF_TRUE;
}
static void
lzw_dict_reset (struct lzw_dict_s *dict)
{
lzw_dict_init (dict);
}
static void
lzw_dict_fast_add (struct lzw_dict_s *d,
lzw_code_t prefix,
pdf_uchar_t suffix)
{
d->table[d->size].prefix = prefix;
d->table[d->size].suffix = suffix;
d->size++;
}
static void
lzw_dict_decode (struct lzw_dict_s *d,
lzw_code_t code,
pdf_uchar_t **decode,
pdf_size_t *size)
{
*size = 0;
do
{
*(*decode)-- = d->table[code].suffix;
++(*size);
code = d->table[code].prefix;
}
while (code != LZW_NULL_INDEX);
(*decode)++;
}
/* -- THE ENCODER -- */
struct lzwenc_state_s
{
/* cached params */
pdf_i32_t early_change;
/* encoding state */
pdf_bool_t must_reset;
struct lzw_buffer_s buffer;
struct lzw_dict_s dict;
struct lzw_string_s string;
pdf_bool_t really_finish;
};
static pdf_bool_t
stm_f_lzwenc_init (const pdf_hash_t *params,
void **state,
pdf_error_t **error)
{
struct lzwenc_state_s *filter_state;
filter_state = pdf_alloc (sizeof (struct lzwenc_state_s));
if (!filter_state)
{
pdf_set_error (error,
PDF_EDOMAIN_BASE_STM,
PDF_ENOMEM,
"cannot create LZW encoder internal state: "
"couldn't allocate %lu bytes",
(unsigned long)sizeof (struct lzwenc_state_s));
return PDF_FALSE;
}
filter_state->early_change = LZW_DEFAULT_EARLY_CHANGE; /* set default */
/* EarlyChange is optional! */
if (params &&
pdf_hash_key_p (params, LZW_PARAM_EARLY_CHANGE))
{
filter_state->early_change = pdf_hash_get_bool (params,
LZW_PARAM_EARLY_CHANGE);
}
lzw_buffer_init (&filter_state->buffer, LZW_MIN_BITSIZE);
lzw_dict_init (&filter_state->dict);
lzw_string_init (&filter_state->string);
filter_state->must_reset = PDF_TRUE;
filter_state->really_finish = PDF_FALSE;
*state = filter_state;
return PDF_TRUE;
}
static enum pdf_stm_filter_apply_status_e
stm_f_lzwenc_apply (void *state,
pdf_buffer_t *in,
pdf_buffer_t *out,
pdf_bool_t finish,
pdf_error_t **error)
{
struct lzwenc_state_s *st;
st = state;
st->buffer.buf = out;
if (!lzw_buffer_flush (&st->buffer))
return PDF_STM_FILTER_APPLY_STATUS_NO_OUTPUT;
if (st->really_finish)
return PDF_STM_FILTER_APPLY_STATUS_EOF;
if (st->must_reset)
{
lzw_buffer_put_code (&st->buffer, LZW_RESET_CODE);
st->must_reset = PDF_FALSE;
}
while (!pdf_buffer_eob_p (in) &&
!pdf_buffer_full_p (out))
{
st->string.suffix = in->data [in->rp++];
/* FIXME: only test if value is present (use lzw_dict_find)
*
* lzw_dict_add tests for the longest matching input and if not
* matching it adds a new entry into dict. Adding to dict at this point
* makes trouble with bitsize increase.
* As stated in ISO32000 bitsize increase shall happen after inserting
* table entry 511. This is what we do. But the algorithm steps in
* ISO32000 are:
* 1) Accumulate longest input sequence
* 2) Emmit Code
* 3) Create new table entry
*
* We actually do:
* 1,3) inside of lzw_dict_add
* 2)
*/
if (lzw_dict_add (&st->dict, &st->string))
{
lzw_buffer_put_code (&st->buffer, st->string.prefix);
/* FIXME: insert new dict entry here */
st->string.prefix = st->string.suffix;
/* FIXME: first bitsize increase (EarlyChange 0) will happen if
* (511 == 511 - 0) but this is one code to early.
* dict.size is 511 after adding entry 510. ISO32000 specify bitsize
* increase after emitting entry 511 (dict.size is 512 at this point)
* We should test with (st->dict.size - 1 == ...)
*/
if (st->dict.size == st->buffer.maxval - st->early_change)
{
PDF_DEBUG_BASE ("[LZW encoder] Increasing bitsize... "
"(dictsize[%d] == maxval[%d] - earlychange[%d])",
st->dict.size,
st->buffer.maxval,
st->early_change);
if (!lzw_buffer_inc_bitsize (&st->buffer))
{
lzw_buffer_put_code (&st->buffer, LZW_RESET_CODE);
lzw_buffer_set_bitsize (&st->buffer, LZW_MIN_BITSIZE);
lzw_dict_reset (&st->dict);
}
}
}
}
if (finish)
{
lzw_buffer_put_code (&st->buffer, st->string.prefix);
if (st->dict.size == st->buffer.maxval - st->early_change)
{
PDF_DEBUG_BASE ("[LZW encoder] Increasing bitsize (FINISHING)... "
"(dictsize[%d] == maxval[%d] - earlychange[%d])",
st->dict.size,
st->buffer.maxval,
st->early_change);
lzw_buffer_inc_bitsize (&st->buffer);
}
lzw_buffer_put_code (&st->buffer, LZW_EOD_CODE);
lzw_buffer_put_code (&st->buffer, 0); /* flush */
/* Delete 0 byte if buffer was already aligned before flush. */
if (out->data[out->wp - 1] == 0x0)
out->wp--;
st->really_finish = PDF_TRUE;
return (pdf_buffer_full_p (out) ?
PDF_STM_FILTER_APPLY_STATUS_NO_OUTPUT :
PDF_STM_FILTER_APPLY_STATUS_EOF);
}
if (pdf_buffer_full_p (out))
return PDF_STM_FILTER_APPLY_STATUS_NO_OUTPUT;
/* Default, request more input */
return PDF_STM_FILTER_APPLY_STATUS_NO_INPUT;
}
static void
stm_f_lzwenc_deinit (void *state)
{
pdf_dealloc (state);
}
/* -- THE DECODER -- */
enum lzwdec_state
{
LZWDEC_STATE_START,
LZWDEC_STATE_CLEAN,
LZWDEC_STATE_WRITE,
LZWDEC_STATE_READ,
LZWDEC_STATE_LOOP_WRITE,
LZWDEC_STATE_LOOP_READ
};
struct lzwdec_state_s
{
/* cached params */
pdf_i32_t early_change;
/* state */
pdf_uchar_t dec_buf [LZW_MAX_DICTSIZE];
pdf_uchar_t *decoded;
pdf_size_t dec_size;
lzw_code_t new_code;
lzw_code_t old_code;
/* flow managment */
enum lzwdec_state state_pos;
pdf_error_t *tmp_error;
struct lzw_buffer_s buffer;
struct lzw_dict_s dict;
};
static pdf_bool_t
stm_f_lzwdec_init (const pdf_hash_t *params,
void **state,
pdf_error_t **error)
{
struct lzwdec_state_s *filter_state;
filter_state = pdf_alloc (sizeof (struct lzwdec_state_s));
if (!filter_state)
{
pdf_set_error (error,
PDF_EDOMAIN_BASE_STM,
PDF_ENOMEM,
"cannot create LZW decoder internal state: "
"couldn't allocate %lu bytes",
(unsigned long)sizeof (struct lzwdec_state_s));
return PDF_FALSE;
}
filter_state->early_change = LZW_DEFAULT_EARLY_CHANGE; /* set default */
/* EarlyChange is optional! */
if (params &&
pdf_hash_key_p (params, LZW_PARAM_EARLY_CHANGE))
{
filter_state->early_change = pdf_hash_get_bool (params, LZW_PARAM_EARLY_CHANGE);
}
lzw_buffer_init (&filter_state->buffer, LZW_MIN_BITSIZE);
lzw_dict_init (&filter_state->dict);
filter_state->old_code = LZW_NULL_INDEX;
filter_state->decoded = filter_state->dec_buf + (LZW_MAX_DICTSIZE-2);
filter_state->dec_size = 0;
filter_state->state_pos = LZWDEC_STATE_START;
filter_state->tmp_error = NULL;
*state = filter_state;
return PDF_TRUE;
}
static void
stm_f_lzwdec_deinit (void *state)
{
pdf_dealloc (state);
}
/* Returns PDF_FALSE if no more output */
static pdf_bool_t
lzwdec_put_decoded (struct lzwdec_state_s *st,
pdf_buffer_t *out)
{
pdf_bool_t no_output = PDF_FALSE;
if (st->dec_size)
{
pdf_size_t to_write;
PDF_ASSERT (out->size >= out->wp);
/* output the decoded string */
to_write = st->dec_size;
if (st->dec_size > (out->size - out->wp))
{
to_write = out->size - out->wp;
no_output = PDF_TRUE;
}
memcpy (out->data + out->wp, st->decoded, to_write);
out->wp += to_write;
st->decoded += to_write;
st->dec_size -= to_write;
}
return !no_output;
}
/* Returns PDF_FALSE if no more output */
static pdf_bool_t
lzwdec_put_code (struct lzwdec_state_s *st,
pdf_buffer_t *out,
unsigned long code)
{
if (pdf_buffer_full_p (out))
return PDF_FALSE;
out->data [out->wp++] = (pdf_uchar_t) (code & 0xFF);
return PDF_TRUE;
}
#define LZWDEC_CHECK(st, pos, what) \
do { \
enum pdf_stm_filter_apply_status_e ret; \
\
(st)->state_pos = (pos); \
if ((ret = (what)) != PDF_STM_FILTER_APPLY_STATUS_OK) \
{ return ret; } \
} while (0);
static enum pdf_stm_filter_apply_status_e
stm_f_lzwdec_apply (void *state,
pdf_buffer_t *in,
pdf_buffer_t *out,
pdf_bool_t finish,
pdf_error_t **error)
{
struct lzwdec_state_s *st;
st = state;
st->buffer.buf = in;
switch (st->state_pos)
{
case LZWDEC_STATE_START:
break;
case LZWDEC_STATE_CLEAN:
goto lzwdec_state_clean;
case LZWDEC_STATE_WRITE:
goto lzwdec_state_write;
case LZWDEC_STATE_READ:
goto lzwdec_state_read;
case LZWDEC_STATE_LOOP_WRITE:
goto lzwdec_state_loop_write;
case LZWDEC_STATE_LOOP_READ:
goto lzwdec_state_loop_read;
default:
break;
}
do
{
/* need a reset */
lzw_buffer_set_bitsize (&st->buffer, LZW_MIN_BITSIZE);
lzw_dict_reset (&st->dict);
do
{
lzwdec_state_clean:
LZWDEC_CHECK (st,
LZWDEC_STATE_CLEAN,
lzw_buffer_get_code (&st->buffer,
finish,
&st->new_code));
}
while (st->new_code == LZW_RESET_CODE);
if (st->new_code != LZW_EOD_CODE)
{
lzwdec_state_write:
st->state_pos = LZWDEC_STATE_WRITE;
if (!lzwdec_put_code (st, out, st->new_code))
return PDF_STM_FILTER_APPLY_STATUS_NO_OUTPUT; /* No more output */
st->old_code = st->new_code;
lzwdec_state_read:
LZWDEC_CHECK (st,
LZWDEC_STATE_READ,
lzw_buffer_get_code (&st->buffer,
finish,
&st->new_code));
}
while (st->new_code != LZW_EOD_CODE &&
st->new_code != LZW_RESET_CODE)
{
st->decoded = st->dec_buf + (LZW_MAX_DICTSIZE - 2);
/* Is new code in the dict? */
if (st->new_code < st->dict.size)
{
lzw_dict_decode (&st->dict, st->new_code,
&st->decoded, &st->dec_size);
lzw_dict_fast_add (&st->dict, st->old_code, st->decoded[0]);
}
else
{
lzw_dict_decode (&st->dict, st->old_code,
&st->decoded, &st->dec_size);
lzw_dict_fast_add (&st->dict, st->old_code, st->decoded[0]);
st->decoded [st->dec_size++] = st->decoded [0];
}
/* output the decoded string */
lzwdec_state_loop_write:
st->state_pos = LZWDEC_STATE_LOOP_WRITE;
if (!lzwdec_put_decoded (st, out))
return PDF_STM_FILTER_APPLY_STATUS_NO_OUTPUT; /* No more output */
if (st->dict.size == st->buffer.maxval - 1 - st->early_change)
{
lzw_buffer_inc_bitsize (&st->buffer);
/* break; We must wait for the reset code, don't reset yet. */
}
/* get next code */
st->old_code = st->new_code;
lzwdec_state_loop_read:
LZWDEC_CHECK (st,
LZWDEC_STATE_LOOP_READ,
lzw_buffer_get_code (&st->buffer,
finish,
&st->new_code));
}
}
while (st->new_code != LZW_EOD_CODE);
st->state_pos = LZWDEC_STATE_START;
return PDF_STM_FILTER_APPLY_STATUS_EOF;
}
/* End of pdf_stm_f_lzw.c */