/*
 * Copyright (c) 2000-2001
 * Kevin Atkinson
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without
 * fee, provided that the above copyright notice appear in all copies
 * and that both that copyright notice and this permission notice
 * appear in supporting documentation. Kevin Atkinson makes no
 * representations about the suitability of this software for any
 * purpose. It is provided "as is" without express or implied
 * warranty.
 *
 */

/*
 * Compresses or uncompresses a sorted word list by prefix sharing.
 *
 * Compressed stream layout, one record per word:
 *   [0x00]   optional escape, present when the count byte is > MAX_CTRL
 *   count    number of leading bytes shared with the previous word, plus 1
 *   suffix   the remaining bytes of the word (every byte is > MAX_CTRL)
 *
 * Exit status: 0 success, 1 usage error, 2 corrupt input, 3 output error.
 */

#include <stdio.h>

#if defined(__CYGWIN__) || defined (_WIN32)

#  include <io.h>
#  include <fcntl.h>

#  define SETBIN(fno)  _setmode( _fileno( fno ), _O_BINARY )

#else

#  define SETBIN(fno)

#endif

#define BUFSIZE 256

/* Bytes <= MAX_CTRL separate words; every byte of a word is > MAX_CTRL. */
#define MAX_CTRL 32

/* Longest shared prefix that can be encoded: prefix + 1 must fit in a byte. */
#define MAX_PREFIX 254

/* Prints the help text to stderr. */
void usage (void)
{
  fputs("Compresses or uncompresses sorted word lists.\n"     , stderr);
  fputs("For best result the locale should be set to C\n"     , stderr);
  fputs("before sorting by setting the environmental\n"       , stderr);
  fputs("variable LANG to \"C\" before sorting.\n"            , stderr);
  fputs("Copyright 2001,2004 by Kevin Atkinson.\n"            , stderr);
  fputs("Usage: word-list-compress c[ompress]|d[ecompress]\n" , stderr);
}

/*
 * Reads the next word from `in` into `w` and NUL-terminates it.
 *
 * Bytes <= MAX_CTRL are skipped before the word and end it.  `w` must have
 * room for BUFSIZE bytes (at most BUFSIZE - 1 characters plus the NUL).
 *
 * Returns:
 *   0  no word left (end of input reached before any character was stored)
 *   1  a word was stored in `w`
 *   2  the word is longer than BUFSIZE - 1 characters; `w` holds its first
 *      BUFSIZE - 1 characters
 */
int get_word(FILE * in, char * w)
{
  const char * start = w;
  int room = BUFSIZE - 1;
  int c;

  /* skip separators in front of the word */
  do {
    c = getc(in);
  } while (c != EOF && c <= MAX_CTRL);

  if (c != EOF) {
    do {
      *w++ = (char)c;
      c = getc(in);
    } while (c != EOF && c > MAX_CTRL && --room);
  }
  *w = '\0';

  /* give back the byte that ended the word; there is nothing to push
     back once end of input has been seen */
  if (c != EOF) ungetc(c, in);

  /* A word that runs right up to end of input (no trailing separator) is
     still a word: report it now, the next call will return 0. */
  if (w == start) return 0; /* done */
  if (room == 0)  return 2; /* error, word larger than 255 chars */
  return 1;                 /* normal return */
}

/* Reports unreadable/malformed input on stderr and yields exit status 2. */
static int corrupt_input(void)
{
  fputs("ERROR: Corrupt Input.\n", stderr);
  return 2;
}

/* Reports a write failure (output space full or other output fault) on
   stderr and yields exit status 3. */
static int output_fault(void)
{
  fputs("ERROR: Output Data Error.\n", stderr);
  return 3;
}

/* Reads words from `in` and writes the compressed stream to `out`.
   Returns the process exit status. */
static int compress_words(FILE * in, FILE * out)
{
  char bufs[2][BUFSIZE];
  char * prev = bufs[0];
  char * cur  = bufs[1];
  int status;

  prev[0] = '\0';

  while ((status = get_word(in, cur)) == 1) {
    char * swap;
    int shared = 0;

    /* get the length of the prefix; capped so that shared + 1 never
       wraps to 0 when written as a single byte */
    while (shared < MAX_PREFIX
           && prev[shared] != '\0' && prev[shared] == cur[shared])
      ++shared;

    /* a count byte > MAX_CTRL would look like a word character to the
       decoder, so it is announced by a 0 byte */
    if (shared + 1 > MAX_CTRL && putc('\0', out) == EOF)
      return output_fault();
    if (putc(shared + 1, out) == EOF) return output_fault();
    if (fputs(cur + shared, out) == EOF) return output_fault();

    swap = prev;
    prev = cur;
    cur  = swap;
  }

  if (fflush(out) != 0) return output_fault();
  if (status != 0) return corrupt_input(); /* over-long word */
  return 0;
}

/* Reads a compressed stream from `in` and writes one word per line to
   `out`.  Returns the process exit status. */
static int decompress_words(FILE * in, FILE * out)
{
  char cur[BUFSIZE + 1]; /* word + '\n' + NUL */
  int prev_len = 0;      /* length of the word currently held in cur */
  int count = getc(in);

  while (count != EOF) {
    int len;
    int c;

    if (count == 0) count = getc(in); /* escaped (large) count byte */
    len = count - 1;

    /* The prefix is taken from what is already in cur, so it must not be
       longer than the previous word; otherwise stale or uninitialised
       bytes would be emitted. */
    if (len < 0 || len > prev_len) return corrupt_input();

    while ((c = getc(in)) > MAX_CTRL && len < BUFSIZE)
      cur[len++] = (char)c;
    if (len >= BUFSIZE) return corrupt_input();
    prev_len = len;

    /* one "puts" faster than 2 puts */
    cur[len]     = '\n';
    cur[len + 1] = '\0';
    if (fputs(cur, out) == EOF) return output_fault();

    /* the byte that ended the suffix is the next count (or EOF) */
    count = c;
  }

  /* surface buffered write errors before claiming success */
  if (fflush(out) != 0) return output_fault();
  return 0;
}

/* Dispatches on the first letter of the single argument (an optional
   leading '-' is ignored): v = version, c = compress, d = decompress. */
int main (int argc, char *argv[])
{
  if (argc == 2) {
    char mode = argv[1][0];
    if (mode == '-') mode = argv[1][1];

    switch (mode) {
    case 'v':
      fputs("version X\n", stderr);
      return 0;
    case 'c':
      SETBIN (stdout);
      return compress_words(stdin, stdout);
    case 'd':
      SETBIN (stdin);
      return decompress_words(stdin, stdout);
    default:
      break;
    }
  }

  usage();
  return 1;
}