>From a4381d8b90acf092b0823a9c05448e69505a2199 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 2 Apr 2019 10:26:30 -0700 Subject: [PATCH] Improve IBM Z patch Most of this is minor changes to use GNU style and C99 constructs. * NEWS: Mention IBM Z. * bootstrap.conf (gnulib_modules): Add stdalign. * dfltcc.c: Include stdalign.h, stdbool.h. (union aligned_dfltcc_qaf_param, union aligned_dfltcc_param_v0): New types, used for C11-style alignment. All uses changed. (init_param): Take a pointer to the aligned union instead of to the bare parameter block. * gzip.c (BUFFER_ALIGNED): New macro. (inbuf, outbuf, window): Use it, so buffers are aligned everywhere. * gzip.h (INBUFSIZ, OUTBUFSIZ): Use big buffers everywhere, unless SMALL_MEM. * zip.c (SLOW, FAST): Now enums since they need not be macros. --- NEWS | 4 + bootstrap.conf | 1 + deflate.c | 8 +- dfltcc.c | 632 +++++++++++++++++++++++++------------------------ gzip.c | 27 +-- gzip.h | 16 +- lib/.gitignore | 1 + m4/.gitignore | 1 + unzip.c | 2 +- util.c | 30 +-- zip.c | 24 +- 11 files changed, 376 insertions(+), 370 deletions(-) diff --git a/NEWS b/NEWS index 8a6b285..dbe13a5 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ GNU gzip NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Performance improvements + + IBM Z platforms now support hardware-accelerated deflation. + * Noteworthy changes in release 1.10 (2018-12-29) [stable] diff --git a/bootstrap.conf b/bootstrap.conf index 163ef82..33c69ae 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -54,6 +54,7 @@ realloc-gnu savedir sigaction stat-time +stdalign stdnoreturn sys_stat time diff --git a/deflate.c b/deflate.c index 869b902..eb697af 100644 --- a/deflate.c +++ b/deflate.c @@ -718,7 +718,8 @@ local off_t deflate_fast() * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. 
*/ -off_t deflate(int pack_level) +off_t +deflate (int pack_level) { IPos hash_head; /* head of hash chain */ IPos prev_match; /* previous match */ @@ -726,8 +727,9 @@ off_t deflate(int pack_level) int match_available = 0; /* set if previous match exists */ register unsigned match_length = MIN_MATCH-1; /* length of best match */ - lm_init(pack_level); - if (pack_level <= 3) return deflate_fast(); /* optimized for speed */ + lm_init (pack_level); + if (pack_level <= 3) + return deflate_fast(); /* Process the input block. */ while (lookahead != 0) { diff --git a/dfltcc.c b/dfltcc.c index 9010475..ba62968 100644 --- a/dfltcc.c +++ b/dfltcc.c @@ -17,16 +17,20 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include + +#include +#include #include + #ifdef DFLTCC_USDT -#include +# include #endif #include "tailor.h" #include "gzip.h" #ifdef DYN_ALLOC - error: DYN_ALLOC is not supported by DFLTCC +# error "DYN_ALLOC is not supported by DFLTCC" #endif /* =========================================================================== @@ -35,11 +39,11 @@ typedef enum { - DFLTCC_CC_OK = 0, - DFLTCC_CC_OP1_TOO_SHORT = 1, - DFLTCC_CC_OP2_TOO_SHORT = 2, - DFLTCC_CC_OP2_CORRUPT = 2, - DFLTCC_CC_AGAIN = 3, + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, } dfltcc_cc; #define DFLTCC_QAF 0 @@ -47,383 +51,391 @@ typedef enum #define DFLTCC_CMPR 2 #define DFLTCC_XPND 4 #define HBT_CIRCULAR (1 << 7) -//#define HB_BITS 15 -//#define HB_SIZE (1 << HB_BITS) +/* #define HB_BITS 15 */ +/* #define HB_SIZE (1 << HB_BITS) */ #define DFLTCC_FACILITY 151 #define DFLTCC_FMT0 0 #define CVT_CRC32 0 #define HTT_FIXED 0 #define HTT_DYNAMIC 1 +#ifndef DFLTCC_BLOCK_SIZE +# define DFLTCC_BLOCK_SIZE 1048576 +#endif +#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +# define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +#endif +#ifndef DFLTCC_LEVEL_MASK +# define DFLTCC_LEVEL_MASK 0x2 +#endif +#ifndef DFLTCC_RIBM +# define 
DFLTCC_RIBM 0 +#endif + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + struct dfltcc_qaf_param { - char fns[16]; - char reserved1[8]; - char fmts[2]; - char reserved2[6]; + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +}; + +union aligned_dfltcc_qaf_param +{ + struct dfltcc_qaf_param af; + char alignas (8) aligned; }; struct dfltcc_param_v0 { - unsigned short pbvn; /* Parameter-Block-Version Number */ - unsigned char mvn; /* Model-Version Number */ - unsigned char ribm; /* Reserved for IBM use */ - unsigned reserved32 : 31; - unsigned cf : 1; /* Continuation Flag */ - unsigned char reserved64[8]; - unsigned nt : 1; /* New Task */ - unsigned reserved129 : 1; - unsigned cvt : 1; /* Check Value Type */ - unsigned reserved131 : 1; - unsigned htt : 1; /* Huffman-Table Type */ - unsigned bcf : 1; /* Block-Continuation Flag */ - unsigned bcc : 1; /* Block Closing Control */ - unsigned bhf : 1; /* Block Header Final */ - unsigned reserved136 : 1; - unsigned reserved137 : 1; - unsigned dhtgc : 1; /* DHT Generation Control */ - unsigned reserved139 : 5; - unsigned reserved144 : 5; - unsigned sbb : 3; /* Sub-Byte Boundary */ - unsigned char oesc; /* Operation-Ending-Supplemental Code */ - unsigned reserved160 : 12; - unsigned ifs : 4; /* Incomplete-Function Status */ - unsigned short ifl; /* Incomplete-Function Length */ - unsigned char reserved192[8]; - unsigned char reserved256[8]; - unsigned char reserved320[4]; - unsigned short hl; /* History Length */ - unsigned reserved368 : 1; - unsigned short ho : 15; /* History Offset */ - unsigned int cv; /* Check Value */ - unsigned eobs : 15; /* End-of-block Symbol */ - unsigned reserved431 : 1; - unsigned char eobl : 4; /* End-of-block Length */ - unsigned reserved436 : 12; - unsigned reserved448 : 4; - unsigned short cdhtl : 12; /* Compressed-Dynamic-Huffman Table - Length */ - unsigned char reserved464[6]; - unsigned char cdht[288]; - unsigned char reserved[32]; - unsigned char csb[1152]; + unsigned 
short pbvn; /* Parameter-Block-Version Number */ + unsigned char mvn; /* Model-Version Number */ + unsigned char ribm; /* Reserved for IBM use */ + unsigned reserved32 : 31; + unsigned cf : 1; /* Continuation Flag */ + unsigned char reserved64[8]; + unsigned nt : 1; /* New Task */ + unsigned reserved129 : 1; + unsigned cvt : 1; /* Check Value Type */ + unsigned reserved131 : 1; + unsigned htt : 1; /* Huffman-Table Type */ + unsigned bcf : 1; /* Block-Continuation Flag */ + unsigned bcc : 1; /* Block Closing Control */ + unsigned bhf : 1; /* Block Header Final */ + unsigned reserved136 : 1; + unsigned reserved137 : 1; + unsigned dhtgc : 1; /* DHT Generation Control */ + unsigned reserved139 : 5; + unsigned reserved144 : 5; + unsigned sbb : 3; /* Sub-Byte Boundary */ + unsigned char oesc; /* Operation-Ending-Supplemental Code */ + unsigned reserved160 : 12; + unsigned ifs : 4; /* Incomplete-Function Status */ + unsigned short ifl; /* Incomplete-Function Length */ + unsigned char reserved192[8]; + unsigned char reserved256[8]; + unsigned char reserved320[4]; + unsigned short hl; /* History Length */ + unsigned reserved368 : 1; + unsigned short ho : 15; /* History Offset */ + unsigned int cv; /* Check Value */ + unsigned eobs : 15; /* End-of-block Symbol */ + unsigned reserved431 : 1; + unsigned char eobl : 4; /* End-of-block Length */ + unsigned reserved436 : 12; + unsigned reserved448 : 4; + unsigned short cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + unsigned char reserved464[6]; + unsigned char cdht[288]; + unsigned char reserved[32]; + unsigned char csb[1152]; }; -static int is_bit_set(const char *bits, int n) +union aligned_dfltcc_param_v0 { - return bits[n / 8] & (1 << (7 - (n % 8))); + struct dfltcc_param_v0 param; + char alignas (8) aligned; +}; + +static int +is_bit_set (const char *bits, int n) +{ + return bits[n / 8] & (1 << (7 - (n % 8))); } -static int is_dfltcc_enabled(void) +static int +is_dfltcc_enabled (void) { - const char *env; - 
char facilities[((DFLTCC_FACILITY / 64) + 1) * 8]; - register int r0 __asm__("r0"); + char facilities[(DFLTCC_FACILITY / 64 + 1) * 8]; - env = getenv("DFLTCC"); - if (env && !strcmp(env, "0")) { - return 0; - } + char const *env = getenv ("DFLTCC"); + if (env && !strcmp (env, "0")) + return 0; - r0 = sizeof(facilities) / 8; - __asm__("stfle %[facilities]\n" - : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory"); - return is_bit_set((const char *) facilities, DFLTCC_FACILITY); + register int r0 __asm__ ("r0") = sizeof facilities / 8; + __asm__ ("stfle %[facilities]\n" + : [facilities] "=Q"(facilities) : [r0] "r"(r0) : "cc", "memory"); + return is_bit_set (facilities, DFLTCC_FACILITY); } -static dfltcc_cc dfltcc(int fn, void *param, - uch **op1, size_t *len1, - const uch **op2, size_t *len2, - void *hist) +static dfltcc_cc +dfltcc (int fn, void *param, + uch **op1, size_t *len1, + uch const **op2, size_t *len2, + void *hist) { - uch *t2 = op1 ? *op1 : NULL; - size_t t3 = len1 ? *len1 : 0; - const uch *t4 = op2 ? *op2 : NULL; - size_t t5 = len2 ? *len2 : 0; - register int r0 __asm__("r0") = fn; - register void *r1 __asm__("r1") = param; - register uch *r2 __asm__("r2") = t2; - register size_t r3 __asm__("r3") = t3; - register const uch *r4 __asm__("r4") = t4; - register size_t r5 __asm__("r5") = t5; - int cc; - - __asm__ volatile( + uch *t2 = op1 ? *op1 : NULL; + size_t t3 = len1 ? *len1 : 0; + const uch *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? 
*len2 : 0; + register int r0 __asm__ ("r0") = fn; + register void *r1 __asm__ ("r1") = param; + register uch *r2 __asm__ ("r2") = t2; + register size_t r3 __asm__ ("r3") = t3; + register const uch *r4 __asm__ ("r4") = t4; + register size_t r5 __asm__ ("r5") = t5; + int cc; + + __asm__ volatile ( #ifdef DFLTCC_USDT - STAP_PROBE_ASM(zlib, dfltcc_entry, - STAP_PROBE_ASM_TEMPLATE(5)) + STAP_PROBE_ASM (zlib, dfltcc_entry, + STAP_PROBE_ASM_TEMPLATE (5)) #endif - ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" #ifdef DFLTCC_USDT - STAP_PROBE_ASM(zlib, dfltcc_exit, - STAP_PROBE_ASM_TEMPLATE(5)) + STAP_PROBE_ASM (zlib, dfltcc_exit, + STAP_PROBE_ASM_TEMPLATE (5)) #endif - "ipm %[cc]\n" - : [r2] "+r" (r2) - , [r3] "+r" (r3) - , [r4] "+r" (r4) - , [r5] "+r" (r5) - , [cc] "=r" (cc) - : [r0] "r" (r0) - , [r1] "r" (r1) - , [hist] "r" (hist) + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) #ifdef DFLTCC_USDT - , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) + , STAP_PROBE_ASM_OPERANDS (5, r2, r3, r4, r5, hist) #endif - : "cc", "memory"); - t2 = r2; t3 = r3; t4 = r4; t5 = r5; - - if (op1) - *op1 = t2; - if (len1) - *len1 = t3; - if (op2) - *op2 = t4; - if (len2) - *len2 = t5; - return (cc >> 28) & 3; + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; } -static void dfltcc_qaf(struct dfltcc_qaf_param *param) +static void +dfltcc_qaf (struct dfltcc_qaf_param *param) { - dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); + dfltcc (DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); } -static void dfltcc_gdht(struct dfltcc_param_v0 *param) +static void +dfltcc_gdht (struct dfltcc_param_v0 *param) { - const uch *next_in = inbuf + inptr; - size_t avail_in = insize - inptr; + const 
uch *next_in = inbuf + inptr; + size_t avail_in = insize - inptr; - dfltcc(DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL); + dfltcc (DFLTCC_GDHT, param, NULL, NULL, &next_in, &avail_in, NULL); } static off_t total_in; -static dfltcc_cc dfltcc_cmpr_xpnd(struct dfltcc_param_v0 *param, int fn) +static dfltcc_cc +dfltcc_cmpr_xpnd (struct dfltcc_param_v0 *param, int fn) { - uch *next_out = outbuf + outcnt; - size_t avail_out = OUTBUFSIZ - outcnt; - const uch *next_in = inbuf + inptr; - size_t avail_in = insize - inptr; - off_t consumed_in; - dfltcc_cc cc; - - cc = dfltcc(fn | HBT_CIRCULAR, param, - &next_out, &avail_out, - &next_in, &avail_in, - window); - consumed_in = next_in - (inbuf + inptr); - inptr += consumed_in; - total_in += consumed_in; - outcnt += ((OUTBUFSIZ - outcnt) - avail_out); - return cc; + uch *next_out = outbuf + outcnt; + size_t avail_out = OUTBUFSIZ - outcnt; + const uch *next_in = inbuf + inptr; + size_t avail_in = insize - inptr; + dfltcc_cc cc = dfltcc (fn | HBT_CIRCULAR, param, + &next_out, &avail_out, + &next_in, &avail_in, + window); + off_t consumed_in = next_in - (inbuf + inptr); + inptr += consumed_in; + total_in += consumed_in; + outcnt += ((OUTBUFSIZ - outcnt) - avail_out); + return cc; } -__attribute__((aligned(8))) -static struct context +static struct dfltcc_param_v0 * +init_param (union aligned_dfltcc_param_v0 *ctx) { - union - { - struct dfltcc_qaf_param af; - struct dfltcc_param_v0 param; - }; -} ctx; - -static struct dfltcc_param_v0 *init_param(struct dfltcc_param_v0 *param) -{ - const char *s; - - memset(param, 0, sizeof(*param)); -#ifndef DFLTCC_RIBM -#define DFLTCC_RIBM 0 -#endif - s = getenv("DFLTCC_RIBM"); - param->ribm = (s && *s) ? 
strtoul(s, NULL, 0) : DFLTCC_RIBM; - param->nt = 1; - param->cvt = CVT_CRC32; - param->cv = __builtin_bswap32(getcrc()); - return param; + char const *s = getenv ("DFLTCC_RIBM"); + struct dfltcc_param_v0 *param = &ctx->param; + memset (param, 0, sizeof *param); + param->ribm = s && *s ? strtoul (s, NULL, 0) : DFLTCC_RIBM; + param->nt = 1; + param->cvt = CVT_CRC32; + param->cv = __builtin_bswap32 (getcrc ()); + return param; } -static void bi_close_block(struct dfltcc_param_v0 *param) +static void +bi_close_block (struct dfltcc_param_v0 *param) { - bi_valid = param->sbb; - bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1); - send_bits( - bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), - param->eobl); - param->bcf = 0; + bi_valid = param->sbb; + bi_buf = bi_valid == 0 ? 0 : outbuf[outcnt] & ((1 << bi_valid) - 1); + send_bits (bi_reverse (param->eobs >> (15 - param->eobl), param->eobl), + param->eobl); + param->bcf = 0; } -static void close_block(struct dfltcc_param_v0 *param) +static void +close_block (struct dfltcc_param_v0 *param) { - bi_close_block(param); - bi_windup(); - param->sbb = (param->sbb + param->eobl) % 8; - if (param->sbb != 0) { - Assert(outcnt > 0, "outbuf must have enough space for EOBS"); - outcnt--; + bi_close_block (param); + bi_windup (); + param->sbb = (param->sbb + param->eobl) % 8; + if (param->sbb != 0) + { + Assert (outcnt > 0, "outbuf must have enough space for EOBS"); + outcnt--; } } -static void close_stream(struct dfltcc_param_v0 *param) +static void +close_stream (struct dfltcc_param_v0 *param) { - if (param->bcf) { - bi_close_block(param); - } - send_bits(1, 3); /* BFINAL=1, BTYPE=00 */ - bi_windup(); - put_short(0x0000); - put_short(0xFFFF); + if (param->bcf) + bi_close_block (param); + send_bits (1, 3); /* BFINAL=1, BTYPE=00 */ + bi_windup (); + put_short (0x0000); + put_short (0xFFFF); } -#define MAX(a, b) ((a) > (b) ? (a) : (b)) +/* Compress ifd into ofd in hardware or fall back to software. 
*/ -/* =========================================================================== - * Compress ifd into ofd in hardware or fall back to software. - */ -int dfltcc_deflate(int pack_level) +int +dfltcc_deflate (int pack_level) { - const char *s; - unsigned long level_mask; - unsigned long block_size; - off_t block_threshold; - struct dfltcc_param_v0 *param; - int extra; - - /* Check whether we can use hardware compression */ - if (!is_dfltcc_enabled() || getenv("SOURCE_DATE_EPOCH")) { - return deflate(pack_level); - } -#ifndef DFLTCC_LEVEL_MASK -#define DFLTCC_LEVEL_MASK 0x2 -#endif - s = getenv("DFLTCC_LEVEL_MASK"); - level_mask = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_LEVEL_MASK; - if ((level_mask & (1 << pack_level)) == 0) { - return deflate(pack_level); - } - dfltcc_qaf(&ctx.af); - if (!is_bit_set(ctx.af.fns, DFLTCC_CMPR) || - !is_bit_set(ctx.af.fns, DFLTCC_GDHT) || - !is_bit_set(ctx.af.fmts, DFLTCC_FMT0)) { - return deflate(pack_level); - } - - /* Initialize tuning parameters */ -#ifndef DFLTCC_BLOCK_SIZE -#define DFLTCC_BLOCK_SIZE 1048576 -#endif - s = getenv("DFLTCC_BLOCK_SIZE"); - block_size = (s && *s) ? strtoul(s, NULL, 0) : DFLTCC_BLOCK_SIZE; - (void)block_size; -#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE -#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 -#endif - s = getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"); - block_threshold = (s && *s) ? strtoul(s, NULL, 0) : - DFLTCC_FIRST_FHT_BLOCK_SIZE; - - /* Compress ifd into ofd in a loop */ - param = init_param(&ctx.param); - while (1) { - /* Flush the output data */ - if (outcnt > OUTBUFSIZ - 8) { - flush_outbuf(); - } - - /* Close the block */ - if (param->bcf && total_in == block_threshold && !param->cf) { - close_block(param); - block_threshold += block_size; + /* Check whether we can use hardware compression. */ + if (!is_dfltcc_enabled () || getenv ("SOURCE_DATE_EPOCH")) + return deflate (pack_level); + char const *s = getenv ("DFLTCC_LEVEL_MASK"); + unsigned long level_mask + = s && *s ? 
strtoul (s, NULL, 0) : DFLTCC_LEVEL_MASK; + if ((level_mask & (1 << pack_level)) == 0) + return deflate (pack_level); + union aligned_dfltcc_qaf_param ctx; + dfltcc_qaf (&ctx.af); + if (!is_bit_set (ctx.af.fns, DFLTCC_CMPR) + || !is_bit_set (ctx.af.fns, DFLTCC_GDHT) + || !is_bit_set (ctx.af.fmts, DFLTCC_FMT0)) + return deflate (pack_level); + + /* Initialize tuning parameters. */ + s = getenv ("DFLTCC_BLOCK_SIZE"); + unsigned long block_size + = s && *s ? strtoul (s, NULL, 0) : DFLTCC_BLOCK_SIZE; + + s = getenv ("DFLTCC_FIRST_FHT_BLOCK_SIZE"); + off_t block_threshold + = s && *s ? strtoul (s, NULL, 0) : DFLTCC_FIRST_FHT_BLOCK_SIZE; + + union aligned_dfltcc_param_v0 ctx_v0; + struct dfltcc_param_v0 *param = init_param (&ctx_v0); + + /* Compress ifd into ofd in a loop. */ + while (true) + { + /* Flush the output data. */ + if (outcnt > OUTBUFSIZ - 8) + flush_outbuf (); + + /* Close the block. */ + if (param->bcf && total_in == block_threshold && !param->cf) + { + close_block (param); + block_threshold += block_size; } - /* Read the input data */ - if (inptr == insize) { - if (fill_inbuf(1) == EOF && !param->cf) { - break; - } - inptr = 0; + /* Read the input data. */ + if (inptr == insize) + { + if (fill_inbuf (1) == EOF && !param->cf) + break; + inptr = 0; } - /* Temporarily mask some input data */ - extra = MAX(0, total_in + (insize - inptr) - block_threshold); - insize -= extra; - - /* Start a new block */ - if (!param->bcf) { - if (total_in == 0 && block_threshold > 0) { - param->htt = HTT_FIXED; - } else { - param->htt = HTT_DYNAMIC; - dfltcc_gdht(param); - } + /* Temporarily mask some input data. */ + int extra = MAX (0, total_in + (insize - inptr) - block_threshold); + insize -= extra; + + /* Start a new block. 
*/ + if (!param->bcf) + { + if (total_in == 0 && block_threshold > 0) + param->htt = HTT_FIXED; + else { + param->htt = HTT_DYNAMIC; + dfltcc_gdht (param); + } } - /* Compress inbuf into outbuf */ - dfltcc_cmpr_xpnd(param, DFLTCC_CMPR); + /* Compress inbuf into outbuf. */ + dfltcc_cmpr_xpnd (param, DFLTCC_CMPR); - /* Unmask the input data */ - insize += extra; + /* Unmask the input data. */ + insize += extra; - /* Continue the block */ - param->bcf = 1; + /* Continue the block */ + param->bcf = 1; } - close_stream(param); - setcrc(__builtin_bswap32(param->cv)); - return 0; + + close_stream (param); + setcrc (__builtin_bswap32 (param->cv)); + return 0; } -/* =========================================================================== - * Decompress ifd into ofd in hardware or fall back to software. - */ -int dfltcc_inflate(void) +/* Decompress ifd into ofd in hardware or fall back to software. */ +int +dfltcc_inflate (void) { - struct dfltcc_param_v0 *param; - dfltcc_cc cc; - - /* Check whether we can use hardware decompression */ - if (!is_dfltcc_enabled()) { - return inflate(); - } - dfltcc_qaf(&ctx.af); - if (!is_bit_set(ctx.af.fns, DFLTCC_XPND)) { - return inflate(); - } - - /* Decompress ifd into ofd in a loop */ - param = init_param(&ctx.param); - while (1) { - /* Perform I/O */ - if (outcnt == OUTBUFSIZ) { - flush_outbuf(); - } - if (inptr == insize) { - if (fill_inbuf(1) == EOF) { - /* Premature EOF */ - return 2; + /* Check whether we can use hardware decompression. */ + if (!is_dfltcc_enabled ()) + return inflate (); + union aligned_dfltcc_qaf_param ctx; + dfltcc_qaf (&ctx.af); + if (!is_bit_set (ctx.af.fns, DFLTCC_XPND)) + return inflate (); + + union aligned_dfltcc_param_v0 ctx_v0; + struct dfltcc_param_v0 *param = init_param (&ctx_v0); + + /* Decompress ifd into ofd in a loop. */ + while (true) + { + /* Perform I/O. */ + if (outcnt == OUTBUFSIZ) + flush_outbuf (); + if (inptr == insize) + { + if (fill_inbuf (1) == EOF) + { + /* Premature EOF. 
*/ + return 2; } - inptr = 0; + inptr = 0; } - /* Decompress inbuf into outbuf */ - cc = dfltcc_cmpr_xpnd(param, DFLTCC_XPND); - if (cc == DFLTCC_CC_OK) { - /* The entire deflate stream has been successfully decompressed */ + + /* Decompress inbuf into outbuf. */ + dfltcc_cc cc = dfltcc_cmpr_xpnd (param, DFLTCC_XPND); + if (cc == DFLTCC_CC_OK) + { + /* The entire deflate stream has been successfully decompressed. */ break; - } - if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { - /* The deflate stream is corrupted */ + } + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) + { + /* The deflate stream is corrupted. */ return 2; - } - /* There must be more data to decompress */ + } + /* There must be more data to decompress. */ } - if (param->sbb != 0) { - /* The deflate stream has ended in the middle of a byte - go to the next - * byte boundary, so that unzip() can read CRC and length. - */ - inptr++; + + if (param->sbb != 0) + { + /* The deflate stream has ended in the middle of a byte. Go to + the next byte boundary, so that unzip can read CRC and length. */ + inptr++; } - setcrc(__builtin_bswap32(param->cv)); /* set CRC value for unzip() */ - flush_outbuf(); /* update bytes_out for unzip() */ - return 0; + + /* Set CRC value and update bytes_out for unzip. 
*/ + setcrc (__builtin_bswap32 (param->cv)); + flush_outbuf (); + return 0; } diff --git a/gzip.c b/gzip.c index 4fffc4f..f29edaf 100644 --- a/gzip.c +++ b/gzip.c @@ -58,6 +58,7 @@ static char const *const license_msg[] = { #include #include #include +#include #include #include #include @@ -128,22 +129,20 @@ static char const *const license_msg[] = { /* global buffers */ -#ifdef IBM_Z_DFLTCC -/* DEFLATE COMPRESSION CALL works faster with page-aligned input buffers */ -__attribute__((aligned(4096))) -#endif -DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA); -#ifdef IBM_Z_DFLTCC -/* DEFLATE COMPRESSION CALL works faster with page-aligned output buffers */ -__attribute__((aligned(4096))) +/* With IBM_Z_DFLTCC, DEFLATE COMPRESSION works faster with + page-aligned input and output buffers, and requires page-aligned + windows; the alignment requirement is 4096. On other platforms + alignment doesn't hurt, and alignment up to 8192 is portable so + let's do that. */ +#ifdef __alignas_is_defined +# define BUFFER_ALIGNED alignas (8192) +#else +# define BUFFER_ALIGNED /**/ #endif -DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); +DECLARE(uch BUFFER_ALIGNED, inbuf, INBUFSIZ +INBUF_EXTRA); +DECLARE(uch BUFFER_ALIGNED, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); DECLARE(ush, d_buf, DIST_BUFSIZE); -#ifdef IBM_Z_DFLTCC -/* DEFLATE COMPRESSION CALL works only with page-aligned windows */ -__attribute__((aligned(4096))) -#endif -DECLARE(uch, window, 2L*WSIZE); +DECLARE(uch BUFFER_ALIGNED, window, 2L*WSIZE); #ifndef MAXSEG_64K DECLARE(ush, tab_prefix, 1L<