[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
speed up getndelim2
From: |
Eric Blake |
Subject: |
speed up getndelim2 |
Date: |
Tue, 29 Apr 2008 22:45:59 -0600 |
User-agent: |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080213 Thunderbird/2.0.0.12 Mnenhy/0.7.5.666 |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Wow! Check out the speedup with this patch, comparing an -O2 /bin/cut
pre-patch against an unoptimized -g cut post-patch, and that's even with
running /bin/cut second so it benefits from any file system caching effects.
$ dd count=20k </dev/random > data
20480+0 records in
20480+0 records out
10485760 bytes (10 MB) copied, 2.83 s, 3.7 MB/s
$ time (~/coreutils/src/cut -f 2-3 < data > data2)
real 0m4.168s
user 0m3.952s
sys 0m0.109s
$ time (/bin/cut -f 2-3 < data > data1)
real 0m6.194s
user 0m6.109s
sys 0m0.108s
$ cmp data1 data2
$
And all I did was make getndelim2 smarter to process data in blocks
instead of bytes, by exploiting freadptr and memchr2, and cut is roughly
40% faster.
- --
Don't work too hard, make some time for fun as well!
Eric Blake address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iEYEARECAAYFAkgX+YcACgkQ84KuGfSFAYDM6QCfaPNXI3S29gizPBmMc91mX+05
XMYAn0iWeiBoIVOgkn/8ClJMHJD4bcHR
=fmzx
-----END PGP SIGNATURE-----
From 032b1ce80ba621afc4f241570e0ae6716a59ce60 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Tue, 29 Apr 2008 11:59:35 -0600
Subject: [PATCH] Optimize getndelim2 to use block operations when possible.
* modules/getndelim2 (Depends-on): Add stdbool, freadptr,
freadseek, and memchr2.
* lib/getndelim2.c (getndelim2): Use them for block reads.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 7 ++++
lib/getndelim2.c | 79 +++++++++++++++++++++++++++++++++++++++------------
modules/getndelim2 | 4 ++
3 files changed, 71 insertions(+), 19 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index ab0e292..5cf1dd3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-04-29 Eric Blake <address@hidden>
+
+ Optimize getndelim2 to use block operations when possible.
+ * modules/getndelim2 (Depends-on): Add stdbool, freadptr,
+ freadseek, and memchr2.
+ * lib/getndelim2.c (getndelim2): Use them for block reads.
+
2008-04-29 Bruno Haible <address@hidden>
* m4/inet_ntop.m4 (gl_INET_NTOP): Require gl_USE_SYSTEM_EXTENSIONS.
diff --git a/lib/getndelim2.c b/lib/getndelim2.c
index 8930a5b..be34e40 100644
--- a/lib/getndelim2.c
+++ b/lib/getndelim2.c
@@ -23,8 +23,10 @@
#include "getndelim2.h"
-#include <stdlib.h>
+#include <stdbool.h>
#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
#if USE_UNLOCKED_IO
# include "unlocked-io.h"
@@ -41,6 +43,10 @@
#include <limits.h>
#include <stdint.h>
+#include "freadptr.h"
+#include "freadseek.h"
+#include "memchr2.h"
+
#ifndef SSIZE_MAX
# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
#endif
@@ -63,6 +69,7 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax,
ssize_t bytes_stored = -1;
char *ptr = *lineptr;
size_t size = *linesize;
+ bool done = false;
if (!ptr)
{
@@ -81,23 +88,62 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax,
if (nbytes_avail == 0 && nmax <= size)
goto done;
+ /* Normalize delimiters, since memchr2 doesn't handle EOF. */
+ if (delim1 == EOF)
+ delim1 = delim2;
+ else if (delim2 == EOF)
+ delim2 = delim1;
+
flockfile (stream);
- for (;;)
+ while (!done)
{
/* Here always ptr + size == read_pos + nbytes_avail. */
int c;
+ const char *buffer;
+ size_t buffer_len;
+
+ buffer = freadptr (stream, &buffer_len);
+ if (buffer)
+ {
+ if (delim1 != EOF)
+ {
+ const char *end = memchr2 (buffer, delim1, delim2, buffer_len);
+ if (end)
+ {
+ buffer_len = end - buffer + 1;
+ done = true;
+ }
+ }
+ }
+ else
+ {
+ c = getc (stream);
+ if (c == EOF)
+ {
+ /* Return partial line, if any. */
+ if (read_pos == ptr)
+ goto unlock_done;
+ else
+ break;
+ }
+ if (c == delim1 || c == delim2)
+ done = true;
+ buffer_len = 1;
+ }
/* We always want at least one byte left in the buffer, since we
always (unless we get an error while reading the first byte)
NUL-terminate the line buffer. */
- if (nbytes_avail < 2 && size < nmax)
+ if (nbytes_avail < 1 + buffer_len && size < nmax)
{
size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size;
char *newptr;
+ if (newsize < buffer_len)
+ newsize = buffer_len + size;
if (! (size < newsize && newsize <= nmax))
newsize = nmax;
@@ -118,25 +164,20 @@ getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax,
read_pos = size - nbytes_avail + ptr;
}
- c = getc (stream);
- if (c == EOF)
+ if (1 < nbytes_avail)
{
- /* Return partial line, if any. */
- if (read_pos == ptr)
- goto unlock_done;
+ size_t copy_len = nbytes_avail - 1;
+ if (buffer_len < copy_len)
+ copy_len = buffer_len;
+ if (buffer)
+ memcpy (read_pos, buffer, copy_len);
else
- break;
+ *read_pos = c;
+ read_pos += copy_len;
+ nbytes_avail -= copy_len;
}
-
- if (nbytes_avail >= 2)
- {
- *read_pos++ = c;
- nbytes_avail--;
- }
-
- if (c == delim1 || c == delim2)
- /* Return the line. */
- break;
+ if (buffer && freadseek (stream, buffer_len))
+ goto unlock_done;
}
/* Done - NUL terminate and return the number of bytes read.
diff --git a/modules/getndelim2 b/modules/getndelim2
index 972ee1a..51a63bf 100644
--- a/modules/getndelim2
+++ b/modules/getndelim2
@@ -9,7 +9,11 @@ m4/getndelim2.m4
Depends-on:
ssize_t
+stdbool
stdint
+freadptr
+freadseek
+memchr2
configure.ac:
gl_GETNDELIM2
--
1.5.5.1
- speed up getndelim2,
Eric Blake <=