[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [bug-gnulib] speed up MODULES.html.sh a bit
From: Ralf Wildenhues
Subject: Re: [bug-gnulib] speed up MODULES.html.sh a bit
Date: Tue, 14 Nov 2006 23:05:49 +0100
User-agent: Mutt/1.5.13 (2006-08-11)
* Bruno Haible wrote on Tue, Nov 14, 2006 at 09:30:54PM CET:
>
> You also combined adjacent sed invocations. Which rule are you using here?
> When is it safe to combine
> sed -e "$expr1" | sed -e "$expr2"
> into
> sed -e "$expr1" -e "$expr2"
> ?
Erm, for example when all of the following hold: neither script contains
jumps; both operate strictly line-based (i.e., they don't use n or N to
read the next line); they don't interfere with each other's hold space
actions; neither modifies the pattern space at the time the other uses
the r command; and the second doesn't use address ranges when the first
may change the set or number of lines in its output. Just as a guideline,
and in case of doubt I try to play safe. :-)
> Also, can you please put spaces around < and > when they are used for
> redirection? (For legibility.)
I did that, but IMHO it's a bit inconsistent:
$ echo 2> &1
| bash: syntax error near unexpected token `&'
> > As shown, the script will now use temporary files in the current
> > directory. Should I rather rewrite to use func_tmpdir and a trap
> > for cleanup, similar to what happens in gnulib-tool?
>
> Yes, please. func_tmpdir is reliable. There's no point in having shell
> scripts leave files around any more.
Added, as below. And func_exit, consequently.
> (Soon we'll need an 'aclocal' or 'gnulib-tool' for shell functions
> :-)).
I don't see any compelling reason not to simply go with aclocal yet.
There's a natural path for Autoconf to profit as well that way.
> > Otherwise, OK to apply?
>
> If it generates the same MODULES.html as the previous script, yes.
Well, sure, I did test that. I applied as below.
Cheers,
Ralf
2006-11-14 Ralf Wildenhues <address@hidden>
* MODULES.html.sh (seen_modules, seen_files): New variables.
(func_module): Rewrite to use a few less gnulib-tool and sed
invocations. Avoid a couple of quadratic algorithms for ...
(missed_modules, missed_files): ... these, with ...
(func_append, func_tmpdir): ... these new functions, from
gnulib-tool. Analogously, install traps for cleanup.
Index: MODULES.html.sh
===================================================================
RCS file: /cvsroot/gnulib/gnulib/MODULES.html.sh,v
retrieving revision 1.159
diff -u -r1.159 MODULES.html.sh
--- MODULES.html.sh 6 Nov 2006 21:58:29 -0000 1.159
+++ MODULES.html.sh 14 Nov 2006 21:51:36 -0000
@@ -1243,8 +1243,62 @@
' | sed -e "$sed_alt1" | tr -d "$trnl" | sed -e "$sed_alt2" -e "$sed_alt3"`
indent=""
-missed_modules=`gnulib-tool --list`
-missed_files=`ls -d lib/* m4/* | sed -e /CVS/d -e /README/d -e /ChangeLog/d -e /Makefile/d -e /TODO/d -e '/~$/d'`
+seen_modules=
+seen_files=
+
+# func_exit STATUS
+# exit with status
+func_exit ()
+{
+ (exit $1); exit $1
+}
+
+# func_tmpdir
+# creates a temporary directory.
+# Sets variable
+# - tmp pathname of freshly created temporary directory
+func_tmpdir ()
+{
+ # Use the environment variable TMPDIR, falling back to /tmp. This allows
+ # users to specify a different temporary directory, for example, if their
+ # /tmp is filled up or too small.
+ : ${TMPDIR=/tmp}
+ {
+ # Use the mktemp program if available. If not available, hide the error
+ # message.
+ tmp=`(umask 077 && mktemp -d "$TMPDIR/MDXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+ } ||
+ {
+ # Use a simple mkdir command. It is guaranteed to fail if the directory
+ # already exists. $RANDOM is bash specific and expands to empty in shells
+ # other than bash, ksh and zsh. Its use does not increase security;
+ # rather, it minimizes the probability of failure in a very cluttered /tmp
+ # directory.
+ tmp=$TMPDIR/MD$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+ } ||
+ {
+ echo "$0: cannot create a temporary directory in $TMPDIR" >&2
+ func_exit 1
+ }
+}
+
+# func_append var value
+# appends the given value to the shell variable var.
+if ( foo=bar; foo+=baz && test "$foo" = barbaz ) >/dev/null 2>&1; then
+ # Use bash's += operator. It reduces complexity of appending repeatedly to
+ # a single variable from O(n^2) to O(n).
+ func_append ()
+ {
+ eval "$1+=\"\$2\""
+ }
+else
+ func_append ()
+ {
+ eval "$1=\"\$$1\$2\""
+ }
+fi
# func_echo line
# outputs line with indentation.
@@ -1339,9 +1393,9 @@
element=`gnulib-tool --extract-description $1 \
| sed -e "$sed_lt" -e "$sed_gt" -e "$sed_remove_trailing_empty_line" \
- | sed -e 's,^, ,' \
- | sed -e 's,\([^a-zA-Z]\)'"${posix_functions}"'(),\1<A HREF="'"$POSIX2001_URL"'xsh/\2.html">\2</A>(),g' \
- | sed -e 's,^ ,,'`
+ -e 's,^, ,' \
+ -e 's,\([^a-zA-Z]\)'"${posix_functions}"'(),\1<A HREF="'"$POSIX2001_URL"'xsh/\2.html">\2</A>(),g' \
+ -e 's,^ ,,'`
func_echo "<TD ALIGN=LEFT VALIGN=TOP WIDTH=\"80%\">$element"
func_end TR
@@ -1353,34 +1407,36 @@
element='<A NAME="module='$1'"></A><A HREF="modules/'$1'">'$1'</A>'
func_echo "<TD ALIGN=LEFT VALIGN=TOP>$element"
- element=`gnulib-tool --extract-include-directive $1 \
+ includes=`gnulib-tool --extract-include-directive $1`
+ files=`gnulib-tool --extract-filelist $1`
+ element=`echo "$includes" \
| sed -e "$sed_lt" -e "$sed_gt" -e "$sed_remove_trailing_empty_line" \
- | sed -e 's,^#include "\(.*\)"$,#include "<A HREF="lib/\1">\1</A>",' \
- -e 's,^#include <'"${posix_headers}"'\.h>$,#include \<<A HREF="'"$POSIX2001_URL"'xbd/\1.h.html">\1.h</A>\>,' \
- | sed -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
+ -e 's,^#include "\(.*\)"$,#include "<A HREF="lib/\1">\1</A>",' \
+ -e 's,^#include <'"${posix_headers}"'\.h>$,#include \<<A HREF="'"$POSIX2001_URL"'xbd/\1.h.html">\1.h</A>\>,' \
+ -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
test -n "$element" || element='---'
func_echo "<TD ALIGN=LEFT VALIGN=TOP>$element"
sed_choose_unconditional_nonstandard_include='s,^#include "\(.*\)"$,\1,p'
- includefile=`gnulib-tool --extract-include-directive $1 \
+ includefile=`echo "$includes" \
| sed -n -e "$sed_choose_unconditional_nonstandard_include" \
| sed -e "$sed_escape_dot" | tr -d "$trnl"`
sed_choose_lib_files='s,^lib/\(.*\)$,\1,p'
- element=`gnulib-tool --extract-filelist $1 \
+ element=`echo "$files" \
| sed -e '/^$/d' \
| sed -n -e "$sed_choose_lib_files" \
| sed -e '/^'"${includefile}"'$/d' \
- | sed -e 's,^\(.*\)$,<A HREF="lib/\1">\1</A>,' \
- | sed -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
+ -e 's,^\(.*\)$,<A HREF="lib/\1">\1</A>,' \
+ -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
test -n "$element" || element='---'
func_echo "<TD ALIGN=LEFT VALIGN=TOP>$element"
sed_choose_m4_files='s,^m4/\(.*\)$,\1,p'
- element=`(gnulib-tool --extract-filelist $1 \
+ element=`(echo "$files" \
| sed -e "$sed_remove_trailing_empty_line" \
| sed -n -e "$sed_choose_m4_files" \
| sed -e '/^onceonly/d' \
- | sed -e 's,^\(.*\)$,<A HREF="m4/\1">\1</A>,'; \
+ -e 's,^\(.*\)$,<A HREF="m4/\1">\1</A>,'; \
gnulib-tool --extract-autoconf-snippet $1 \
| sed -e "$sed_remove_trailing_empty_line") \
| sed -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
@@ -1389,26 +1445,14 @@
element=`gnulib-tool --extract-dependencies $1 \
| sed -e "$sed_remove_trailing_empty_line" \
- | sed -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
+ -e 's/$/<BR>/' | tr -d "$trnl" | sed -e 's/<BR>$//'`
test -n "$element" || element='---'
func_echo "<TD ALIGN=LEFT VALIGN=TOP>$element"
func_end TR
- missed_modules=`echo "$missed_modules" | sed -e '/^'"$1"'$/d'`
-
- files=`gnulib-tool --extract-filelist $1`
- sed_removal_prefix='s,^,/^,'
- sed_removal_suffix='s,$,\$/d,'
- sed_remove_files=`echo '{'; \
- for file in $files; do \
- echo $file \
- | sed -e "$sed_escape_dot" -e "$sed_escape_slash" \
- | sed -e "$sed_removal_prefix" -e "$sed_removal_suffix"; \
- done; \
- echo '}'`
- missed_files=`echo "$missed_files" | sed -e "$sed_remove_files"`
-
+ func_append seen_modules " $1"
+ func_append seen_files " $files"
fi
}
@@ -2193,7 +2237,19 @@
func_module uptime
func_end_table
}
+
+func_tmpdir
+trap 'exit_status=$?
+ if test "$signal" != 0; then
+ echo "caught signal $signal" >&2
+ fi
+ rm -rf "$tmp"
+ exit $exit_status' 0
+for signal in 1 2 3 13 15; do
+ trap '{ signal='$signal'; func_exit 1; }' $signal
+done
+signal=0
echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">'
func_begin HTML
@@ -2224,6 +2280,12 @@
in_toc=
func_all_modules
+
+gnulib-tool --list > "$tmp/all-modules"
+missed_modules=`for module in $seen_modules; do echo $module; done \
+ | LC_ALL=C sort -u \
+ | LC_ALL=C join -v 2 - "$tmp/all-modules"`
+
if test -n "$missed_modules"; then
element="Unclassified modules - please update MODULES.html.sh"
@@ -2239,6 +2301,11 @@
fi
+LC_ALL=C ls -d lib/* m4/* | sed -e /CVS/d -e /README/d -e /ChangeLog/d -e /Makefile/d -e /TODO/d -e '/tags$/d' -e '/TAGS$/d' -e '/~$/d' > "$tmp/all-files"
+missed_files=`for file in $seen_files; do echo $file; done \
+ | LC_ALL=C sort -u \
+ | LC_ALL=C join -v 2 - "$tmp/all-files"`
+
if test -n "$missed_files"; then
element="Lone files - please create new modules containing them"
@@ -2274,3 +2341,10 @@
func_end BODY
func_end HTML
+
+rm -rf "$tmp"
+# Undo the effect of the previous 'trap' command.
+trap '' 0
+trap 'func_exit $?' 1 2 3 13 15
+
+exit 0