The only place where swapcontext could realistically be interrupted is
inside the sched function. A remote possibility, but a possibility
nonetheless. So to rule this out, I went ahead and blocked SIGALRM there
(and inside the handler as well, for good measure, though it should be
blocked by default), and even in main. The code (see below) still fails
as described in the original email.
Note that the section in the standard you pointed to defines *all*
functions described in the standard (including unsafe ones like
swapcontext) to be safely callable inside a signal handler, provided the
signal being handled did not interrupt an unsafe function.
The only place that can be interrupted by a SIGALRM in the code below is
the eh function, which makes no function calls at all. Hence, calling
swapcontext from within the signal handler is perfectly valid.
The reason why libunwind is implicated is the following:
1. When linking the program with the standalone libunwind 0.98-5, 0.98.6
(latest stable I believe) or the latest git-snapshot, it fails quickly
and reliably. If linked against the gcc-supplied libunwind, I can run it
overnight without failure.
2. The SIGILL, when it happens, occurs inside libunwind, more precisely
in _ULia64_install_cursor (). Apparently, the unwind code does not like
to be interrupted and context-switched.
I managed to insert a conditional breakpoint just before it crashes. My
suspicion is that in the last bundle before it returns and installs the
new context, pfs is loaded with a nonsensical value (here, a stack
address). Now, whether this is the only problem or just the last symptom
in a chain of mishaps, I cannot say, but I'm attaching a gdb log to help
you with your debugging efforts (libunwind-0.98-5):
Roy
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
(gdb) bt
#0 _ULia64_install_cursor () at ../../src/ia64/Ginstall_cursor.S:292
#1 0x4000000000001340 in eh () at eh3.cc:15
(gdb) x /3i $pc
0x20000000003297e0 <_ULia64_install_cursor+832>: [MIB] mov.m ar.unat=r25
0x20000000003297e1 <_ULia64_install_cursor+833>: mov.i ar.pfs=r14
0x20000000003297e2 <_ULia64_install_cursor+834>: br.ret.sptk.many b0;;
(gdb) p /x $r14
$45 = 0x600000000001b498
(gdb) info reg
r0 0x0 0
r1 0x6000000000001b58 6917529027641088856
r2 0x600000000001f5c0 6917529027641210304
r3 0x600000000001f4f8 6917529027641210104
r4 0x0 0
r5 0x0 0
r6 0x0 0
r7 0x0 0
r8 0x0 0
r9 0x600000000001f4f0 6917529027641210096
r10 0xf 15
r11 0xc 12
r12 0x60000000000210a0 6917529027641217184
r13 0x20000000002f2260 2305843009216782944
r14 0x600000000001b498 6917529027641193624
r15 0x6000000000024060 6917529027641229408
r16 0x1 1
r17 0x0 0
r18 0x0 0
r19 0x0 0
r20 0x0 0
r21 0x60000000000207d0 6917529027641214928
r22 0x60000000000207e0 6917529027641214944
r23 0x60000000000207f0 6917529027641214960
r24 0x6000000000020800 6917529027641214976
r25 0x0 0
r26 0x0 0
r27 0x9804c8a70033f 2674341018862399
r28 0x0 0
r29 0x60000000000210a0 6917529027641217184
r30 0x60000000000126a0 6917529027641157280
r31 0x5ac141 5947713
b0 0x4000000000001340 4611686018427392832
b1 0x40000000000012c0 4611686018427392704
b2 0x0 0
b3 0x0 0
b4 0x0 0
b5 0x0 0
b6 0x20000000003cd640 2305843009217680960
b7 0xa00000010000e040 -6917529023346057152
vfp 0x0 0
vrap 0x0 0
pr 0x5ac141 5947713
ip 0x20000000003297e0 2305843009217009632
psr 0x10 16
cfm 0x387 903
kr0 0x0 0
kr1 0x0 0
kr2 0x0 0
kr3 0x0 0
kr4 0x0 0
kr5 0x0 0
kr6 0x0 0
kr7 0x0 0
rsc 0xf 15
bsp 0x60000000000126d8 6917529027641157336
bspstore 0x60000000000126a0 6917529027641157280
rnat 0x0 0
fcr 0x0 0
eflag 0x0 0
csd 0x0 0
ssd 0x0 0
cflg 0x0 0
fsr 0x0 0
fir 0x0 0
fdr 0x0 0
ccv 0x0 0
unat 0x0 0
fpsr 0x9804c8a70033f 2674341018862399
itc 0x0 0
pfs 0xc00000000000038e -4611686018427386994
lc 0x0 0
ec 0x0 0
nat0 0x0 0
nat1 0x0 0
nat2 0x0 0
nat3 0x0 0
nat4 0x0 0
nat5 0x0 0
nat6 0x0 0
nat7 0x0 0
nat8 0x0 0
nat9 0x0 0
nat10 0x0 0
nat11 0x0 0
nat12 0x0 0
nat13 0x0 0
nat14 0x0 0
nat15 0x0 0
nat16 0x0 0
nat17 0x0 0
nat18 0x0 0
nat19 0x0 0
nat20 0x0 0
nat21 0x0 0
nat22 0x0 0
nat23 0x0 0
nat24 0x0 0
nat25 0x0 0
nat26 0x0 0
nat27 0x0 0
nat28 0x0 0
nat29 0x0 0
nat30 0x0 0
nat31 0x0 0
nat32 0x0 0
nat33 0x0 0
nat34 0x0 0
nat35 0x0 0
nat36 0x0 0
nat37 0x0 0
nat38 0x0 0
nat39 0x0 0
nat40 0x0 0
nat41 0x0 0
nat42 0x0 0
nat43 0x0 0
nat44 0x0 0
nat45 0x0 0
nat46 0x0 0
nat47 0x0 0
nat48 0x0 0
nat49 0x0 0
nat50 0x0 0
nat51 0x0 0
nat52 0x0 0
nat53 0x0 0
nat54 0x0 0
nat55 0x0 0
nat56 0x0 0
nat57 0x0 0
nat58 0x0 0
nat59 0x0 0
nat60 0x0 0
nat61 0x0 0
nat62 0x0 0
nat63 0x0 0
nat64 0x0 0
nat65 0x0 0
nat66 0x0 0
nat67 0x0 0
nat68 0x0 0
nat69 0x0 0
nat70 0x0 0
nat71 0x0 0
nat72 0x0 0
nat73 0x0 0
nat74 0x0 0
nat75 0x0 0
nat76 0x0 0
nat77 0x0 0
nat78 0x0 0
nat79 0x0 0
nat80 0x0 0
nat81 0x0 0
nat82 0x0 0
nat83 0x0 0
nat84 0x0 0
nat85 0x0 0
nat86 0x0 0
nat87 0x0 0
nat88 0x0 0
nat89 0x0 0
nat90 0x0 0
nat91 0x0 0
nat92 0x0 0
nat93 0x0 0
nat94 0x0 0
nat95 0x0 0
nat96 0x0 0
nat97 0x0 0
nat98 0x0 0
nat99 0x0 0
nat100 0x0 0
nat101 0x0 0
nat102 0x0 0
nat103 0x0 0
nat104 0x0 0
nat105 0x0 0
nat106 0x0 0
nat107 0x0 0
nat108 0x0 0
nat109 0x0 0
nat110 0x0 0
nat111 0x0 0
nat112 0x0 0
nat113 0x0 0
nat114 0x0 0
nat115 0x0 0
nat116 0x0 0
nat117 0x0 0
nat118 0x0 0
nat119 0x0 0
nat120 0x0 0
nat121 0x0 0
nat122 0x0 0
nat123 0x0 0
nat124 0x0 0
nat125 0x0 0
nat126 0x0 0
nat127 0x0 0
bof 0x60000000000126a0 6917529027641157280
r32 0x600000000001f598 6917529027641210264
r33 0x0 0
r34 0x600000000001f4e0 6917529027641210080
r35 0x60000000000126a0 6917529027641157280
r36 0x0 0
r37 0x600000000001b498 6917529027641193624
r38 0x600000000001f598 6917529027641210264
r39 0x0 0
r40 0x0 0
r41 0x0 0
r42 0x0 0
r43 0x0 0
r44 0x0 0
r45 0x0 0
r46 0x0 0
r47 0x0 0
r48 0x0 0
r49 0x0 0
r50 0x0 0
r51 0x0 0
r52 0x0 0
r53 0x0 0
r54 0x0 0
r55 0x0 0
r56 0x0 0
r57 0x0 0
r58 0x0 0
r59 0x0 0
r60 0x0 0
r61 0x0 0
r62 0x0 0
r63 0x0 0
r64 0x0 0
r65 0x0 0
r66 0x0 0
r67 0x0 0
r68 0x0 0
r69 0x0 0
r70 0x0 0
r71 0x0 0
r72 0x0 0
r73 0x0 0
r74 0x0 0
r75 0x0 0
r76 0x0 0
r77 0x0 0
r78 0x0 0
r79 0x0 0
r80 0x0 0
r81 0x0 0
r82 0x0 0
r83 0x0 0
r84 0x0 0
r85 0x0 0
r86 0x0 0
r87 0x0 0
r88 0x0 0
r89 0x0 0
r90 0x0 0
r91 0x0 0
r92 0x0 0
r93 0x0 0
r94 0x0 0
r95 0x0 0
r96 0x0 0
r97 0x0 0
r98 0x0 0
r99 0x0 0
r100 0x0 0
r101 0x0 0
r102 0x0 0
r103 0x0 0
r104 0x0 0
r105 0x0 0
r106 0x0 0
r107 0x0 0
r108 0x0 0
r109 0x0 0
r110 0x0 0
r111 0x0 0
r112 0x0 0
r113 0x0 0
r114 0x0 0
r115 0x0 0
r116 0x0 0
r117 0x0 0
r118 0x0 0
r119 0x0 0
r120 0x0 0
r121 0x0 0
r122 0x0 0
r123 0x0 0
r124 0x0 0
r125 0x0 0
r126 0x0 0
r127 0x0 0
p0 0x1 1
p1 0x0 0
p2 0x0 0
p3 0x0 0
p4 0x0 0
p5 0x0 0
p6 0x1 1
p7 0x0 0
p8 0x1 1
p9 0x0 0
p10 0x0 0
p11 0x0 0
p12 0x0 0
p13 0x0 0
p14 0x1 1
p15 0x1 1
p16 0x0 0
p17 0x1 1
p18 0x0 0
p19 0x1 1
p20 0x1 1
p21 0x0 0
p22 0x1 1
p23 0x0 0
p24 0x0 0
p25 0x0 0
p26 0x0 0
p27 0x0 0
p28 0x0 0
p29 0x0 0
p30 0x0 0
p31 0x0 0
p32 0x0 0
p33 0x0 0
p34 0x0 0
p35 0x0 0
p36 0x0 0
p37 0x0 0
p38 0x0 0
p39 0x0 0
p40 0x0 0
p41 0x0 0
p42 0x0 0
p43 0x0 0
p44 0x0 0
p45 0x0 0
p46 0x0 0
p47 0x0 0
p48 0x0 0
p49 0x0 0
p50 0x0 0
p51 0x0 0
p52 0x0 0
p53 0x0 0
p54 0x0 0
p55 0x0 0
p56 0x0 0
p57 0x0 0
p58 0x0 0
p59 0x0 0
p60 0x0 0
p61 0x0 0
p62 0x0 0
p63 0x0 0
(gdb) si
Program received signal SIGILL, Illegal instruction.
_ULia64_install_cursor () at ../../src/ia64/Ginstall_cursor.S:293
293 mov.i ar.pfs = r14
2: /x $r14 = 0x600000000001b498
1: $r14 = 6917529027641193624
(gdb)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#include <signal.h>
#include <sys/time.h>
#include <ucontext.h>
#include <stdlib.h>
ucontext_t main_ctx, eh_ctx, sched_ctx;
char sched_stack[60000];
char eh_stack[60000];
// Body of the "eh" context: throw and immediately catch a C++ exception,
// forever. The function never returns on its own.
void eh (void) {
    while ( true ) { // runs until the process crashes
        try {
            throw 42;
        }
        catch ( int ) {
            // nothing to do; discard the exception and go around again
        }
    }
}
// Body of the scheduler context: block SIGALRM, then keep handing control
// straight back to the eh context each time the scheduler is resumed.
void sched (void) {
    sigset_t alarm_mask;
    sigemptyset(&alarm_mask);
    sigaddset(&alarm_mask, SIGALRM);
    sigprocmask(SIG_BLOCK, &alarm_mask, NULL); // block SIGALRM before the first switch

    while ( true ) {
        // save the scheduler's state in sched_ctx and resume the eh context
        swapcontext(&sched_ctx, &eh_ctx);
    }
}
void alrm ( int signum ) { // just switch (from eh) to sched context
sigset_t ss;
sigemptyset(&ss);
sigaddset(&ss, SIGALRM);
sigprocmask (SIG_BLOCK, &ss, NULL); // block SIGALRM,
redundant // but still...
swapcontext(&eh_ctx, &sched_ctx);
}
// Sets up the eh and scheduler contexts on their own static stacks, arms a
// 10 ms periodic SIGALRM timer, and then enters the eh context.
int main() {
    // eh context: runs eh() on eh_stack; falls back to main_ctx if it ends.
    getcontext(&eh_ctx);
    eh_ctx.uc_link = &main_ctx;
    eh_ctx.uc_stack.ss_sp = eh_stack;
    eh_ctx.uc_stack.ss_size = sizeof eh_stack;
    makecontext(&eh_ctx, eh, 0);

    // scheduler context: runs sched() on sched_stack; has no successor.
    getcontext(&sched_ctx);
    sched_ctx.uc_link = NULL;
    sched_ctx.uc_stack.ss_sp = sched_stack;
    sched_ctx.uc_stack.ss_size = sizeof sched_stack;
    makecontext(&sched_ctx, sched, 0);

    // Timer that fires after 10 ms and then every 10 ms thereafter, so the
    // eh context is interrupted by SIGALRM at that rate.
    itimerval tick;
    tick.it_value.tv_sec = 0;
    tick.it_value.tv_usec = 10000;
    tick.it_interval = tick.it_value;

    // Block SIGALRM in main itself (just in case). Both contexts above were
    // captured with getcontext() before this point.
    sigset_t alarm_mask;
    sigemptyset(&alarm_mask);
    sigaddset(&alarm_mask, SIGALRM);
    sigprocmask(SIG_BLOCK, &alarm_mask, NULL);

    signal(SIGALRM, alrm);
    setitimer(ITIMER_REAL, &tick, NULL);

    swapcontext(&main_ctx, &eh_ctx); // start the eh context
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Peter A. Buhr wrote:
The following appears to be a libunwind bug with respect to context
switching and timeslicing, which occurs in our threading package. The
program below is the simplest example that generates the failure. Here
are additional environment details. Please contact me if you require
additional details.
@plg4[1]% uname -a
Linux plg4.uwaterloo.ca 2.6.25-2-mckinley #2 SMP Fri Jun 27 05:32:44
UTC 2008 ia64 GNU/Linux
@plg4[2]% g++ -v test.cc
Using built-in specs.
Target: ia64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian
4.3.2-1' --with-bugurl=file:///usr/share/doc/gcc-4.3/README.Bugs
--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr
--enable-shared --with-system-zlib --libexecdir=/usr/lib
--without-included-gettext --enable-threads=posix --enable-nls
--with-gxx-include-dir=/usr/include/c++/4.3 --program-suffix=-4.3
--enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc
--enable-mpfr --disable-libssp --with-system-libunwind
--enable-checking=release --build=ia64-linux-gnu
--host=ia64-linux-gnu --target=ia64-linux-gnu
Thread model: posix
gcc version 4.3.2 (Debian 4.3.2-1) COLLECT_GCC_OPTIONS='-v'
'-shared-libgcc'
/usr/lib/gcc/ia64-linux-gnu/4.3.2/cc1plus -quiet -v -D_GNU_SOURCE
test.cc -quiet -dumpbase test.cc -auxbase test -version -o
/tmp/ccgGQUbV.s
ignoring nonexistent directory "/usr/local/include/ia64-linux-gnu"
ignoring nonexistent directory
"/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../../../ia64-linux-gnu/include"
ignoring nonexistent directory "/usr/include/ia64-linux-gnu"
#include "..." search starts here:
#include <...> search starts here:
/usr/include/c++/4.3
/usr/include/c++/4.3/ia64-linux-gnu
/usr/include/c++/4.3/backward
/usr/local/include
/usr/lib/gcc/ia64-linux-gnu/4.3.2/include
/usr/lib/gcc/ia64-linux-gnu/4.3.2/include-fixed
/usr/include
End of search list.
GNU C++ (Debian 4.3.2-1) version 4.3.2 (ia64-linux-gnu)
compiled by GNU C version 4.3.2, GMP version 4.2.2, MPFR version
2.3.1.
GGC heuristics: --param ggc-min-expand=100 --param
ggc-min-heapsize=131072
Compiler executable checksum: 95c628cc090dee6f51aeb20e8aa4faf3
COLLECT_GCC_OPTIONS='-v' '-shared-libgcc'
as -x -o /tmp/ccWwmhkO.o /tmp/ccgGQUbV.s
COMPILER_PATH=/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/:/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/:/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/4.3.2/:/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-v' '-shared-libgcc'
/usr/lib/gcc/ia64-linux-gnu/4.3.2/collect2 --hash-style=both
-dynamic-linker /lib/ld-linux-ia64.so.2
/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../../crt1.o
/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../../crti.o
/usr/lib/gcc/ia64-linux-gnu/4.3.2/crtbegin.o
-L/usr/lib/gcc/ia64-linux-gnu/4.3.2
-L/usr/lib/gcc/ia64-linux-gnu/4.3.2
-L/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../.. /tmp/ccWwmhkO.o -lstdc++
-lm -lgcc_s -lunwind -lgcc -lc -lgcc_s -lunwind -lgcc
/usr/lib/gcc/ia64-linux-gnu/4.3.2/crtend.o
/usr/lib/gcc/ia64-linux-gnu/4.3.2/../../../crtn.o
@plg4[3]% ldd a.out
linux-gate.so.1 => (0xa000000000000000)
libstdc++.so.6 => /usr/lib/libstdc++.so.6 (0x2000000000050000)
libm.so.6.1 => /lib/libm.so.6.1 (0x2000000000224000)
libgcc_s.so.1 => /lib/libgcc_s.so.1 (0x20000000002f4000)
libunwind.so.7 => /lib/libunwind.so.7 (0x2000000000318000)
libc.so.6.1 => /lib/libc.so.6.1 (0x2000000000360000)
/lib/ld-linux-ia64.so.2 (0x2000000000000000)
@plg4[4]% a.out
Illegal instruction (core dumped)
================================= test.cc ================================
#include <signal.h>
#include <sys/time.h>
#include <ucontext.h>
#include <stdlib.h>
ucontext_t main_ctx, eh_ctx, sched_ctx;
char sched_stack[60000];
char eh_stack[60000];
// The eh context: throws and catches a C++ exception in an endless loop.
// Never returns.
void eh (void) {
    for ( ; ; ) { // yes, we loop indefinitely ... until we crash
        try {
            throw 42;
        } catch ( int ) { }
    }
}
void sched (void) { // the scheduler' context, all
it does is switch back to the eh context
for ( ; ; ) {
swapcontext(&sched_ctx, &eh_ctx); // re-schedule the eh
context
}
}
void alrm ( int signum ) { // signal handler just
switches (from the eh) to the sched context
swapcontext(&eh_ctx, &sched_ctx);
}
// Creates the eh and scheduler contexts on their own static stacks, installs
// the SIGALRM handler with a 10 ms periodic timer, and enters the eh context.
int main() {
    // eh context: executes eh() on eh_stack, linked back to main_ctx.
    getcontext(&eh_ctx);
    eh_ctx.uc_link = &main_ctx;
    eh_ctx.uc_stack.ss_size = sizeof eh_stack;
    eh_ctx.uc_stack.ss_sp = eh_stack;
    makecontext(&eh_ctx, eh, 0);

    // scheduler context: executes sched() on sched_stack, no successor.
    getcontext(&sched_ctx);
    sched_ctx.uc_link = NULL;
    sched_ctx.uc_stack.ss_size = sizeof sched_stack;
    sched_ctx.uc_stack.ss_sp = sched_stack;
    makecontext(&sched_ctx, sched, 0);

    // Interrupt the eh context with SIGALRM every 10 ms: the first expiry
    // and the repeat interval are both 10000 microseconds.
    itimerval period;
    period.it_value.tv_sec = 0;
    period.it_value.tv_usec = 10000;
    period.it_interval.tv_sec = 0;
    period.it_interval.tv_usec = 10000;

    signal(SIGALRM, alrm);
    setitimer(ITIMER_REAL, &period, NULL);

    swapcontext(&main_ctx, &eh_ctx); // start the eh context
}
_______________________________________________
Libunwind-devel mailing list
address@hidden
http://lists.nongnu.org/mailman/listinfo/libunwind-devel
--
Oracle Email Signature Logo
Tom Honermann | Senior Principal Software Engineer | 503.276.2354
Oracle PeopleTools Development
1211 SW 5th Ave, Suite 9080, Portland, OR 97204