[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: LYNX-DEV Lynx & Tables
From: |
Tom Zerucha |
Subject: |
Re: LYNX-DEV Lynx & Tables |
Date: |
Fri, 3 Jan 1997 19:49:51 -0500 (EST) |
On Thu, 19 Dec 1996, Benjamin C. W. Sittler wrote:
> I would like to know because I'm experimenting with a cost-based rendering
> of tables into ASCII, and I'd like to determine the relative costs of
> breaking lines versus making lines very wide, in terms of readability.
This actually should be settable, or retrieved from the terminal
environment setting.
Also, I have my own awk script hack to convert text to tables - it even
uses box characters if available. The big problem is that lots of tables
are designed badly, especially if nested (one mod to the awk script below
unnests them). Many are over 160 characters wide (in one case financial
data for puts and calls that are side by side in lots of columns that
won't split neatly) with further nesting, so the borders pile up (another
mod converts nested adjacent vertical bars to one which helps a little).
The main problem I ran into is that every site has a different form of bad
HTML. Tables with missing markups ( e.g. <TABLE> some info <TR>row
info<TR>row two <TABLE> ... ) are one headache. I finally got most of the
tables from the links from http://cnnfn.com/markets to come out legibly,
but it took far more work than I thought.
address@hidden
finger address@hidden for PGP key
-----cut here------
#!/usr/bin/gawk -f
####################
# Return the length of `text` as reported by length().
# The rest of the script sizes table cells through this one function.
function vislength(text) {
    return length(text)
}
####################
# Append the current cell text (global imline) to the output line
# image[iline], padded with blanks and followed by a vertical rule.
#
#   wid - width the cell has to fill; the length of imline is subtracted
#         from it to obtain the amount of padding.
#
# Reads globals : imline, align[tnst,row,col], spaces, vrule, iline.
# Writes globals: image[iline]; imline gains one trailing blank when an odd
#                 amount of padding has to be split for centring.
#
# Alignment is taken from align[tnst,row,col]: values starting with
# "CENTER" or "RIGHT" are honoured, anything else is left-aligned.
function alimg( wid ) {
    wid -= vislength(imline);
    if( !wid )
        # Text fills the cell exactly: no padding needed.
        image[iline] = image[iline] imline vrule;
    # awk strings are 1-indexed.  A start index of 0 is outside the specified
    # range and implementations disagree on how many characters come back,
    # so every substr() here starts at 1.
    else if( substr(align[tnst,row,col],1,6) == "CENTER" ) {
        if( wid % 2 ) {
            # Odd padding: give the extra blank to the right-hand side.
            imline = imline " ";
            wid--;
        }
        wid = wid / 2;
        image[iline] = image[iline] substr( spaces, 1, wid ) imline substr( spaces, 1, wid ) vrule;
    }
    else if( substr(align[tnst,row,col],1,5) == "RIGHT" ) {
        image[iline] = image[iline] substr( spaces, 1, wid ) imline vrule;
    }
    else {
        image[iline] = image[iline] imline substr( spaces, 1, wid ) vrule;
    }
}
####################
# Render the table at nesting level tnst into image[0..iline]:
# a top border, one output line per text line of every row, and a bottom
# border.  On return iline is the index of the bottom border line.
#
# Reads globals : tnst, maxcol, currow[], rowhght[], colwid[], tcs[],
#                 ttext[], spaces, hrule, vrule and the box* corner/tee strings.
# Writes globals: image[], iline, sides, row, col, hght, len, cnt, imline,
#                 maxcol; increments currow[tnst]; clears the ttext[] cells it
#                 prints and zeroes colwid[] for this level while drawing the
#                 bottom border.
function printtab() {
    # sides = (tnst == 1 );   # suppress sides for nested tables
    sides = 1;                # always include sides
    # sides = 0;              # never include sides
    currow[tnst]++;
    # Drop trailing columns that never received a width.  The maxcol > 0
    # guard stops the scan for a table without any cells; without it the
    # loop would keep decrementing maxcol forever.
    while( maxcol > 0 && !colwid[tnst,maxcol] )
        maxcol--;

    #top line
    if( sides )
        image[0] = boxtl;
    else
        image[0] = "";
    col = 1;
    # substr() start index is 1: awk strings are 1-indexed and a start of 0
    # does not yield the same number of characters in every implementation.
    while( col < maxcol )
        image[0] = image[0] substr( hrule, 1, colwid[tnst,col++] ) boxt;
    image[0] = image[0] substr( hrule, 1, colwid[tnst,col] );
    if( sides )
        image[0] = image[0] boxtr;
    iline = 1;

    #data rows
    row = 1;
    while( row < currow[tnst] ) {
        #lines in row (valign and rowspan not handled)
        hght = 0;
        while( hght < rowhght[tnst,row] ) {
            #cols in line
            if( sides )
                image[iline] = vrule;
            else
                image[iline] = "";
            col = 1;
            while( col <= maxcol ) {
                imline = ttext[tnst,row,col,hght];
                ttext[tnst,row,col,hght] = "";
                if( tcs[tnst,row,col] == 0 ) {
                    # A zero span marks a column covered by a preceding
                    # colspan (nothing to draw), except in column 1 where it
                    # means the row has no cells: draw a blank row instead.
                    if( col == 1 ) {
                        while( col <= maxcol )
                            image[iline] = image[iline] substr( spaces, 1, colwid[tnst,col++] ) vrule;
                        col = 1;
                    }
                }
                else {
                    # Width of a spanning cell: the spanned column widths
                    # plus one separator between each pair of columns.
                    len = -1;
                    cnt = 0;
                    while( cnt < tcs[tnst,row,col] ) {
                        len += colwid[tnst,col+cnt]+1;
                        cnt++;
                    }
                    alimg(len);
                }
                col++;
            }
            hght++;
            # Without side borders, strip the final vertical rule.
            if( !sides )
                image[iline] = substr(image[iline],1,length(image[iline])-1);
            iline++;
        }
        row++;
    }

    #bottom line
    if( sides )
        image[iline] = boxbl;
    else
        image[iline] = "";
    col = 1;
    while( col < maxcol ) {
        len = colwid[tnst,col];
        colwid[tnst,col] = 0;     # reset for the next table at this level
        if( len )
            image[iline] = image[iline] substr( hrule, 1, len ) boxb;
        col++;
    }
    image[iline] = image[iline] substr( hrule, 1, colwid[tnst,col] );
    if( sides )
        image[iline] = image[iline] boxbr;
    colwid[tnst,col] = 0;
}
####################
# Open a new table cell for the tag held in $1 (a TD/TH tag, or a nested
# TABLE tag when the main rule opens an implicit cell).
#
# Advances curcol[tnst] past columns still covered by a rowspan from an
# earlier row, then records the cell's alignment, colspan and rowspan and
# sets tdflag[tnst].
#
# Reads globals : $1, tnst, currow[], defalign[].
# Writes globals: curcol[], ttext[], colsp[], rowspan[], tcs[], line[],
#                 align[], rowsp, tdflag[].
# Locals (extra parameters, never passed by callers): tag, attr.
function startentry(    tag, attr) {
    #missing </td>
    if( tdflag[tnst] )
        endentry();
    curcol[tnst]++;
    ttext[tnst,currow[tnst],curcol[tnst],0] = "";
    #this needs to be the previous value, colsp[curcol[tnst]], maybe tcs?
    colsp[tnst] = 1;
    # Skip columns occupied by a rowspan started in an earlier row.  The
    # "> 0" matters: with a plain truth test, reaching a column that has no
    # pending rowspan decrements its counter to -1, which is still "true",
    # and the loop then walks off through ever higher column numbers.
    if( rowspan[curcol[tnst]] )
        while( --rowspan[curcol[tnst]] > 0 ) {
            #see prev
            tcs[tnst,currow[tnst],curcol[tnst]] = 1;
            curcol[tnst]++;
        }
    line[tnst] = 0;

    tag = toupper($1);

    #grab alignment
    align[tnst,currow[tnst],curcol[tnst]] = defalign[tnst];
    # substr() starts at 1: awk strings are 1-indexed and a start index of 0
    # does not return the same number of characters in every implementation.
    if( substr(tag " ",1,3) == "TH " )
        align[tnst,currow[tnst],curcol[tnst]] = "CENTER";
    if( match(tag, " ALIGN=") ) {
        # Take two extra characters so that a quoted value such as
        # ALIGN="center" still yields six significant characters once the
        # quotes are removed.
        attr = substr(tag,RSTART+7,8);
        gsub(/["']/, "", attr);
        align[tnst,currow[tnst],curcol[tnst]] = substr(attr,1,6);
    }

    #grab colspan
    if( match(tag, "COLSPAN=") ) {
        attr = substr($1,RSTART+8,5);
        gsub(/["']/, "", attr);       # COLSPAN="2" would otherwise coerce to 0
        colsp[tnst] = attr;
    }
    colsp[tnst] += 0;
    # A span below 1 would be stored as tcs == 0, which printtab() treats as
    # "covered by another cell" and so drops the cell's text.
    if( colsp[tnst] < 1 )
        colsp[tnst] = 1;

    #grab rowspan
    rowsp = 1;
    if( match(tag, "ROWSPAN=") ) {
        attr = substr($1,RSTART+8,5);
        gsub(/["']/, "", attr);
        rowsp = attr;
    }
    rowspan[curcol[tnst]] = rowsp + 0;

    tcs[tnst,currow[tnst],curcol[tnst]] = colsp[tnst];
    tdflag[tnst] = 1;
}
####################
# Close the current cell at nesting level tnst.
#
# For every text line collected in the cell (0..line[tnst]) the text is
# trimmed, an empty line is replaced by a single blank, and the column
# width(s) the cell occupies are widened to fit.  The row height is raised
# to the number of lines left after trailing blank lines are discounted,
# columns covered by a colspan get tcs == 0, and the per-cell state
# (tdflag, line, tralready) is reset.
#
# Reads globals : tnst, currow[], curcol[], line[], colsp[], rowspan[].
# Writes globals: colwid[], colsp[], ttext[], rowspan[], rowhght[], tcs[],
#                 curcol[], tdflag[], line[], tralready, and the scratch
#                 variables lx, col, col1.
function endentry() {
    if( colwid[tnst,curcol[tnst]] == 0 )
        colwid[tnst,curcol[tnst]] = 1;
    if( !colsp[tnst] )
        colsp[tnst] = 1;
    colsp[tnst] += 0;
    lx = 0;
    while( lx <= line[tnst] ) {
        #trim edge spaces
        sub(/ +$/, "", ttext[tnst,currow[tnst],curcol[tnst],lx]);
        sub(/^ +/, "", ttext[tnst,currow[tnst],curcol[tnst],lx]);
        col = vislength( ttext[tnst,currow[tnst],curcol[tnst],lx] );
        if( !col ) {
            # Keep at least one character so the cell is never zero-width.
            ttext[tnst,currow[tnst],curcol[tnst],lx] = \
                ttext[tnst,currow[tnst],curcol[tnst],lx] " ";
            col = 1;
        }
        #print "row:" currow[tnst] " col:" curcol[tnst] " line:" lx " len:" col ">" ttext[tnst,currow[tnst],curcol[tnst],lx] "<";
        if( colsp[tnst] == 1 ) {
            if( col > colwid[tnst,curcol[tnst]] )
                colwid[tnst,curcol[tnst]] = col;
        }
        else {
            # Share the text width over the spanned columns.  The sum is
            # biased by colsp-1 before dividing so that int() rounds the
            # per-column share up, not down.
            col = int( (col + colsp[tnst] - 1) / colsp[tnst] );
            col1 = 0;
            while( col1 < colsp[tnst] ) {
                if( col > colwid[tnst,curcol[tnst]+col1] )
                    colwid[tnst,curcol[tnst]+col1] = col;
                rowspan[curcol[tnst]+col1] = rowspan[curcol[tnst]];
                col1++;
            }
        }
        lx++;
    }
    # Do not count trailing empty / single-blank lines toward the height.
    while( lx > 1 && (\
        !length( ttext[tnst,currow[tnst],curcol[tnst],lx-1] ) || \
        ttext[tnst,currow[tnst],curcol[tnst],lx-1] == " ") )
        lx--;
    #FIXME need to spread rowhght among rowspan
    if( lx > rowhght[tnst,currow[tnst]] )
        rowhght[tnst,currow[tnst]] = lx;
    # Mark the extra columns of a colspan as covered (tcs == 0).
    while( colsp[tnst] > 1 ) {
        curcol[tnst]++;
        tcs[tnst,currow[tnst],curcol[tnst]] = 0;
        colsp[tnst]--;
    }
    tdflag[tnst] = 0;
    line[tnst] = 0;
    tralready = 0;
}
####################
# Finish the current row at nesting level tnst and move to the next one.
# Closes a cell that is still open, remembers the widest row seen so far in
# maxcol, then resets the column and line counters and gives the new row an
# initial height of 1.
function fixrow () {
    # A cell whose </td> was omitted is still open: close it first.
    if( tdflag[tnst] ) {
        endentry();
    }
    # Must be checked before curcol[] is reset below.
    if( maxcol < curcol[tnst] ) {
        maxcol = curcol[tnst];
    }
    currow[tnst] += 1;
    rowhght[tnst,currow[tnst]] = 1;
    curcol[tnst] = 0;
    line[tnst] = 0;
}
####################
# Begin a table row for the tag held in $1.
#
# If the previous row was not closed (tralready is clear) it is finished via
# fixrow() first.  The row's default cell alignment is then taken from an
# ALIGN= attribute on the tag, or set to "default" when there is none.
#
# Reads globals : $1, tnst, tralready.
# Writes globals: tralready, defalign[tnst].
# Local (extra parameter, never passed by callers): attr.
function startrow(    attr) {
    #omitted </tr>
    if( !tralready )
        fixrow();
    tralready = 0;
    #valign?
    defalign[tnst] = "default";
    if( match(toupper($1), " ALIGN=") ) {
        # Read two extra characters and drop quote marks, so that a quoted
        # value such as ALIGN="center" is stored as CENTER and not as the
        # truncated, quote-prefixed text that never matches later on.
        attr = toupper(substr($1,RSTART+7,8));
        gsub(/["']/, "", attr);
        defalign[tnst] = substr(attr,1,6);
    }
}
####################
# Handle an explicit end of row: finish the row, then note in tralready
# that it has been closed so that the next row start does not close it a
# second time.
function endrow() {
    fixrow();
    # Set after fixrow(): fixrow() may call endentry(), which clears the flag.
    tralready = 1;
}
####################
# One-time setup: input splitting, state variables, box-drawing strings and
# the long filler strings that substr() slices borders and padding from.
BEGIN {
    # Split the input at every "<" and each record at ">", so that $1 is a
    # tag with its attributes and $2 the text that follows it.  Written as
    # plain characters: "\<" and "\>" are not defined string escapes and
    # awk implementations differ on what they do with them.
    RS = "<";
    FS = ">";

    # tnst (table nesting depth) has to be set before it is used as a
    # subscript; otherwise the element created is tdflag[""], not tdflag[0].
    tnst = 0;
    tdflag[tnst] = 0;
    colsp[tnst] = 0;
    dflg = 0;
    loff = 0;
    lofmx = 1;
    tralready = 0;

    # PC Graphics characters (single);
    # given as octal byte values - presumably IBM PC code page 437.
    boxtl = "\332"; boxt = "\302"; boxtr = "\277";
    boxbl = "\300"; boxb = "\301"; boxbr = "\331";
    vrule = "\263"; hrule = "\304"; #cross = "\305";
    # PC Graphics characters (double);
    # boxtl = "\311"; boxt = "\313"; boxtr = "\273";
    # boxbl = "\310"; boxb = "\312"; boxbr = "\274";
    # vrule = "\272"; hrule = "\315"; #cross = "\316";
    # Ascii boxes
    # boxtl = "+"; boxt = "+"; boxtr = "+";
    # boxbl = "+"; boxb = "+"; boxbr = "+";
    # vrule = "|"; hrule = "-"; #cross = "+";

    # Grow the one-character rule into a 256-character run.
    hrule = hrule hrule hrule hrule; #4
    hrule = hrule hrule hrule hrule; #16
    hrule = hrule hrule hrule hrule; #64
    hrule = hrule hrule hrule hrule; #256

    # Same for the padding: 256 blanks.
    spaces = " ";
    spaces = spaces spaces spaces spaces; #4
    spaces = spaces spaces spaces spaces; #16
    spaces = spaces spaces spaces spaces; #64
    spaces = spaces spaces spaces spaces; #256
    # colwid = 12;
}
#################### MAIN
# Main rule, run once per record.  With RS "<" and FS ">" a record is
# "tag attributes>following text": $1 is the tag, $2 the text after it.
# Tracks table nesting in tnst, collects cell text in ttext[], and on
# </TABLE> renders the table: an outermost table is printed, a nested one
# is stored as the text lines of the enclosing cell.
{
    # Upper-cased tag plus a trailing blank, so that a bare tag ("TD") and a
    # tag with attributes ("TD ALIGN=...") both match a "TD " prefix test.
    # All substr() calls start at 1: awk strings are 1-indexed, and a start
    # index of 0 does not return the same number of characters everywhere.
    tagu = toupper($1) " ";

    if( substr(tagu,1,6) == "TABLE " ) {
        if( tnst > 0 ) {
            # Nested table outside any cell: open an implicit row and cell
            # in the enclosing table to hold it.
            if( !tdflag[tnst] ) {
                startrow();
                startentry();
            }
            rowhght[tnst,currow[tnst]] = 1;
        }
        tnst++;
        currow[tnst] = 1;
        curcol[tnst] = 0;
        line[tnst] = 0;
        maxcol = 0;
        tralready = 1;
    }
    if( tnst > 0 ) {
        if( substr(tagu,1,3) == "TR " )
            startrow();
        if( substr(tagu,1,3) == "TH " \
            || substr(tagu,1,3) == "TD " )
            startentry();
        # Tags that start a new text line inside the current cell.
        if( tagu == "BR " || \
            substr(tagu,1,3) == "HR " || \
            substr(tagu,1,3) == "LI " || \
            tagu == "P " || \
            substr(tagu,1,7) == "OPTION " \
            ) {
            line[tnst]++;
            # if( substr(tagu,1,7) == "OPTION " )
            #     ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = ">";
            # else
            ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = "";
        }

        #extract ALT string
        if( substr(tagu,1,4) == "IMG " && match(tagu,"ALT=") ) {
            name = substr($1,RSTART+4,length($1)-6);
            if( substr(name,1,1) == "\"" ) {
                # Quoted value: keep what lies between the quotes.
                name = substr(name,2,length(name)-1);
                match(name,"\"");
                name = substr(name,1,RSTART-1);
            }
            else if( match(name," ") )
                # Unquoted value ends at the first blank.
                name = substr(name,1,RSTART-1);
            gsub( ">" , "", name );
            $2 = " [" name "]" $2;
        }

        #fix character formats
        if( NF > 1 && length($2) ) {
            # "\046" is the octal escape for "&".
            gsub("\046amp;","+",$2);
            gsub("\046#169;","(C)",$2);
            gsub("\046#162;","cents",$2);
            # Accept the entity with or without its closing ";" so that no
            # stray semicolon is left behind.
            gsub("\046nbsp;?"," ",$2);
            gsub("\n","",$2);
            gsub("\r","",$2);
            gsub("\t"," ",$2);
            # Collapse every run of blanks to one.  The pattern has to match
            # two or more blanks: substituting one blank for one blank always
            # reports a replacement, so looping on that never ends.
            gsub(/  +/," ",$2);
            if( !tdflag[tnst] )
                sub(" $","",$2);
        }

        #mainly for forms - extract name in select
        # if( match(toupper($1), "NAME=") ) {
        #     name = substr($1,RSTART+5,64);
        #     if( match(name," ") )
        #         name = substr(name,1,RSTART-1 );
        #     gsub( ">" , "", name );
        #     gsub( "\"" , "" , name );
        #     $2 = name "=" $2;
        # }

        # Append the text to the current line of the current cell.
        if( NF > 1 && length($2) )
            ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] \
                = ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] $2;
        #print currow[tnst]","curcol[tnst]","line[tnst] "<" $1 ">" $2 ":"

        if( substr(tagu,1,4) == "/TD " \
            || substr(tagu,1,4) == "/TH " )
            endentry();
        if( substr(tagu,1,4) == "/TR " )
            endrow();
        if( substr(tagu,1,7) == "/TABLE " ) {
            if( !tralready )
                endrow();
            currow[tnst]--;
            tralready = 0;
            printtab();
            # Back in the enclosing level (or outside all tables at 0).
            tnst--;
            rowhght[tnst,currow[tnst]] += iline + 1;
            colwid[tnst,curcol[tnst]] += 0;
            inrow = 0;
            if( !tnst )
                # Outermost table: write the rendered lines out.
                while( inrow <= iline )
                    print image[inrow++];
            else {
                # Nested table: its rendered lines become the text lines of
                # the enclosing cell, which is widened to fit them.
                while( inrow <= iline ) {
                    ttext[tnst,currow[tnst],curcol[tnst],inrow] = image[inrow];
                    inrow++;
                }
                col = vislength( image[inrow-1] );
                if( col > colwid[tnst,curcol[tnst]] )
                    colwid[tnst,curcol[tnst]] = col;
            }
            line[tnst] += iline;
            inrow = 0;
            currow[tnst+1] = 0;
            if( tnst == 0 )
                system("");     # flush the output written so far
            if( !tdflag[tnst] ) {
                endentry();
                endrow();
            }
            #for vertical stacking of all tables
            # else
            #     endentry();
            tdflag[tnst] = 0;
        }
    }
}
# After the last record: finish the output with one empty line.
END {
    print ""
}
;
; To UNSUBSCRIBE: Send a mail message to address@hidden
; with "unsubscribe lynx-dev" (without the
; quotation marks) on a line by itself.
;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: LYNX-DEV Lynx & Tables,
Tom Zerucha <=