lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: LYNX-DEV Lynx & Tables


From: Tom Zerucha
Subject: Re: LYNX-DEV Lynx & Tables
Date: Fri, 3 Jan 1997 19:49:51 -0500 (EST)

On Thu, 19 Dec 1996, Benjamin C. W. Sittler wrote:

> I would like to know because I'm experimenting with a cost-based rendering
> of tables into ASCII, and I'd like to determine the relative costs of
> breaking lines versus making lines very wide, in terms of readability.

This actually should be settable, or retrieved from the terminal
environment setting.

Also, I have my own awk script hack to convert text to tables - it even
uses box characters if available.  The big problem is that lots of tables
are designed badly, especially if nested (one mod to the awk script below
unnests them).  Many are over 160 characters wide (in one case financial
data for puts and calls that are side by side in lots of columns that
won't split neatly) with further nesting, so the borders pile up (another 
mod converts nested adjacent vertical bars to one which helps a little).

The main problem I ran into is that every site has a different form of bad
HTML.  Tables with missing markups ( e.g. <TABLE> some info <TR>row
info<TR>row two <TABLE> ... ) are one headache.  I finally got most of the
tables from the links from http://cnnfn.com/markets to come out legibly,
but it took far more work than I thought. 

address@hidden
finger address@hidden for PGP key

-----cut here------
#!/usr/bin/gawk -f

####################
# vislength(str): width of str as it will appear in the rendered table.
# Currently this is simply the character count reported by length().
function vislength(str)
{
  return length(str "");
}

####################
# alimg(wid): append the current cell text to the output line being built.
#
#   wid - total width available for the cell (column widths plus the
#         separators swallowed by a colspan).
#
# Reads the globals imline (cell text), align[tnst,row,col], spaces and
# vrule; appends the padded text followed by vrule to image[iline].
# Alignment is CENTER, RIGHT, or (anything else) left.
#
# substr() positions start at 1.  A start of 0 makes POSIX-conforming awks
# (including current gawk) return one character fewer than requested, which
# broke both the alignment keyword tests and the padding.
function alimg( wid ) {

  # wid becomes the amount of padding still needed
  wid -= vislength(imline);

  if( !wid )
    image[iline] = image[iline] imline vrule;

  else if( substr(align[tnst,row,col],1,6) == "CENTER" ) {
    # odd padding: give the extra blank to the right-hand side
    if( wid  % 2 ) {
      imline = imline " ";
      wid--;
    }
    wid = wid / 2;
    image[iline] = image[iline] substr( spaces, 1, wid ) imline \
                   substr( spaces, 1, wid ) vrule;
  }

  else if( substr(align[tnst,row,col],1,5) == "RIGHT" ) {
    image[iline] = image[iline] substr( spaces, 1, wid ) imline vrule;
  }

  else {
    image[iline] = image[iline] imline substr( spaces, 1, wid ) vrule;
  }
}

####################
# printtab(): render the table at nesting level tnst into image[0..iline].
#
# Output layout: image[0] is the top border, then one image[] line per text
# line of every data row, then the bottom border at image[iline].
# Reads colwid[], rowhght[], ttext[], tcs[], currow[], maxcol and the box
# drawing strings; clears ttext[] and colwid[] for this level as it goes.
# valign and rowspan are not handled.
#
# Fixes relative to the posted version:
#   * substr() positions start at 1 (a start of 0 yields one character too
#     few on POSIX-conforming awks, including current gawk);
#   * a statement that the mailer had wrapped before a ")" is rejoined;
#   * the trailing-empty-column trim stops at column 0, so a table with no
#     cells no longer loops forever.
#
# NOTE(review): maxcol is a single global shared by all nesting levels.
function printtab()  {

#  sides = (tnst == 1 ); #suppress sides for nested tables
  sides = 1; #always include sides
#  sides = 0; #never include sides

  currow[tnst]++;

  # drop trailing columns that never received any width
  while( maxcol > 0 && !colwid[tnst,maxcol] )
    maxcol--;

#top line
  if( sides )
    image[0] = boxtl;
  else
    image[0] = "";

  col = 1;
  while( col < maxcol )
    image[0] = image[0] substr( hrule, 1, colwid[tnst,col++] ) boxt;
  image[0] = image[0] substr( hrule, 1, colwid[tnst,col] );

  if( sides )
    image[0] = image[0] boxtr;

  iline = 1;
#data rows
  row = 1;
  while( row < currow[tnst] ) {
#lines in row (valign and rowspan not handled)
    hght = 0;
    while( hght < rowhght[tnst,row] ) {
#cols in line
      if( sides )
        image[iline] = vrule;
      else
        image[iline] = "";

      col = 1;
      while( col <= maxcol ) {

        # alimg() picks the text up from the global imline
        imline = ttext[tnst,row,col,hght];
        ttext[tnst,row,col,hght] = "";

        if( tcs[tnst,row,col] == 0 ) {
          # span count 0: column covered by a colspan or never filled.
          # Only when the very first column is empty is a blank row emitted.
          if( col == 1 ) {
            while( col <= maxcol )
              image[iline] = image[iline] \
                             substr( spaces, 1, colwid[tnst,col++] ) vrule;
            col = 1;
          }
        }
        else {
          # total width of the spanned columns plus the separators between
          len = -1;
          cnt = 0;
          while( cnt < tcs[tnst,row,col] ) {
            len += colwid[tnst,col+cnt]+1;
            cnt++;
          }
          alimg(len);
        }

        col++;
      }

      hght++;
      # without side borders, strip the vrule that ends the line
      if( !sides )
        image[iline] = substr( image[iline], 1, length(image[iline])-1 );
      iline++;
    }
    row++;
  }

#bottom line
  if( sides )
    image[iline] = boxbl;
  else
    image[iline] = "";
  col = 1;
  while( col < maxcol ) {
    len = colwid[tnst,col];
    colwid[tnst,col] = 0;
    if( len )
      image[iline] = image[iline] substr( hrule, 1, len ) boxb;
    col++;
  }
  image[iline] = image[iline] substr( hrule, 1, colwid[tnst,col] );
  if( sides )
    image[iline] = image[iline] boxbr;
  colwid[tnst,col] = 0;
}

####################
# startentry(): begin a new table cell (TD or TH) at nesting level tnst.
#
# $1 holds the tag text (name plus attributes).  Closes a cell left open by
# a missing </td>, advances curcol[tnst], skips columns still occupied by a
# rowspan from an earlier row, and records the cell's alignment, colspan
# (tcs[]) and rowspan.  Sets tdflag[tnst] to mark the cell as open.
#
# Fix: the TH test used substr(...,0,3); with a start of 0 POSIX-conforming
# awks (including current gawk) return only two characters, so TH cells were
# never centred.  Positions start at 1.
function startentry() {
#missing </td>
  if( tdflag[tnst] )
    endentry();

  curcol[tnst]++;
  ttext[tnst,currow[tnst],curcol[tnst],0] = "";

#this needs to be the previous value, colsp[curcol[tnst]], maybe tcs?
  colsp[tnst] = 1;

  # step over columns that an earlier cell's rowspan still covers
  if( rowspan[curcol[tnst]] )
    while( --rowspan[curcol[tnst]] ) {
#see prev
      tcs[tnst,currow[tnst],curcol[tnst]] = 1;
      curcol[tnst]++;
    }

  line[tnst] = 0;

#grab alignment: row default, TH forces CENTER, explicit ALIGN= wins
  align[tnst,currow[tnst],curcol[tnst]] = defalign[tnst];
  if( substr(toupper($1)" ",1,3) == "TH " )
    align[tnst,currow[tnst],curcol[tnst]] = "CENTER";
  if( match(toupper($1), " ALIGN=") )
    align[tnst,currow[tnst],curcol[tnst]] = toupper(substr($1,RSTART+7,6));

#grab colspan ("+= 0" keeps only the leading number)
  if( match(toupper($1), "COLSPAN=") )
    colsp[tnst] = substr($1,RSTART+8,5);
  colsp[tnst] += 0;

#grab rowspan;
  rowsp = 1;
  if( match(toupper($1), "ROWSPAN=") )
    rowsp = substr($1,RSTART+8,5);
  rowspan[curcol[tnst]] = rowsp + 0;

  tcs[tnst,currow[tnst],curcol[tnst]] = colsp[tnst];
  tdflag[tnst] = 1;
}

####################
# endentry(): finish the current cell at nesting level tnst.
#
# For every text line of the cell: trims blanks at both edges, makes an
# empty line a single blank, and widens colwid[] as needed (a colspan cell
# spreads its width evenly, rounded up, over the columns it covers).
# Then raises rowhght[] to the cell's line count (ignoring trailing empty
# lines), marks the extra columns of a colspan with tcs = 0, and clears
# tdflag[tnst], line[tnst] and tralready.
#
# Fixes relative to the posted version:
#   * the debug "print" comment had been wrapped by the mailer, leaving its
#     second half as a live statement; it is one comment line again;
#   * the per-column width is truncated with int() instead of replacing the
#     decimal point in the number's string form.
function endentry() {

  if( colwid[tnst,curcol[tnst]] == 0 )
    colwid[tnst,curcol[tnst]] = 1;

  if( !colsp[tnst] )
    colsp[tnst] = 1;

  colsp[tnst] += 0;

  lx = 0;
  while( lx <= line[tnst] ) {

#trim edge spaces
    while( sub(" $","",ttext[tnst,currow[tnst],curcol[tnst],lx]));
    while( sub("^ ","",ttext[tnst,currow[tnst],curcol[tnst],lx]));

    col = vislength( ttext[tnst,currow[tnst],curcol[tnst],lx] );

    # an empty line still occupies one character
    if( !col ) {
      ttext[tnst,currow[tnst],curcol[tnst],lx] = \
        ttext[tnst,currow[tnst],curcol[tnst],lx] " ";
      col = 1;
    }
#print "row:" currow[tnst] " col:" curcol[tnst] " line:" lx " len:" col ">" ttext[tnst,currow[tnst],curcol[tnst],lx] "<";

    if( colsp[tnst] == 1 ) {
      if( col > colwid[tnst,curcol[tnst]] )
        colwid[tnst,curcol[tnst]] = col;
    }
    else {
      # width each spanned column must supply, rounded up
      col = int( (col + colsp[tnst] - 1 ) / colsp[tnst] );
      col1 = 0;
      while( col1 < colsp[tnst] ) {
        if( col > colwid[tnst,curcol[tnst]+col1] )
          colwid[tnst,curcol[tnst]+col1] = col;
        rowspan[curcol[tnst]+col1] = rowspan[curcol[tnst]];
        col1++;
      }
    }
    lx++;
  }

  # do not count trailing empty (or single-blank) lines towards the height
  while( lx > 1 && (\
         !length( ttext[tnst,currow[tnst],curcol[tnst],lx-1] ) || \
         ttext[tnst,currow[tnst],curcol[tnst],lx-1] == " ") )
    lx--;

#FIXME need to spread rowhght among rowspan
  if( lx > rowhght[tnst,currow[tnst]] )
    rowhght[tnst,currow[tnst]] = lx;

  # columns swallowed by a colspan get span count 0
  while( colsp[tnst] > 1 ) {
    curcol[tnst]++;
    tcs[tnst,currow[tnst],curcol[tnst]] = 0;
    colsp[tnst]--;
  }

  tdflag[tnst] = 0;
  line[tnst] = 0;

  tralready=0;
}

####################
# fixrow(): close the current row at nesting level tnst and open the next.
# Ends a cell whose </td> was omitted, records the widest row seen so far
# in maxcol, then resets the per-row state for the new row.
function fixrow ()
{
  # an entry that is still open lost its </td>
  if( tdflag[tnst] )
    endentry();

  # keep the largest column count
  maxcol = ( curcol[tnst] > maxcol ) ? curcol[tnst] : maxcol;

  # new row: no columns yet, one line high, writing text line 0
  currow[tnst]++;
  rowhght[tnst,currow[tnst]] = 1;
  curcol[tnst] = 0;
  line[tnst] = 0;
}

####################
# startrow(): handle a row-opening tag whose text is in $1.
# If the previous row was not closed (tralready is clear) it is closed
# first; then the row's default cell alignment is taken from ALIGN=, or
# set to "default" when the attribute is absent.  valign is not handled.
function startrow ()
{
  # omitted </tr>
  if( !tralready )
    fixrow();
  tralready = 0;

  defalign[tnst] = match(toupper($1), " ALIGN=") \
                   ? toupper(substr($1,RSTART+7,6)) \
                   : "default";
}

####################
# endrow(): handle an explicit end of row.
function endrow()
{
  fixrow();        # finish any open cell and advance to the next row
  tralready = 1;   # the row is closed, so startrow() must not close it again
}

####################
# Initialisation.
#
# Records are split on "<" and fields on ">", so for each record $1 is the
# text of one tag (name and attributes) and $2 is the text that follows it.
#
# Fixes relative to the posted version:
#   * RS and FS are written as plain "<" and ">"; "\<" and "\>" are not
#     defined string escapes (gawk warns and drops the backslash);
#   * tnst is set before it is used as an index, so tdflag[0] is the
#     element that gets initialised rather than tdflag[""].
BEGIN {
  RS = "<";
  FS = ">";

  tnst = 0;          # current table nesting level, 0 = outside any table
  tdflag[tnst] = 0;  # non-zero while a cell is open at this level
  colsp[tnst] = 0;
  dflg = 0;
  loff = 0;
  lofmx = 1;
  tralready = 0;     # non-zero when the current row is already closed

# PC Graphics characters (single);
  boxtl = "\332";  boxt = "\302";  boxtr = "\277";
  boxbl = "\300";  boxb = "\301";  boxbr = "\331";
  vrule = "\263";  hrule = "\304";  #cross = "\305";

# PC Graphics characters (double);
#  boxtl = "\311";  boxt = "\313";  boxtr = "\273";
#  boxbl = "\310";  boxb = "\312";  boxbr = "\274";
#  vrule = "\272";  hrule = "\315";  #cross = "\316";

# Ascii boxes
#  boxtl = "+";  boxt = "+";  boxtr = "+";
#  boxbl = "+";  boxb = "+";  boxbr = "+";
#  vrule = "|";  hrule = "-"; #cross = "+";

# hrule and spaces are grown to 256 characters; borders and padding are
# cut from them with substr()
  hrule = hrule hrule hrule hrule; #4
  hrule = hrule hrule hrule hrule; #16
  hrule = hrule hrule hrule hrule; #64
  hrule = hrule hrule hrule hrule; #256
  spaces = " ";
  spaces = spaces spaces spaces spaces; #4
  spaces = spaces spaces spaces spaces; #16
  spaces = spaces spaces spaces spaces; #64
  spaces = spaces spaces spaces spaces; #256
#  colwid = 12;
}

#################### MAIN
# Main rule: runs once per tag.  $1 is the tag text, $2 the text after it.
#
# TABLE opens a nesting level, TR/TD/TH and their closing tags drive the
# row/cell bookkeeping, BR/HR/LI/P/OPTION start a new text line inside the
# cell, IMG contributes its ALT text, and /TABLE renders the table: it is
# printed at the outermost level, otherwise stored as the text lines of the
# enclosing cell.
#
# Fixes relative to the posted version:
#   * substr() positions start at 1; with a start of 0 POSIX-conforming awks
#     (including current gawk) return one character too few, so none of the
#     tag comparisons could match and ALT text was mis-cut;
#   * "&nbsp;" is replaced together with its semicolon;
#   * the duplicated tab substitution is removed;
#   * ">" is written without the undefined "\>" escape.
{
  # upper-cased tag with a trailing blank, so "TD" and "TD ALIGN=..." both
  # start with "TD " (nothing in this rule modifies $1)
  tag = toupper($1) " ";

  if( substr(tag,1,6) == "TABLE " ) {

    if( tnst > 0 ) {
      # a nested table outside any cell gets a row and cell to live in
      if( !tdflag[tnst] ) {
        startrow();
        startentry();
      }
      rowhght[tnst,currow[tnst]] = 1;
    }
    tnst++;

    currow[tnst] = 1;
    curcol[tnst] = 0;
    line[tnst] = 0;

    maxcol = 0;
    tralready = 1;
  }

  if( tnst > 0 ) {
    if( substr(tag,1,3) == "TR " )
      startrow();

    if( substr(tag,1,3) == "TH " \
        || substr(tag,1,3) == "TD " )
      startentry();

    # tags that force a line break inside the cell
    if( tag == "BR " || \
        substr(tag,1,3) == "HR " || \
        substr(tag,1,3) == "LI " || \
        tag == "P " || \
        substr(tag,1,7) == "OPTION " \
        ) {
      line[tnst]++;
#      if( substr(tag,1,7) == "OPTION "  )
#       ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = ">";
#      else
        ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = "";
    }

#extract ALT string
    if( substr(tag,1,4) == "IMG " && match(toupper($1),"ALT=") ) {
      name = substr($1,RSTART+4,length($1)-6);
      if( substr(name,1,1) == "\"" ) {
        # quoted value: keep what lies between the quotes
        name = substr(name,2,length(name)-1);
        match(name,"\"");
        name = substr(name,1,RSTART-1);
      }
      else if( match(name," ") )
        # unquoted value ends at the first blank
        name = substr(name,1,RSTART-1);
      gsub( ">" , "", name );
      $2 = " [" name "]" $2;
    }

#fix character formats
    if( NF > 1 && length($2) ) {
      gsub("\046amp;","+",$2);
      gsub("\046#169;","(C)",$2);
      gsub("\046#162;","cents",$2);
      gsub("\046nbsp;?"," ",$2);
      gsub("\n","",$2);
      gsub("\r","",$2);
      gsub("\t"," ",$2);
      # collapse runs of blanks
      while( gsub("  "," ",$2) );
      if( !tdflag[tnst] )
        sub(" $","",$2);
    }

#mainly for forms - extract name in select
#    if( match(toupper($1), "NAME=") ) {
#       name = substr($1,RSTART+5,64);
#       if( match(name," ") )
#         name = substr(name,1,RSTART-1 );
#       gsub( ">" , "", name );
#       gsub( "\"" , "" , name );
#       $2 = name "=" $2;
#    }

    # append the text to the current line of the current cell
    if( NF > 1 && length($2) )
      ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] \
        = ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] $2;

#print currow[tnst]","curcol[tnst]","line[tnst] "<" $1 ">" $2 ":"

    if( substr(tag,1,4) == "/TD " \
        || substr(tag,1,4) == "/TH " )
      endentry();

    if( substr(tag,1,4) == "/TR " )
      endrow();

    if( substr(tag,1,7) == "/TABLE " ) {

      if( !tralready )
        endrow();
      currow[tnst]--;
      tralready = 0;

      # render this level into image[0..iline]
      printtab();

      tnst--;
      rowhght[tnst,currow[tnst]] += iline + 1;

      colwid[tnst,curcol[tnst]] += 0;

      inrow = 0;
      if( !tnst )
        # outermost table: print it
        while( inrow <= iline )
          print image[inrow++];
      else {
        # nested table: its image becomes the text lines of the outer cell
        while( inrow <= iline ) {
          ttext[tnst,currow[tnst],curcol[tnst],inrow] = image[inrow];
          inrow++;
        }
        col = vislength( image[inrow-1] );
        if( col > colwid[tnst,curcol[tnst]] )
          colwid[tnst,curcol[tnst]] = col;
      }
      line[tnst] += iline;
      inrow = 0;
      currow[tnst+1] = 0;
      if( tnst == 0 )
        system("");   # flush output after each top-level table

      if( !tdflag[tnst] ) {
        endentry();
        endrow();
      }
#for vertical stacking of all tables
#     else
#       endentry();
      tdflag[tnst] = 0;
    }
  }
}

# After the last record, finish the output with one empty line.
END {
  printf "%s", ORS;
}

;
; To UNSUBSCRIBE:  Send a mail message to address@hidden
;                  with "unsubscribe lynx-dev" (without the
;                  quotation marks) on a line by itself.
;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]