texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[7576] Texinfo::MiscXS::merge_text


From: gavinsmith0123
Subject: [7576] Texinfo::MiscXS::merge_text
Date: Sun, 25 Dec 2016 22:39:49 +0000 (UTC)

Revision: 7576
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7576
Author:   gavin
Date:     2016-12-25 22:39:48 +0000 (Sun, 25 Dec 2016)
Log Message:
-----------
Texinfo::MiscXS::merge_text

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/tp/Texinfo/MiscXS/MiscXS.xs
    trunk/tp/Texinfo/MiscXS/miscxs.c
    trunk/tp/Texinfo/MiscXS/miscxs.h
    trunk/tp/Texinfo/Parser.pm

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog     2016-12-25 13:23:22 UTC (rev 7575)
+++ trunk/ChangeLog     2016-12-25 22:39:48 UTC (rev 7576)
@@ -1,5 +1,14 @@
 2016-12-25  Gavin Smith  <address@hidden>
 
+       * tp/Texinfo/MiscXS/MiscXS.xs,
+       * tp/Texinfo/MiscXS/miscxs.c (merge_text): New function.
+       * tp/Texinfo/Parser.pm: Add a 'UNITCHECK' section to override
+       '_merge_text' with Texinfo::MiscXS::merge_text.
+       (_merge_text): Reorder a condition to make executing a regex
+       less likely.
+
+2016-12-25  Gavin Smith  <address@hidden>
+
        * tp/t/protect_character_in_texinfo.t,
        tp/t/reference_to_text_in_tree.t, tp/t/test_brace_count.t, 
        tp/t/test_fill_gaps_in_sectioning.t, 

Modified: trunk/tp/Texinfo/MiscXS/MiscXS.xs
===================================================================
--- trunk/tp/Texinfo/MiscXS/MiscXS.xs   2016-12-25 13:23:22 UTC (rev 7575)
+++ trunk/tp/Texinfo/MiscXS/MiscXS.xs   2016-12-25 22:39:48 UTC (rev 7576)
@@ -30,8 +30,11 @@
 
 PROTOTYPES: DISABLE
 
-int
-xspara_init ()
+HV *
+xs_merge_text (self, current, text_in)
+     HV *self
+     HV *current
+     SV *text_in
 
 SV *
 xs_unicode_text (text_in, ...)

Modified: trunk/tp/Texinfo/MiscXS/miscxs.c
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.c    2016-12-25 13:23:22 UTC (rev 7575)
+++ trunk/tp/Texinfo/MiscXS/miscxs.c    2016-12-25 22:39:48 UTC (rev 7576)
@@ -38,6 +38,213 @@
 
 #include "miscxs.h"
 
+/* Characters counted as leading whitespace by xs_merge_text. */
+const char *whitespace_chars = " \t\f\v\r\n";
+
+/* Return the array behind ELEMENT's 'contents' key, or 0 when the key
+   is missing or its value is not an array reference. */
+static AV *
+element_contents (HV *element)
+{
+  SV **svp;
+  dTHX;
+
+  svp = hv_fetch (element, "contents", strlen ("contents"), 0);
+  if (!svp || !SvROK (*svp) || SvTYPE (SvRV (*svp)) != SVt_PVAV)
+    return 0;
+  return (AV *) SvRV (*svp);
+}
+
+/* Return the hash referenced by the last entry of CONTENTS, or 0 when
+   the array is empty or that entry is not a hash reference. */
+static HV *
+last_contents_element (AV *contents)
+{
+  SV **svp;
+  SSize_t top;
+  dTHX;
+
+  top = av_top_index (contents);
+  if (top < 0)
+    return 0;
+  svp = av_fetch (contents, top, 0);
+  if (!svp || !SvROK (*svp) || SvTYPE (SvRV (*svp)) != SVt_PVHV)
+    return 0;
+  return (HV *) SvRV (*svp);
+}
+
+/* Add TEXT_IN to the tree element CURRENT and return the element that
+   received it.
+
+   If TEXT_IN contains a non-whitespace character,
+   Texinfo::Parser::_abort_empty_line is called with SELF, CURRENT and the
+   leading whitespace (undef if there is none); when it returns a true
+   value the leading whitespace is dropped from the text.  Then
+   Texinfo::Parser::_begin_paragraph is called with SELF and CURRENT, and
+   if it returns a hash reference that hash replaces CURRENT.
+
+   The text is appended to the 'text' value of the last 'contents' entry
+   when that entry has a 'text' key without a newline in it and the entry
+   that was last before the calls above did not have one of the
+   empty_* types listed below.  Otherwise a new { text, parent } hash is
+   pushed onto 'contents', which is created if missing.
+
+   TEXT_IN is upgraded to UTF-8 in place as a side effect. */
+HV *
+xs_merge_text (HV *self, HV *current, SV *text_in)
+{
+  AV *contents_array;
+  HV *last_elt;
+  SV **svp;
+  SV *existing_text_sv = 0;
+  char *text;
+  size_t leading_spaces;
+  int no_merge_with_following_text = 0;
+
+  dTHX;
+
+  dSP;
+
+  if (!SvUTF8 (text_in))
+    sv_utf8_upgrade (text_in);
+
+  text = SvPV_nolen (text_in);
+
+  leading_spaces = strspn (text, whitespace_chars);
+  if (text[leading_spaces])
+    {
+      SV *leading_spaces_sv;
+      SV *returned_sv;
+      HV *paragraph = 0;
+      int count;
+
+      contents_array = element_contents (current);
+      last_elt = contents_array
+                 ? last_contents_element (contents_array) : 0;
+      if (last_elt)
+        {
+          char *type = 0;
+
+          svp = hv_fetch (last_elt, "type", strlen ("type"), 0);
+          if (svp && SvOK (*svp))
+            type = SvPV_nolen (*svp);
+          if (type
+              && (!strcmp (type, "empty_line_after_command")
+                  || !strcmp (type, "empty_spaces_after_command")
+                  || !strcmp (type, "empty_spaces_before_argument")
+                  || !strcmp (type, "empty_spaces_after_close_brace")))
+            {
+              no_merge_with_following_text = 1;
+            }
+        }
+
+      /* See 'perlcall' man page. */
+      ENTER;
+      SAVETMPS;
+
+      /* Created after SAVETMPS so that FREETMPS releases it.  A null
+         pointer must never be pushed on the Perl stack, so pass undef
+         when there is no leading whitespace. */
+      if (leading_spaces > 0)
+        leading_spaces_sv = sv_2mortal (newSVpvn (text, leading_spaces));
+      else
+        leading_spaces_sv = &PL_sv_undef;
+
+      /**********************/
+      PUSHMARK(SP);
+      XPUSHs(sv_2mortal(newRV_inc((SV *)self)));
+      XPUSHs(sv_2mortal(newRV_inc((SV *)current)));
+      XPUSHs(leading_spaces_sv);
+      PUTBACK;
+
+      count = call_pv ("Texinfo::Parser::_abort_empty_line", G_SCALAR);
+
+      SPAGAIN;
+
+      if (count == 1)
+        {
+          returned_sv = POPs;
+          /* SvTRUE is valid for any scalar, whereas SvRV may only be
+             applied to a value already known to be a reference. */
+          if (SvTRUE (returned_sv))
+            text += leading_spaces;
+        }
+
+      /************************/
+
+      PUSHMARK(SP);
+      XPUSHs(sv_2mortal(newRV_inc((SV *)self)));
+      XPUSHs(sv_2mortal(newRV_inc((SV *)current)));
+      PUTBACK;
+
+      count = call_pv ("Texinfo::Parser::_begin_paragraph", G_SCALAR);
+
+      SPAGAIN;
+
+      if (count == 1)
+        {
+          returned_sv = POPs;
+          /* Take our own reference: the returned value may be a
+             temporary that FREETMPS frees. */
+          if (SvROK (returned_sv)
+              && SvTYPE (SvRV (returned_sv)) == SVt_PVHV)
+            paragraph = (HV *) SvREFCNT_inc (SvRV (returned_sv));
+        }
+
+      /************************/
+
+      PUTBACK;
+      FREETMPS;
+      LEAVE;
+
+      /* Hand the extra reference to the caller's temporaries. */
+      if (paragraph)
+        current = (HV *) sv_2mortal ((SV *) paragraph);
+    }
+
+  contents_array = element_contents (current);
+  if (!contents_array)
+    {
+      /* newRV_noinc: the hash entry becomes the array's only owner. */
+      contents_array = newAV ();
+      hv_store (current, "contents", strlen ("contents"),
+                newRV_noinc ((SV *) contents_array), 0);
+    }
+  else if (!no_merge_with_following_text)
+    {
+      last_elt = last_contents_element (contents_array);
+      if (last_elt)
+        {
+          svp = hv_fetch (last_elt, "text", strlen ("text"), 0);
+          if (svp && !strchr (SvPV_nolen (*svp), '\n'))
+            existing_text_sv = *svp;
+        }
+    }
+
+  if (existing_text_sv)
+    {
+      /* TEXT is UTF-8 encoded; make sure the target is too before
+         appending raw bytes to it. */
+      if (!SvUTF8 (existing_text_sv))
+        sv_utf8_upgrade (existing_text_sv);
+      sv_catpv (existing_text_sv, text);
+    }
+  else
+    {
+      HV *hv = newHV ();
+      SV *sv = newSVpv (text, 0);
+
+      SvUTF8_on (sv);
+      hv_store (hv, "text", strlen ("text"), sv, 0);
+      hv_store (hv, "parent", strlen ("parent"),
+                newRV_inc ((SV *)current), 0);
+      /* newRV_noinc: 'contents' becomes the new hash's only owner. */
+      av_push (contents_array, newRV_noinc ((SV *)hv));
+    }
+
+  return current;
+}
+
 char *
 xs_unicode_text (char *text, int in_code)
 {

Modified: trunk/tp/Texinfo/MiscXS/miscxs.h
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.h    2016-12-25 13:23:22 UTC (rev 7575)
+++ trunk/tp/Texinfo/MiscXS/miscxs.h    2016-12-25 22:39:48 UTC (rev 7576)
@@ -1 +1,2 @@
 char *xs_unicode_text (char *, int);
+HV *xs_merge_text (HV *self, HV *current, SV *text_in);

Modified: trunk/tp/Texinfo/Parser.pm
===================================================================
--- trunk/tp/Texinfo/Parser.pm  2016-12-25 13:23:22 UTC (rev 7575)
+++ trunk/tp/Texinfo/Parser.pm  2016-12-25 22:39:48 UTC (rev 7576)
@@ -533,7 +533,6 @@
   warn "You found a bug: $message\n\n".
        "Additional informations:\n".
        $line_message.$message_context_stack.$current_element_message;
-  
 }
 
 # simple deep copy of a structure
@@ -1852,10 +1851,14 @@
   return ($closed_element, $current);
 }
 
+UNITCHECK {
+   Texinfo::XSLoader::override ("Texinfo::Parser::_merge_text",
+     "Texinfo::MiscXS::merge_text");
+}
+
 # begin paragraph if needed.  If not try to merge with the previous
 # content if it is also some text.
-sub _merge_text($$$)
-{
+sub _merge_text {
   my $self = shift;
   my $current = shift;
   my $text = shift;
@@ -1890,10 +1893,10 @@
     die;
   }
 
-  if (@{$current->{'contents'}} 
+  if (!$no_merge_with_following_text
+      and @{$current->{'contents'}} 
       and exists($current->{'contents'}->[-1]->{'text'}) 
-      and $current->{'contents'}->[-1]->{'text'} !~ /\n/
-      and !$no_merge_with_following_text) {
+      and $current->{'contents'}->[-1]->{'text'} !~ /\n/) {
     $current->{'contents'}->[-1]->{'text'} .= $text;
     print STDERR "MERGED TEXT: $text|||\n" if ($self->{'DEBUG'});
   } else {




reply via email to

[Prev in Thread] Current Thread [Next in Thread]