[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Mon, 6 Mar 2023 12:44:23 -0500 (EST) |
branch: master
commit a357baf0efe054564ab04b199c05630dfc1c7a0c
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Mon Mar 6 17:34:49 2023 +0100
* tp/Texinfo/ParserNonXS.pm (_parse_texi_regex)
(_process_remaining_on_line), tp/Texinfo/XS/MiscXS.xs
(xs_parse_texi_regex), tp/Texinfo/XS/misc.c (xs_parse_texi_regex),
tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line),
tp/Texinfo/XS/parsetexi/separator.c (handle_separator): handle
separately the different separators (except for menu only separators)
instead of grouping them, directly in process_remaining_on_line.
Remove the handle_separator function.
---
ChangeLog | 11 ++++++
tp/Texinfo/ParserNonXS.pm | 75 +++++++++++++++++++++----------------
tp/Texinfo/XS/MiscXS.xs | 33 +++++++++++-----
tp/Texinfo/XS/misc.c | 44 ++++++++++++----------
tp/Texinfo/XS/miscxs.h | 3 ++
tp/Texinfo/XS/parsetexi/parser.c | 63 ++++++++++++++++++++++++-------
tp/Texinfo/XS/parsetexi/parser.h | 5 ++-
tp/Texinfo/XS/parsetexi/separator.c | 50 -------------------------
8 files changed, 156 insertions(+), 128 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 45a352cc09..b80224f020 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2023-03-06 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/ParserNonXS.pm (_parse_texi_regex)
+ (_process_remaining_on_line), tp/Texinfo/XS/MiscXS.xs
+ (xs_parse_texi_regex), tp/Texinfo/XS/misc.c (xs_parse_texi_regex),
+ tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line),
+ tp/Texinfo/XS/parsetexi/separator.c (handle_separator): handle
+ separately the different separators (except for menu only separators)
+ instead of grouping them, directly in process_remaining_on_line.
+ Remove the handle_separator function.
+
2023-03-06 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Common.pm (_copy_tree, _substitute_references_in_array)
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f476013d4d..e3b9018e5b 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -4471,27 +4471,28 @@ sub _parse_texi_regex {
my ($line) = @_;
# REMACRO
- my ($at_command, $open_brace, $asterisk, $single_letter_command,
- $separator_match, $menu_separator, $misc_text)
+ my ($at_command, $open_brace, $close_brace, $comma,
+ $asterisk, $single_letter_command, $arobase,
+ $form_feed, $menu_only_separator, $misc_text)
= ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)
|^(\{)
+ |^(\})
+ |^(,)
|^(\*)
|^\@(["'~\@&\}\{,\.!\? \t\n\*\-\^`=:\|\/\\])
- |^([{}@,\f])
+ |^(@)
+ |^(\f)
|^([:\t.])
|^([^{}@,:\t.\n\f]+)
/x);
- if ($open_brace) {
- $separator_match = $open_brace;
- } elsif ($asterisk) {
+ if ($asterisk) {
($misc_text) = ($line =~ /^([^{}@,:\t.\n\f]+)/);
- } elsif ($separator_match and $separator_match eq ',') {
- $menu_separator = $separator_match;
}
- return ($at_command, $open_brace, $asterisk, $single_letter_command,
- $separator_match, $menu_separator, $misc_text);
+ return ($at_command, $open_brace, $close_brace, $comma,
+ $asterisk, $single_letter_command, $arobase,
+ $form_feed, $menu_only_separator, $misc_text);
}
sub _check_line_directive {
@@ -5797,8 +5798,11 @@ sub _process_remaining_on_line($$$$)
my $at_command_length;
my @line_parsing = _parse_texi_regex($line);
- my ($at_command, $open_brace, $asterisk, $single_letter_command,
- $separator_match, $menu_separator, $misc_text) = @line_parsing;
+ my ($at_command, $open_brace, $close_brace, $comma, $asterisk,
+ $single_letter_command, $arobase, $form_feed, $menu_only_separator,
+ $misc_text) = @line_parsing;
+ my $menu_separator = $comma;
+ $menu_separator = $menu_only_separator if (!$comma);
print STDERR "PARSED: "
.join(', ',map {!defined($_) ? 'UNDEF' : "'$_'"} @line_parsing)."\n"
if ($self->{'DEBUG'} and $self->{'DEBUG'} > 3);
@@ -6058,7 +6062,8 @@ sub _process_remaining_on_line($$$$)
$current = $current->{'parent'};
}
} elsif (_handle_menu_entry_separators($self, \@current_array_for_ref,
- \$line, $source_info, $asterisk, $menu_separator)) {
+ \$line, $source_info, $asterisk,
+ $menu_separator)) {
$current = $current_array_for_ref[0];
# Any other @-command.
} elsif ($command) {
@@ -6199,15 +6204,8 @@ sub _process_remaining_on_line($$$$)
$command_element->{'info'} = {} if (!$command_element->{'info'});
$command_element->{'info'}->{'alias_of'} = $from_alias;
}
- } elsif ($separator_match) {
- my $separator = $separator_match;
+ } elsif ($open_brace) {
substr ($line, 0, 1) = '';
- print STDERR "SEPARATOR: $separator\n" if ($self->{'DEBUG'});
- if ($separator eq '@') {
- # this may happen with a @ at the very end of a file, therefore
- # not followed by anything.
- $self->_line_error(__("unexpected \@"), $source_info);
- } elsif ($separator eq '{') {
# handle_open_brace in XS parser
if ($current->{'cmdname'}
and defined($self->{'brace_commands'}->{$current->{'cmdname'}})) {
@@ -6357,7 +6355,8 @@ sub _process_remaining_on_line($$$$)
$self->_line_error(sprintf(__("misplaced {")), $source_info);
}
- } elsif ($separator eq '}') {
+ } elsif ($close_brace) {
+ substr ($line, 0, 1) = '';
# handle_close_brace in XS parser
# For footnote and caption closing, when there is a paragraph inside.
@@ -6609,9 +6608,10 @@ sub _process_remaining_on_line($$$$)
} else {
$self->_line_error(sprintf(__("misplaced }")), $source_info);
}
- } elsif ($separator eq ','
- and $current->{'parent'}
- and $current->{'parent'}->{'remaining_args'}) {
+ } elsif ($comma) {
+ substr ($line, 0, 1) = '';
+ if ($current->{'parent'}
+ and $current->{'parent'}->{'remaining_args'}) {
# handle_comma in XS parser
_abort_empty_line($self, $current);
_isolate_last_space($self, $current);
@@ -6772,28 +6772,37 @@ sub _process_remaining_on_line($$$$)
'extra' => {'spaces_associated_command' => $current}
};
push @{$current->{'contents'}}, $space_before;
- } elsif ($separator eq ',' and $current->{'type'}
+ } elsif ($current->{'type'}
and $current->{'type'} eq 'line_arg'
and $current->{'parent'}->{'cmdname'}
and $current->{'parent'}->{'cmdname'} eq 'node') {
$self->_line_warn(__("superfluous arguments for node"), $source_info);
- } elsif ($separator eq "\f" and $current->{'type'}
- and $current->{'type'} eq 'paragraph') {
+ } else {
+ $current = _merge_text($self, $current, $comma);
+ }
+ } elsif ($form_feed) {
+ substr ($line, 0, 1) = '';
+ if ($current->{'type'}
+ and $current->{'type'} eq 'paragraph') {
# A form feed stops and restart a paragraph.
$current = _end_paragraph($self, $current, $source_info);
- my $line_feed = {'type' => 'empty_line', 'text' => $separator,
+ my $line_feed = {'type' => 'empty_line', 'text' => $form_feed,
'parent' => $current };
push @{$current->{'contents'}}, $line_feed;
my $empty_line = { 'type' => 'empty_line', 'text' => '',
'parent' => $current };
push @{$current->{'contents'}}, $empty_line;
} else {
- $current = _merge_text($self, $current, $separator);
+ $current = _merge_text($self, $current, $form_feed);
}
- # need to be after as , is in common with separators
- } elsif ($menu_separator) {
+ } elsif ($arobase) {
+ substr ($line, 0, 1) = '';
+ # this may happen with a @ at the very end of a file, therefore
+ # not followed by anything.
+ $self->_line_error(__("unexpected \@"), $source_info);
+ } elsif ($menu_only_separator) {
substr ($line, 0, 1) = '';
- $current = _merge_text($self, $current, $menu_separator);
+ $current = _merge_text($self, $current, $menu_only_separator);
# Misc text except end of line
} elsif (defined $misc_text) {
print STDERR "MISC TEXT: $misc_text\n" if ($self->{'DEBUG'});
diff --git a/tp/Texinfo/XS/MiscXS.xs b/tp/Texinfo/XS/MiscXS.xs
index 4223eec88b..a8c58cc539 100644
--- a/tp/Texinfo/XS/MiscXS.xs
+++ b/tp/Texinfo/XS/MiscXS.xs
@@ -105,16 +105,20 @@ xs_parse_texi_regex (text)
PREINIT:
char *at_command;
char *open_brace;
+ char *close_brace;
+ char *comma;
char *asterisk;
char *single_letter_command;
char *separator_match;
- char *menu_separator;
+ char *arobase;
+ char *form_feed;
+ char *menu_only_separator;
char *new_text;
PPCODE:
- xs_parse_texi_regex(text, &at_command, &open_brace, &asterisk,
- &single_letter_command, &separator_match,
- &menu_separator, &new_text);
- EXTEND(SP,6);
+ xs_parse_texi_regex(text, &at_command, &open_brace, &close_brace,
+ &comma, &asterisk, &single_letter_command,
+ &arobase, &form_feed, &menu_only_separator,
&new_text);
+ EXTEND(SP,9);
PUSHs(sv_newmortal());
sv_setpv((SV*)ST(0), at_command);
SvUTF8_on(ST(0));
@@ -122,20 +126,29 @@ xs_parse_texi_regex (text)
sv_setpv((SV*)ST(1), open_brace);
SvUTF8_on(ST(1));
PUSHs(sv_newmortal());
- sv_setpv((SV*)ST(2), asterisk);
+ sv_setpv((SV*)ST(2), close_brace);
SvUTF8_on(ST(2));
PUSHs(sv_newmortal());
- sv_setpv((SV*)ST(3), single_letter_command);
+ sv_setpv((SV*)ST(3), comma);
SvUTF8_on(ST(3));
PUSHs(sv_newmortal());
- sv_setpv((SV*)ST(4), separator_match);
+ sv_setpv((SV*)ST(4), asterisk);
SvUTF8_on(ST(4));
PUSHs(sv_newmortal());
- sv_setpv((SV*)ST(5), menu_separator);
+ sv_setpv((SV*)ST(5), single_letter_command);
SvUTF8_on(ST(5));
PUSHs(sv_newmortal());
- sv_setpv((SV*)ST(6), new_text);
+ sv_setpv((SV*)ST(6), arobase);
SvUTF8_on(ST(6));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(7), form_feed);
+ SvUTF8_on(ST(7));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(8), menu_only_separator);
+ SvUTF8_on(ST(8));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(9), new_text);
+ SvUTF8_on(ST(9));
SV *
xs_default_format_protect_text (self, text_in)
diff --git a/tp/Texinfo/XS/misc.c b/tp/Texinfo/XS/misc.c
index de7fc5cb30..f582a81ee2 100644
--- a/tp/Texinfo/XS/misc.c
+++ b/tp/Texinfo/XS/misc.c
@@ -281,15 +281,17 @@ xs_entity_text (char *text)
return new;
}
-/* Return list ($at_command, $open_brace, $asterisk, $single_letter_command,
- $separator_match) */
+/* Return list ($at_command, $open_brace, ....) */
void xs_parse_texi_regex (SV *text_in,
char **at_command,
char **open_brace,
+ char **close_brace,
+ char **comma,
char **asterisk,
char **single_letter_command,
- char **separator_match,
- char **menu_separator,
+ char **arobase,
+ char **form_feed,
+ char **menu_only_separator,
char **new_text)
{
char *text;
@@ -301,8 +303,9 @@ void xs_parse_texi_regex (SV *text_in,
sv_utf8_upgrade (text_in);
text = SvPV_nolen (text_in);
- *at_command = *open_brace = *asterisk = *single_letter_command
- = *separator_match = *menu_separator = *new_text = 0;
+ *at_command = *open_brace = *close_brace = *comma = *asterisk
+ = *single_letter_command = *arobase = *form_feed
+ = *menu_only_separator = *new_text = 0;
if (*text == '@' && isalnum(text[1]))
{
@@ -324,9 +327,16 @@ void xs_parse_texi_regex (SV *text_in,
if (*text == '{')
{
*open_brace = "{";
- *separator_match = "{";
+ }
+ else if (*text == '}')
+ {
+ *close_brace = "}";
}
+ else if (*text == ',')
+ {
+ *comma = ",";
+ }
else if (*text == '@'
&& text[1] && strchr ("([\"'~@&}{,.!?"
" \t\n"
@@ -338,25 +348,21 @@ void xs_parse_texi_regex (SV *text_in,
a[0] = text[1];
a[1] = '\0';
}
-
- else if (strchr ("{}@,\f", *text))
+ else if (strchr (":\t.", *text))
{
static char a[2];
- *separator_match = a;
- if (*text == ',')
- *menu_separator = a;
+ *menu_only_separator = a;
a[0] = *text;
a[1] = '\0';
}
-
- else if (strchr (":\t.", *text))
+ else if (*text == '\f')
{
- static char a[2];
- *menu_separator = a;
- a[0] = *text;
- a[1] = '\0';
+ *form_feed = "\f";
+ }
+ else if (*text == '@')
+ {
+ *arobase = "@";
}
-
else
{
char *p;
diff --git a/tp/Texinfo/XS/miscxs.h b/tp/Texinfo/XS/miscxs.h
index 8dea52bffa..696c61e304 100644
--- a/tp/Texinfo/XS/miscxs.h
+++ b/tp/Texinfo/XS/miscxs.h
@@ -2,6 +2,9 @@ char *xs_unicode_text (char *, int);
char *xs_entity_text (char *);
char *xs_process_text (char *text);
void xs_parse_texi_regex (SV *text,
+ char **,
+ char **,
+ char **,
char **,
char **,
char **,
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index ea37eefc6f..55c166f02c 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -1618,7 +1618,9 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
/* There are cases when we need more input, but we don't want to
get it in the top-level loop in parse_texi - this is mostly
(always?) when we don't want to start a new, empty line, and
- need to get more from the current, incomplete line of input. */
+ need to get more from the current, incomplete line of input.
+ Also, this ensures that the line cannot be empty in parsing below
+ */
while (*line == '\0')
{
static char *allocated_text;
@@ -1949,7 +1951,7 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
/* special case for accent commands, use following character except @
* as argument */
else if ((command_flags(current) & CF_accent)
- && *line != '\0' && *line != '@')
+ && *line != '@')
{
ELEMENT *e, *e2;
debug ("ACCENT following_arg");
@@ -2219,30 +2221,65 @@ process_remaining_on_line (ELEMENT **current_inout,
char **line_inout)
command_name (from_alias));
}
}
- /* "Separator" character */
- else if (*line != '\0' && strchr ("{}@,\f", *line))
+ /* "Separator" characters */
+ else if (*line == '{')
+ {
+ char separator = *line++;
+ current = handle_open_brace (current, &line);
+ }
+ else if (*line == '}')
+ {
+ char separator = *line++;
+ current = handle_close_brace (current, &line);
+ }
+ else if (*line == ',')
{
char separator = *line++;
- debug ("SEPARATOR: %c", separator);
- if (separator == '@')
- line_error ("unexpected @");
+ /* comma as a command argument separator */
+ if (counter_value (&count_remaining_args, current->parent) > 0)
+ current = handle_comma (current, &line);
+ else if (current->type == ET_line_arg && current->parent->cmd == CM_node)
+ line_warn ("superfluous arguments for node");
else
- current = handle_separator (current, separator, &line);
+ current = merge_text (current, ",", 0);
}
- else if (*line != '\0' && strchr (":\t.", *line))
+ else if (strchr (":\t.", *line))
{
- /* merge menu separator (other than comma, done with other separators) */
+ /* merge menu separator (other than comma) */
char separator = *line++;
char t[2];
t[0] = separator;
t[1] = '\0';
current = merge_text (current, t, 0);
}
+ else if (*line == '@')
+ {
+ char separator = *line++;
+ line_error ("unexpected @");
+ }
+ else if (*line == '\f')
+ {
+ char separator = *line++;
+ if (current->type == ET_paragraph)
+ {
+ ELEMENT *e;
+
+ /* A form feed stops and restarts a paragraph. */
+ current = end_paragraph (current, 0, 0);
+ e = new_element (ET_empty_line);
+ text_append_n (&e->text, "\f", 1);
+ add_to_element_contents (current, e);
+ e = new_element (ET_empty_line);
+ add_to_element_contents (current, e);
+ }
+ else
+ current = merge_text (current, "\f", 0);
+ }
/* "Misc text except end of line." */
- else if (*line && *line != '\n')
+ else if (*line != '\n')
{
size_t len;
-
+
/* Output until next command, separator or newline. */
{
char saved; /* TODO: Have a length argument to merge_text? */
@@ -2253,8 +2290,6 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
line += len;
*line = saved;
}
-
- goto funexit;
}
else /* End of line */
{
diff --git a/tp/Texinfo/XS/parsetexi/parser.h b/tp/Texinfo/XS/parsetexi/parser.h
index c2751a7601..e6a63ef179 100644
--- a/tp/Texinfo/XS/parsetexi/parser.h
+++ b/tp/Texinfo/XS/parsetexi/parser.h
@@ -141,8 +141,9 @@ extern size_t floats_space;
/* In separator.c */
-ELEMENT *handle_separator (ELEMENT *current, char separator,
- char **line_inout);
+ELEMENT * handle_open_brace (ELEMENT *current, char **line_inout);
+ELEMENT * handle_close_brace (ELEMENT *current, char **line_inout);
+ELEMENT * handle_comma (ELEMENT *current, char **line_inout);
/* In parser.c */
typedef struct {
diff --git a/tp/Texinfo/XS/parsetexi/separator.c
b/tp/Texinfo/XS/parsetexi/separator.c
index 738295dde3..47fc2d7080 100644
--- a/tp/Texinfo/XS/parsetexi/separator.c
+++ b/tp/Texinfo/XS/parsetexi/separator.c
@@ -747,53 +747,3 @@ funexit:
return current;
}
-/* Actions to be taken when a special character appears in the input. */
-ELEMENT *
-handle_separator (ELEMENT *current, char separator, char **line_inout)
-{
- char *line = *line_inout;
-
- if (separator == '{')
- {
- current = handle_open_brace (current, &line);
- }
- else if (separator == '}')
- {
- current = handle_close_brace (current, &line);
- }
- /* If a comma is seen after all the arguments for the command have been
- read, it is included in the last argument. */
- else if (separator == ','
- && counter_value (&count_remaining_args, current->parent) > 0)
- {
- current = handle_comma (current, &line);
- }
- else if (separator == ',' && current->type == ET_line_arg
- && current->parent->cmd == CM_node)
- {
- line_warn ("superfluous arguments for node");
- }
- else if (separator == '\f' && current->type == ET_paragraph)
- {
- ELEMENT *e;
-
- /* A form feed stops and restarts a paragraph. */
- current = end_paragraph (current, 0, 0);
- e = new_element (ET_empty_line);
- text_append_n (&e->text, "\f", 1);
- add_to_element_contents (current, e);
- e = new_element (ET_empty_line);
- add_to_element_contents (current, e);
- }
- else
- {
- /* Default - merge the character as usual. */
- char t[2];
- t[0] = separator;
- t[1] = '\0';
- current = merge_text (current, t, 0);
- }
-
- *line_inout = line;
- return current;
-}