[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[5999] better support for files produced with CR-LF line endings
From: Gavin D. Smith
Subject: [5999] better support for files produced with CR-LF line endings
Date: Mon, 29 Dec 2014 14:21:25 +0000
Revision: 5999
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=5999
Author: gavin
Date: 2014-12-29 14:21:24 +0000 (Mon, 29 Dec 2014)
Log Message:
-----------
better support for files produced with CR-LF line endings
Modified Paths:
--------------
trunk/ChangeLog
trunk/info/filesys.c
trunk/info/nodes.c
trunk/info/nodes.h
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2014-12-27 22:23:46 UTC (rev 5998)
+++ trunk/ChangeLog 2014-12-29 14:21:24 UTC (rev 5999)
@@ -1,3 +1,16 @@
+2014-12-29 Gavin Smith <address@hidden>
+
+ * info/filesys.c, info/nodes.c (convert_eols): Function moved
+ between files. Arguments changed.
+ * info/nodes.c (adjust_nodestart): Take extra argument giving
+ the slack to search for a node.
+ (find_node_from_tag): New function, wrapping adjust_nodestart.
+ Call convert_eols on file if we don't find a node.
+ (info_node_of_tag): Call find_node_from_tag instead of
+ adjust_nodestart. Rearrange logic in function so not to call
+ itself when handling an anchor.
+ * info/nodes.h (N_EOLs_Converted): New flag.
+
2014-12-27 Gavin Smith <address@hidden>
* info/nodes.h (TAGS_TABLE_BEG_LABEL): Define without trailing
Modified: trunk/info/filesys.c
===================================================================
--- trunk/info/filesys.c 2014-12-27 22:23:46 UTC (rev 5998)
+++ trunk/info/filesys.c 2014-12-29 14:21:24 UTC (rev 5999)
@@ -269,43 +269,7 @@
free (try_filename);
return 0;
}
-
-#if 0
-/* Given a chunk of text and its length, convert all CRLF pairs at every
- end-of-line into a single Newline character. Return the length of
- produced text.
- This is required because the rest of code is too entrenched in having
- a single newline at each EOL; in particular, searching for various
- Info headers and cookies can become extremely tricky if that assumption
- breaks.
-
- FIXME: this could also support Mac-style text files with a single CR
- at the EOL, but what about random CR characters in non-Mac files? Can
- we afford converting them into newlines as well? Maybe implement some
- heuristics here, like in Emacs 20.
-
- FIXME: is it a good idea to show the EOL type on the modeline? */
-static long
-convert_eols (char *text, long int textlen)
-{
- register char *s = text;
- register char *d = text;
-
- while (textlen--)
- {
- if (*s == '\r' && textlen && s[1] == '\n')
- {
- s++;
- textlen--;
- }
- *d++ = *s++;
- }
-
- return d - text;
-}
-#endif
-
/* Read the contents of PATHNAME, returning a buffer with the contents of
that file in it, and returning the size of that buffer in FILESIZE.
If the file turns out to be compressed, set IS_COMPRESSED to non-zero.
@@ -355,19 +319,6 @@
close (descriptor);
}
- /* EOL conversion is disabled because it makes the tag table for a file
- incorrect. See the test in info/t/cr-tag-table.sh. */
-#if 0
- /* Convert any DOS-style CRLF EOLs into Unix-style NL.
- Seems like a good idea to have even on Unix, in case the Info
- files are coming from some Windows system across a network. */
- fsize = convert_eols (contents, fsize);
-
- /* EOL conversion can shrink the text quite a bit. We don't
- want to waste storage. */
- contents = xrealloc (contents, 1 + fsize);
- contents[fsize] = '\0';
-#endif
*filesize = fsize;
return contents;
Modified: trunk/info/nodes.c
===================================================================
--- trunk/info/nodes.c 2014-12-27 22:23:46 UTC (rev 5998)
+++ trunk/info/nodes.c 2014-12-29 14:21:24 UTC (rev 5999)
@@ -866,7 +866,6 @@
char **filename, char **nodename,
char *filename_in, char *nodename_in);
static void node_set_body_start (NODE *node);
-static int adjust_nodestart (FILE_BUFFER *file_buffer, NODE *tag);
/* Return a pointer to a newly allocated NODE structure, with
fields filled in. */
@@ -1082,6 +1081,35 @@
return node;
}
+
+/* Convert any CRLF pairs in the SOURCE file and place the converted buffer in
+ DESTINATION. DESTINATION->contents must be allocated on the heap and at
+ least as big as SOURCE->contents, including a terminating null. DESTINATION
+ is allowed to be the same as SOURCE to convert in place. */
+void
+convert_eols (FILE_BUFFER *destination, FILE_BUFFER *source)
+{
+ register char *d = destination->contents;
+ register char *s = source->contents;
+
+ long textlen = source->filesize;
+ while (textlen--)
+ {
+ if (*s == '\r' && textlen && s[1] == '\n')
+ {
+ s++;
+ textlen--;
+ }
+ *d++ = *s++;
+ }
+ *d = '\0';
+
+ /* EOL conversion can shrink the text quite a bit. We don't
+ want to waste storage. */
+ destination->contents = xrealloc (destination->contents,
+ d - destination->contents + 1);
+}
+
/* Magic number that RMS used to decide how much a tags table pointer could
be off by. I feel that it should be much smaller, like 4. */
#define DEFAULT_INFO_FUDGE 1000
@@ -1093,7 +1121,7 @@
Set NODE->nodestart directly on the separator that precedes this node.
If the node could not be found, return 0. */
static int
-adjust_nodestart (FILE_BUFFER *fb, NODE *node)
+adjust_nodestart (FILE_BUFFER *fb, NODE *node, int slack)
{
long position = -1;
SEARCH_BINDING s;
@@ -1112,15 +1140,15 @@
position = find_node_in_binding (node->nodename, &s);
}
- if (position == -1)
+ if (position == -1)
{
if (strict_node_location_p)
return 0;
/* Oh well, I guess we have to try to find it in a larger area. */
- s.start -= DEFAULT_INFO_FUDGE;
- s.end += DEFAULT_INFO_FUDGE;
+ s.start -= slack;
+ s.end += slack;
if (s.start < 0)
s.start = 0;
@@ -1132,17 +1160,101 @@
/* If the node still couldn't be found, we lose big. */
if (position == -1)
return 0;
-
+
/* Set the flag in NODE->flags to say that the the tags table could
need updating (if we used a tag to get here, that is). */
if (node->flags & N_HasTagsTable)
node->flags |= N_UpdateTags;
}
+ /* Do we want this? */
+ /* TODO: Use TAG again to store the tags, and add an extra field to store
+ the original values. */
node->nodestart = s.buffer + position - fb->contents;
return 1;
}
+/* Look in the contents of *FB_PTR for a node referred to with TAG.
+
+ If we have to update the contents of the file, *PARENT and *FB_PTR can be
+ changed to a different FILE_BUFFER. */
+static int
+find_node_from_tag (FILE_BUFFER **parent, FILE_BUFFER **fb_ptr, NODE *tag)
+{
+ int success;
+
+ FILE_BUFFER *fb = *fb_ptr;
+ int file_already_used = 1;
+ FILE_BUFFER *dest_fb;
+ int slack;
+
+ /* Start off with a small fudge to reduce chance of finding a node and then
+ later having to convert the EOL's, leaving us with the question of what to
+ do with the existing buffer and the nodes that refer to it. */
+ if (!(fb->flags & N_EOLs_Converted))
+ slack = DEFAULT_INFO_FUDGE;
+ else
+ slack = DEFAULT_INFO_FUDGE;
+
+ success = adjust_nodestart (fb, tag, slack);
+ if (success)
+ return success;
+
+ if (fb->flags & N_EOLs_Converted || strict_node_location_p)
+ return 0;
+
+ /* Convert EOL's. If the Info file was produced under MS-Windows with
+ some versions of makeinfo, it's possible that it has CR-LF line endings
+ with the CR bytes not counted in the tag table. */
+
+ /* TODO: Check if there are already nodes in windows from this file. If
+ not, we can convert the buffer in place. */
+
+ if (file_already_used)
+ {
+ FILE_BUFFER *new_fb = xmalloc (sizeof (FILE_BUFFER));
+
+ memcpy (new_fb, fb, sizeof (FILE_BUFFER));
+ new_fb->contents = xmalloc (fb->filesize + 1);
+
+ /* TODO: Copy and restore tags table. */
+
+ add_pointer_to_array (new_fb, info_loaded_files_index,
+ info_loaded_files, info_loaded_files_slots, 10);
+
+ dest_fb = new_fb;
+ }
+
+ convert_eols (dest_fb, fb);
+ dest_fb->flags |= N_EOLs_Converted;
+
+ success = adjust_nodestart (dest_fb, tag, DEFAULT_INFO_FUDGE);
+ if (success)
+ {
+ /* Stop the old record being used again. */
+ if (dest_fb != fb)
+ {
+ fb->fullpath = "";
+ fb->filename = "";
+
+ /* TODO: Could we also try to convert nodes referring to the old
+ buffer, to save space? */
+ }
+
+ /* If file is split, leave PARENT as it is, otherwise update both FB_PTR
+ and PARENT to the new file. */
+ if (*parent == *fb_ptr)
+ *parent = dest_fb;
+ *fb_ptr = dest_fb;
+ return success;
+ }
+ else
+ /* Throw the converted buffer away? Or keep it to stop us ever having
+ to do the conversion step again? */
+
+ return 0;
+}
+
/* Calculate the length of the node. */
static void
set_tag_nodelen (FILE_BUFFER *subfile, NODE *tag)
@@ -1164,13 +1276,20 @@
{
NODE *tag = *tag_ptr;
NODE *node;
- /* If not a split file, subfile == fb */
- FILE_BUFFER *subfile;
+ int is_anchor;
+ NODE *anchor_tag;
+ int node_pos, anchor_pos;
+
+ FILE_BUFFER *parent; /* File containing tag table. */
+ FILE_BUFFER *subfile; /* File containing node. */
if (!tag->subfile)
- subfile = fb;
+ parent = subfile = fb;
else
- subfile = info_find_subfile (tag->subfile);
+ {
+ parent = fb;
+ subfile = info_find_subfile (tag->subfile);
+ }
if (!subfile)
return NULL;
@@ -1189,41 +1308,10 @@
node = 0;
- if (tag->nodelen != 0) /* If not an anchor. */
+ is_anchor = tag->nodelen == 0;
+
+ if (is_anchor)
{
- /* If TAG->nodelen hasn't been calculated yet, then we aren't
- in a position to trust the entry pointer. Adjust things so
- that TAG->nodestart gets the exact address of the start of
- the node separator which starts this node. If we cannot
- do that, the node isn't really here. */
- if (tag->nodelen == -1)
- {
- if (!adjust_nodestart (subfile, tag))
- return NULL; /* Node not found. */
-
- set_tag_nodelen (subfile, tag);
- }
-
- tag->contents = subfile->contents + tag->nodestart;
- tag->contents += skip_node_separator (tag->contents);
- node_set_body_start (tag);
-
- /* Read locations of references in node and similar. Strip Info file
- syntax from node if preprocess_nodes=On. Adjust the offsets of
- anchors that occur within the node.*/
- node = scan_node_contents (fb, tag_ptr);
- node->nodename = xstrdup (node->nodename);
-
- /* We can't set this when tag table is built, because
- if file is split, we don't know which of the sub-files
- are compressed. */
- if (subfile->flags & N_IsCompressed)
- node->flags |= N_IsCompressed;
- }
- else /* anchor, return containing node */
- {
- int anchor_pos, node_pos;
-
anchor_pos = tag_ptr - fb->tags;
/* Look backwards in the tag table for the node preceding
@@ -1239,25 +1327,56 @@
if (node_pos < 0)
return NULL;
- /* Get the actual node from the tag. This is a recursive call, but
- it can't recurse again, because we call it with a real node. */
- node = info_node_of_tag (fb, &fb->tags[node_pos]);
+ anchor_tag = tag;
+ tag = fb->tags[node_pos];
+ tag_ptr = &fb->tags[node_pos];
+ }
- if (node)
- {
- /* Start displaying the node at the anchor position. */
- node->display_pos = tag->nodestart
- - (node->nodestart
- + skip_node_separator (subfile->contents
- + fb->tags[node_pos]->nodestart));
+ /* Get the node. */
- /* Otherwise an anchor at the end of a node ends up displaying at
- the end of the last line of the node (way over on the right of
- the screen), which looks wrong. */
- if (node->display_pos >= (unsigned long) node->nodelen)
- node->display_pos = node->nodelen - 1;
- }
+ /* If TAG->nodelen hasn't been calculated yet, then we aren't
+ in a position to trust the entry pointer. Adjust things so
+ that TAG->nodestart gets the exact address of the start of
+ the node separator which starts this node. If we cannot
+ do that, the node isn't really here. */
+ if (tag->nodelen == -1)
+ {
+ if (!find_node_from_tag (&parent, &subfile, tag))
+ return NULL; /* Node not found. */
+
+ set_tag_nodelen (subfile, tag);
}
+ tag->contents = subfile->contents + tag->nodestart;
+ tag->contents += skip_node_separator (tag->contents);
+ node_set_body_start (tag);
+
+ /* Read locations of references in node and similar. Strip Info file
+ syntax from node if preprocess_nodes=On. Adjust the offsets of
+ anchors that occur within the node.*/
+ node = scan_node_contents (parent, tag_ptr);
+ node->nodename = xstrdup (node->nodename);
+
+ /* We can't set this when tag table is built, because
+ if file is split, we don't know which of the sub-files
+ are compressed. */
+ if (subfile->flags & N_IsCompressed)
+ node->flags |= N_IsCompressed;
+
+ if (is_anchor)
+ {
+ /* Start displaying the node at the anchor position. */
+
+ node->display_pos = anchor_tag->nodestart
+ - (node->nodestart
+ + skip_node_separator (subfile->contents + tag->nodestart));
+
+ /* Otherwise an anchor at the end of a node ends up displaying at
+ the end of the last line of the node (way over on the right of
+ the screen), which looks wrong. */
+ if (node->display_pos >= (unsigned long) node->nodelen)
+ node->display_pos = node->nodelen - 1;
+ }
+
return node;
}
Modified: trunk/info/nodes.h
===================================================================
--- trunk/info/nodes.h 2014-12-27 22:23:46 UTC (rev 5998)
+++ trunk/info/nodes.h 2014-12-29 14:21:24 UTC (rev 5999)
@@ -70,6 +70,7 @@
#define N_IsIndex 0x200 /* An index node. */
#define N_IsDir 0x400 /* A dir node. */
#define N_Subfile 0x800 /* File buffer is a subfile of a split file. */
+#define N_EOLs_Converted 0x1000 /* CR bytes were stripped before LF. */
/* String constants. */
#define INFO_FILE_LABEL "File:"
[Prev in Thread] | Current Thread | [Next in Thread]
- [5999] better support for files produced with CR-LF line endings, Gavin D. Smith <=