[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR)
From: Stefan Monnier
Subject: Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR)
Date: Sat, 16 Feb 2008 09:33:24 -0500
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.0.60 (gnu/linux)
> If we are going to inhibit aset on multibyte strings, I think we
> should inhibit aset on any strings to avoid a further confusion.
> I think someone should try making it work.
> The way I suggested should not be terribly hard.
The problem is the following: while it can be made to work, it will be
inefficient. If we just make it work, the callers will never get to
know that they're doing things in a terribly inefficient way. The real
fix is to change the caller.
BTW, I suggest the patch below to fix one such caller.
Stefan
--- orig/src/casefiddle.c
+++ mod/src/casefiddle.c
@@ -75,23 +76,18 @@
return obj;
}
- if (STRINGP (obj))
+ if (!STRINGP (obj))
+ wrong_type_argument (Qchar_or_string_p, obj);
+ else if (STRING_UNIBYTE (obj))
{
- int multibyte = STRING_MULTIBYTE (obj);
- int i, i_byte, len;
- int size = SCHARS (obj);
+ EMACS_INT i;
+ EMACS_INT size = SCHARS (obj);
obj = Fcopy_sequence (obj);
- for (i = i_byte = 0; i < size; i++, i_byte += len)
+ for (i = 0; i < size; i++)
{
- if (multibyte)
- c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
- else
- {
- c = SREF (obj, i_byte);
- len = 1;
- MAKE_CHAR_MULTIBYTE (c);
- }
+ c = SREF (obj, i);
+ MAKE_CHAR_MULTIBYTE (c);
c1 = c;
if (inword && flag != CASE_CAPITALIZE_UP)
c = DOWNCASE (c);
@@ -102,24 +98,51 @@
inword = (SYNTAX (c) == Sword);
if (c != c1)
{
- if (! multibyte)
- {
- MAKE_CHAR_UNIBYTE (c);
- SSET (obj, i_byte, c);
- }
- else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
- SSET (obj, i_byte, c);
- else
- {
- Faset (obj, make_number (i), make_number (c));
- i_byte += CHAR_BYTES (c) - len;
- }
+ MAKE_CHAR_UNIBYTE (c);
+ if (c < 0 || c > 255)
+ error ("Non-unibyte char in unibyte string");
+ SSET (obj, i, c);
}
}
return obj;
}
+ else
+ {
+ EMACS_INT i, i_byte, len;
+ EMACS_INT size = SCHARS (obj);
+ USE_SAFE_ALLOCA;
+ unsigned char *dst, *o;
+ /* Over-allocate by 12%: this is a minor overhead, but should be
+ sufficient in 99.999% of the cases to avoid a reallocation. */
+ EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
+ SAFE_ALLOCA (dst, void *, o_size);
+ o = dst;
- wrong_type_argument (Qchar_or_string_p, obj);
+ for (i = i_byte = 0; i < size; i++, i_byte += len)
+ {
+ if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
+ { /* Not enough space for the next char: grow the destination. */
+ unsigned char *old_dst = dst;
+ o_size += o_size; /* Probably overkill, but extremely rare. */
+ SAFE_ALLOCA (dst, void *, o_size);
+ bcopy (old_dst, dst, o - old_dst);
+ o = dst + (o - old_dst);
+ }
+ c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
+ if (inword && flag != CASE_CAPITALIZE_UP)
+ c = DOWNCASE (c);
+ else if (!UPPERCASEP (c)
+ && (!inword || flag != CASE_CAPITALIZE_UP))
+ c = UPCASE1 (c);
+ if ((int) flag >= (int) CASE_CAPITALIZE)
+ inword = (SYNTAX (c) == Sword);
+ o += CHAR_STRING (c, o);
+ }
+ eassert (o - dst <= o_size);
+ obj = make_multibyte_string (dst, size, o - dst);
+ SAFE_FREE ();
+ return obj;
+ }
}
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
@@ -329,10 +352,10 @@
return Qnil;
}
-Lisp_Object
+static Lisp_Object
operate_on_word (arg, newpoint)
Lisp_Object arg;
- int *newpoint;
+ EMACS_INT *newpoint;
{
Lisp_Object val;
int farend;
@@ -358,7 +381,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_UP, beg, end);
@@ -373,7 +396,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_DOWN, beg, end);
@@ -390,7 +413,7 @@
Lisp_Object arg;
{
Lisp_Object beg, end;
- int newpoint;
+ EMACS_INT newpoint;
XSETFASTINT (beg, PT);
end = operate_on_word (arg, &newpoint);
casify_region (CASE_CAPITALIZE, beg, end);
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), (continued)
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Stephen J. Turnbull, 2008/02/13
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Richard Stallman, 2008/02/13
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Kenichi Handa, 2008/02/14
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Stefan Monnier, 2008/02/14
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Eli Zaretskii, 2008/02/15
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Miles Bader, 2008/02/15
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Eli Zaretskii, 2008/02/16
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Richard Stallman, 2008/02/16
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Stefan Monnier <=
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Richard Stallman, 2008/02/17
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Stefan Monnier, 2008/02/17
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Kenichi Handa, 2008/02/17
- Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Richard Stallman, 2008/02/18
Re: (aset UNIBYTE-STRING MULTIBYTE-CHAR), Richard Stallman, 2008/02/13