« Return to Thread: Proposal: immediate strings

Re: Proposal: immediate strings

by Dmitry Antipov-3 :: Rate this Message:

| View in Thread

On 05/29/2012 11:38 AM, Paul Eggert wrote:

> On 05/28/2012 11:55 PM, Dmitry Antipov wrote:
>> To avoid using 'packed' attribute and speedup an inefficient access to u.dat.size
>> field, it's possible to use 'ptrdiff_t X : BITS_PER_PTRDIFF_T - 2'
>> bitfields - it's still wide enough to hold the values up to STRING_BYTES_BOUND.
>
> That's not true on 32-bit hosts configured --with-wide-int.
> On such hosts, a string can contain up to PTRDIFF_MAX - 1 bytes.
> This proposal would lower the limit to PTRDIFF_MAX / 4 bytes.
> In contrast, the proposal in
> <http://lists.gnu.org/archive/html/emacs-devel/2012-05/msg00460.html>
> would merely lower it to PTRDIFF_MAX - 65536 bytes.
OK, this allows

min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX - 1) - 1)

in all configurations (so PTRDIFF_MAX - 2 for --with-wide-int), eventually
uses _all_ bits by eliminating signed values.

Dmitry

=== modified file 'src/alloc.c'
--- src/alloc.c 2012-05-25 18:19:24 +0000
+++ src/alloc.c 2012-05-29 13:28:14 +0000
@@ -147,20 +147,14 @@
 /* Mark, unmark, query mark bit of a Lisp string.  S must be a pointer
    to a struct Lisp_String.  */
 
-#define MARK_STRING(S) ((S)->size |= ARRAY_MARK_FLAG)
-#define UNMARK_STRING(S) ((S)->size &= ~ARRAY_MARK_FLAG)
-#define STRING_MARKED_P(S) (((S)->size & ARRAY_MARK_FLAG) != 0)
+#define MARK_STRING(S) ((S)->u.imm.gcmarkbit = 1)
+#define UNMARK_STRING(S) ((S)->u.imm.gcmarkbit = 0)
+#define STRING_MARKED_P(S) ((S)->u.imm.gcmarkbit)
 
 #define VECTOR_MARK(V) ((V)->header.size |= ARRAY_MARK_FLAG)
 #define VECTOR_UNMARK(V) ((V)->header.size &= ~ARRAY_MARK_FLAG)
 #define VECTOR_MARKED_P(V) (((V)->header.size & ARRAY_MARK_FLAG) != 0)
 
-/* Value is the number of bytes of S, a pointer to a struct Lisp_String.
-   Be careful during GC, because S->size contains the mark bit for
-   strings.  */
-
-#define GC_STRING_BYTES(S) (STRING_BYTES (S))
-
 /* Global variables.  */
 struct emacs_globals globals;
 
@@ -392,6 +386,7 @@
 static void mark_stack (void);
 static int live_vector_p (struct mem_node *, void *);
 static int live_buffer_p (struct mem_node *, void *);
+static int live_string_data_p (struct Lisp_String *);
 static int live_string_p (struct mem_node *, void *);
 static int live_cons_p (struct mem_node *, void *);
 static int live_symbol_p (struct mem_node *, void *);
@@ -1761,7 +1756,8 @@
    a pointer to the `u.data' member of its sdata structure; the
    structure starts at a constant offset in front of that.  */
 
-#define SDATA_OF_STRING(S) ((struct sdata *) ((S)->data - SDATA_DATA_OFFSET))
+#define SDATA_OF_STRING(S) ((S)->u.imm.immbit ? (struct sdata *) NULL \
+  : ((struct sdata *) ((S)->u.dat.data - SDATA_DATA_OFFSET)))
 
 
 #ifdef GC_CHECK_STRING_OVERRUN
@@ -1843,21 +1839,30 @@
 
 static int check_string_bytes_count;
 
-#define CHECK_STRING_BYTES(S) STRING_BYTES (S)
-
-
-/* Like GC_STRING_BYTES, but with debugging check.  */
+#define CHECK_STRING_BYTES(S) string_bytes (S)
 
 ptrdiff_t
 string_bytes (struct Lisp_String *s)
 {
-  ptrdiff_t nbytes =
-    (s->size_byte < 0 ? s->size & ~ARRAY_MARK_FLAG : s->size_byte);
-
-  if (!PURE_POINTER_P (s)
-      && s->data
-      && nbytes != SDATA_NBYTES (SDATA_OF_STRING (s)))
-    abort ();
+  ptrdiff_t nbytes;
+
+  if (s->u.imm.immbit)
+    {
+      nbytes = s->u.imm.size_byte == STRING_UNIBYTE_IMM_MAX ?
+ s->u.imm.size : s->u.imm.size_byte;
+      eassert (nbytes < STRING_IMM_SIZE);
+    }
+  else
+    {
+      nbytes = s->u.dat.size_byte == STRING_UNIBYTE_DAT_MAX ?
+ s->u.dat.size : s->u.dat.size_byte;
+      eassert (nbytes >= STRING_IMM_SIZE);
+      if (!PURE_POINTER_P (s) && s->u.dat.data &&
+  nbytes != SDATA_NBYTES (SDATA_OF_STRING (s)))
+ /* Normal non-pure string with size mismatch.  */
+ abort ();
+    }
+
   return nbytes;
 }
 
@@ -1882,7 +1887,7 @@
  CHECK_STRING_BYTES (from->string);
 
       if (from->string)
- nbytes = GC_STRING_BYTES (from->string);
+ nbytes = string_bytes (from->string);
       else
  nbytes = SDATA_NBYTES (from);
 
@@ -2028,8 +2033,8 @@
   /* Determine the number of bytes needed to store NBYTES bytes
      of string data.  */
   needed = SDATA_SIZE (nbytes);
-  old_data = s->data ? SDATA_OF_STRING (s) : NULL;
-  old_nbytes = GC_STRING_BYTES (s);
+  old_data = s->u.dat.data ? SDATA_OF_STRING (s) : NULL;
+  old_nbytes = string_bytes (s);
 
   MALLOC_BLOCK_INPUT;
 
@@ -2088,13 +2093,11 @@
   MALLOC_UNBLOCK_INPUT;
 
   data->string = s;
-  s->data = SDATA_DATA (data);
+  s->u.dat.data = SDATA_DATA (data);
 #ifdef GC_CHECK_STRING_BYTES
   SDATA_NBYTES (data) = nbytes;
 #endif
-  s->size = nchars;
-  s->size_byte = nbytes;
-  s->data[nbytes] = '\0';
+  s->u.dat.data[nbytes] = '\0';
 #ifdef GC_CHECK_STRING_OVERRUN
   memcpy ((char *) data + needed, string_overrun_cookie,
   GC_STRING_OVERRUN_COOKIE_SIZE);
@@ -2112,6 +2115,12 @@
   consing_since_gc += needed;
 }
 
+#ifdef GC_STRING_STATS
+
+static EMACS_INT total_imm_strings, total_dat_strings, total_imm_intervals;
+static EMACS_INT total_imm_bytes, total_dat_bytes, total_dat_intervals;
+
+#endif
 
 /* Sweep and compact strings.  */
 
@@ -2125,6 +2134,12 @@
   total_strings = total_free_strings = 0;
   total_string_size = 0;
 
+#ifdef GC_STRING_STATS
+  total_imm_strings = total_dat_strings = 0;
+  total_imm_bytes = total_dat_bytes = 0;
+  total_imm_intervals = total_dat_intervals = 0;
+#endif
+
   /* Scan strings_blocks, free Lisp_Strings that aren't marked.  */
   for (b = string_blocks; b; b = next)
     {
@@ -2137,49 +2152,64 @@
  {
   struct Lisp_String *s = b->strings + i;
 
-  if (s->data)
+  if (STRING_MARKED_P (s))
+    {      
+      /* String is live; unmark it and its intervals.  */
+      UNMARK_STRING (s);
+
+      if (!NULL_INTERVAL_P (s->intervals))
+ UNMARK_BALANCE_INTERVALS (s->intervals);
+
+      ++total_strings;
+      total_string_size += string_bytes (s);
+#ifdef GC_STRING_STATS
+      if (s->u.imm.immbit)
+ {
+  total_imm_strings++;
+  total_imm_bytes += string_bytes (s);
+  if (!NULL_INTERVAL_P (s->intervals))
+    total_imm_intervals++;
+ }
+      else
+ {
+  total_dat_strings++;
+  total_dat_bytes += string_bytes (s);
+  if (!NULL_INTERVAL_P (s->intervals))
+    total_dat_intervals++;
+ }
+#endif /* GC_STRING_STATS */
+    }
+  else
     {
-      /* String was not on free-list before.  */
-      if (STRING_MARKED_P (s))
- {
-  /* String is live; unmark it and its intervals.  */
-  UNMARK_STRING (s);
-
-  if (!NULL_INTERVAL_P (s->intervals))
-    UNMARK_BALANCE_INTERVALS (s->intervals);
-
-  ++total_strings;
-  total_string_size += STRING_BYTES (s);
- }
+      if (s->u.imm.immbit)
+ /* Fill data with special pattern. Used by
+   GC to find dead immediate strings.  */
+ memset (s->u.imm.data, 0xff, STRING_IMM_SIZE);
       else
  {
-  /* String is dead.  Put it on the free-list.  */
-  struct sdata *data = SDATA_OF_STRING (s);
+  if (s->u.dat.data)
+    {
+      /* String is dead.  Put it on the free-list.  */
+      struct sdata *data = SDATA_OF_STRING (s);
 
-  /* Save the size of S in its sdata so that we know
-     how large that is.  Reset the sdata's string
-     back-pointer so that we know it's free.  */
+      /* Save the size of S in its sdata so that we know
+ how large that is.  Reset the sdata's string
+ back-pointer so that we know it's free.  */
 #ifdef GC_CHECK_STRING_BYTES
-  if (GC_STRING_BYTES (s) != SDATA_NBYTES (data))
-    abort ();
+      if (string_bytes (s) != SDATA_NBYTES (data))
+ abort ();
 #else
-  data->u.nbytes = GC_STRING_BYTES (s);
+      data->u.nbytes = string_bytes (s);
 #endif
-  data->string = NULL;
-
-  /* Reset the strings's `data' member so that we
-     know it's free.  */
-  s->data = NULL;
-
-  /* Put the string on the free-list.  */
-  NEXT_FREE_LISP_STRING (s) = string_free_list;
-  string_free_list = s;
-  ++nfree;
+      data->string = NULL;
+
+      /* Reset the strings's `data' member so that we
+ know it's free.  */
+      s->u.dat.data = NULL;
+    }
  }
-    }
-  else
-    {
-      /* S was on the free-list before.  Put it there again.  */
+
+      /* Put the string on the free-list.  */
       NEXT_FREE_LISP_STRING (s) = string_free_list;
       string_free_list = s;
       ++nfree;
@@ -2271,12 +2301,12 @@
   /* Check that the string size recorded in the string is the
      same as the one recorded in the sdata structure. */
   if (from->string
-      && GC_STRING_BYTES (from->string) != SDATA_NBYTES (from))
+      && string_bytes (from->string) != SDATA_NBYTES (from))
     abort ();
 #endif /* GC_CHECK_STRING_BYTES */
 
   if (from->string)
-    nbytes = GC_STRING_BYTES (from->string);
+    nbytes = string_bytes (from->string);
   else
     nbytes = SDATA_NBYTES (from);
 
@@ -2312,7 +2342,7 @@
  {
   xassert (tb != b || to < from);
   memmove (to, from, nbytes + GC_STRING_EXTRA);
-  to->string->data = SDATA_DATA (to);
+  to->string->u.dat.data = SDATA_DATA (to);
  }
 
       /* Advance past the sdata we copied to.  */
@@ -2562,13 +2592,24 @@
     return empty_multibyte_string;
 
   s = allocate_string ();
-  allocate_string_data (s, nchars, nbytes);
+  if (nbytes < STRING_IMM_SIZE)
+    {
+      s->u.imm.immbit = 1;
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = nbytes;
+    }
+  else
+    {
+      s->u.imm.immbit = 0;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = nbytes;
+      allocate_string_data (s, nchars, nbytes);
+    }
   XSETSTRING (string, s);
   string_chars_consed += nbytes;
   return string;
 }
 
-
 
 /***********************************************************************
    Float Allocation
@@ -3937,6 +3978,22 @@
   x->color = MEM_BLACK;
 }
 
+/* Non-zero if data of S is valid.  */
+
+static inline int
+live_string_data_p (struct Lisp_String *s)
+{
+  if (s->u.imm.immbit)
+    {
+      unsigned char *p;
+
+      for (p = s->u.imm.data; p < s->u.imm.data + STRING_IMM_SIZE; p++)
+ if (*p != 0xff)
+  return 1;
+      return 0;
+    }
+  return s->u.dat.data != NULL;
+}
 
 /* Value is non-zero if P is a pointer to a live Lisp string on
    the heap.  M is a pointer to the mem_block for P.  */
@@ -3954,7 +4011,7 @@
       return (offset >= 0
       && offset % sizeof b->strings[0] == 0
       && offset < (STRING_BLOCK_SIZE * sizeof b->strings[0])
-      && ((struct Lisp_String *) p)->data != NULL);
+      && live_string_data_p ((struct Lisp_String *) p));
     }
   else
     return 0;
@@ -4869,15 +4926,29 @@
   struct Lisp_String *s;
 
   s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
-  s->data = (unsigned char *) find_string_data_in_pure (data, nbytes);
-  if (s->data == NULL)
-    {
-      s->data = (unsigned char *) pure_alloc (nbytes + 1, -1);
-      memcpy (s->data, data, nbytes);
-      s->data[nbytes] = '\0';
-    }
-  s->size = nchars;
-  s->size_byte = multibyte ? nbytes : -1;
+
+  if (nbytes < STRING_IMM_SIZE)
+    {
+      memcpy (s->u.imm.data, data, nbytes);
+      s->u.imm.data[nbytes] = '\0';
+      s->u.imm.immbit = 1;
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = multibyte ? nbytes : STRING_UNIBYTE_IMM_MAX;
+    }
+  else
+    {
+      s->u.dat.data = (unsigned char *) find_string_data_in_pure (data, nbytes);
+      if (s->u.dat.data == NULL)
+ {
+  s->u.dat.data = (unsigned char *) pure_alloc (nbytes + 1, -1);
+  memcpy (s->u.dat.data, data, nbytes);
+  s->u.dat.data[nbytes] = '\0';
+ }
+      s->u.imm.immbit = 0;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = multibyte ? nbytes : STRING_UNIBYTE_DAT_MAX;
+    }
+
   s->intervals = NULL_INTERVAL;
   XSETSTRING (string, s);
   return string;
@@ -4894,9 +4965,23 @@
   ptrdiff_t nchars = strlen (data);
 
   s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
-  s->size = nchars;
-  s->size_byte = -1;
-  s->data = (unsigned char *) data;
+
+  if (nchars < STRING_IMM_SIZE)
+    {
+      memcpy (s->u.imm.data, data, nchars);
+      s->u.imm.data[nchars] = '\0';
+      s->u.imm.immbit = 1;
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = STRING_UNIBYTE_IMM_MAX;
+    }
+  else
+    {
+      s->u.dat.data = (unsigned char *) data;
+      s->u.imm.immbit = 0;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = STRING_UNIBYTE_DAT_MAX;
+    }
+
   s->intervals = NULL_INTERVAL;
   XSETSTRING (string, s);
   return string;
@@ -6319,6 +6404,34 @@
   return Flist (8, consed);
 }
 
+#ifdef GC_STRING_STATS
+
+DEFUN ("string-stats", Fstring_stats, Sstring_stats, 0, 0, 0,
+       doc: /* Return a list of counters that measures how much
+strings of a particular internal structure are alive after last
+garbage collection, and how many bytes are in them.
+The elements of the value are are as follows:
+  (IMM-STRINGS IMM-BYTES IMM-INTERVALS DAT-STRINGS DAT-BYTES DAT-INTERVALS)
+where IMM-STRINGS is the number of immediate strings, IMM-BYTES is the total
+number of bytes in them, and IMM-INTERVALS is the number of immediate string
+with non-nil text properties. The rest three numbers has the same meaning
+for normal strings, respectively.  */)
+  (void)
+{
+  Lisp_Object data[6];
+
+  data[0] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_strings));
+  data[1] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_bytes));
+  data[2] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_intervals));
+  data[3] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_strings));
+  data[4] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_bytes));
+  data[5] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_intervals));
+
+  return Flist (6, data);
+}
+
+#endif /* GC_STRING_STATS */
+
 /* Find at most FIND_MAX symbols which have OBJ as their value or
    function.  This is used in gdbinit's `xwhichsymbols' command.  */
 
@@ -6547,7 +6660,9 @@
   defsubr (&Sgarbage_collect);
   defsubr (&Smemory_limit);
   defsubr (&Smemory_use_counts);
-
+#ifdef GC_STRING_STATS
+  defsubr (&Sstring_stats);
+#endif
 #if GC_MARK_STACK == GC_USE_GCPROS_CHECK_ZOMBIES
   defsubr (&Sgc_status);
 #endif

=== modified file 'src/fns.c'
--- src/fns.c 2012-05-25 18:19:24 +0000
+++ src/fns.c 2012-05-29 10:00:53 +0000
@@ -2166,8 +2166,8 @@
   int len = CHAR_STRING (charval, str);
   ptrdiff_t size_byte = SBYTES (array);
 
-  if (INT_MULTIPLY_OVERFLOW (SCHARS (array), len)
-      || SCHARS (array) * len != size_byte)
+  if (INT_MULTIPLY_OVERFLOW (size, len)
+      || size * len != size_byte)
     error ("Attempt to change byte length of a string");
   for (idx = 0; idx < size_byte; idx++)
     *p++ = str[idx % len];

=== modified file 'src/lisp.h'
--- src/lisp.h 2012-05-27 07:51:09 +0000
+++ src/lisp.h 2012-05-29 13:27:57 +0000
@@ -743,17 +743,23 @@
 
 /* Convenience macros for dealing with Lisp strings.  */
 
-#define SDATA(string) (XSTRING (string)->data + 0)
+#define SDATA(string) (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.data) : \
+ (XSTRING (string)->u.dat.data))
 #define SREF(string, index) (SDATA (string)[index] + 0)
 #define SSET(string, index, new) (SDATA (string)[index] = (new))
-#define SCHARS(string) (XSTRING (string)->size + 0)
-#define SBYTES(string) (STRING_BYTES (XSTRING (string)) + 0)
+#define SCHARS(string) (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.size) : \
+ (XSTRING (string)->u.dat.size))
+#define SBYTES(string) (string_bytes (XSTRING (string)))
 
 /* Avoid "differ in sign" warnings.  */
 #define SSDATA(x)  ((char *) SDATA (x))
 
-#define STRING_SET_CHARS(string, newsize) \
-    (XSTRING (string)->size = (newsize))
+#define STRING_SET_CHARS(string, newsize) \
+  (XSTRING (string)->u.imm.immbit ? \
+   (XSTRING (string)->u.imm.size = (newsize)) : \
+   (XSTRING (string)->u.dat.size = (newsize)))
 
 #define STRING_COPYIN(string, index, new, count) \
     memcpy (SDATA (string) + index, new, count)
@@ -843,24 +849,17 @@
 #define CDR_SAFE(c) \
   (CONSP ((c)) ? XCDR ((c)) : Qnil)
 
+/* For unibyte immediate string, SIZE_BYTE field is always set to this.  */
+#define STRING_UNIBYTE_IMM_MAX ((1 << (BITS_PER_CHAR - 1)) - 1)
+
+/* For unibyte normal string, SIZE_BYTE field is always set to this.  */
+#define STRING_UNIBYTE_DAT_MAX ((1UL << (BITS_PER_LONG - 1)) - 1)
+
 /* Nonzero if STR is a multibyte string.  */
-#define STRING_MULTIBYTE(STR)  \
-  (XSTRING (STR)->size_byte >= 0)
-
-/* Return the length in bytes of STR.  */
-
-#ifdef GC_CHECK_STRING_BYTES
-
-struct Lisp_String;
-extern ptrdiff_t string_bytes (struct Lisp_String *);
-#define STRING_BYTES(S) string_bytes ((S))
-
-#else /* not GC_CHECK_STRING_BYTES */
-
-#define STRING_BYTES(STR)  \
-  ((STR)->size_byte < 0 ? (STR)->size : (STR)->size_byte)
-
-#endif /* not GC_CHECK_STRING_BYTES */
+#define STRING_MULTIBYTE(string) \
+  (XSTRING (string)->u.imm.immbit ? \
+   (XSTRING (string)->u.imm.size_byte != STRING_UNIBYTE_IMM_MAX) : \
+   (XSTRING (string)->u.dat.size_byte != STRING_UNIBYTE_DAT_MAX))
 
 /* An upper bound on the number of bytes in a Lisp string, not
    counting the terminating null.  This a tight enough bound to
@@ -874,20 +873,32 @@
    private.  The cast to ptrdiff_t ensures that STRING_BYTES_BOUND is
    signed.  */
 #define STRING_BYTES_BOUND  \
-  min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX) - 1)
+  min (MOST_POSITIVE_FIXNUM, \
+       (ptrdiff_t) min (SIZE_MAX, STRING_UNIBYTE_DAT_MAX - 1) - 1)
+
+/* Maximum amount of bytes, including '\0', in an immediate string.  */
+#define STRING_IMM_SIZE (sizeof (void *) + 2 * sizeof (unsigned long) - 2)
 
 /* Mark STR as a unibyte string.  */
 #define STRING_SET_UNIBYTE(STR)  \
-  do { if (EQ (STR, empty_multibyte_string))  \
-      (STR) = empty_unibyte_string;  \
-    else XSTRING (STR)->size_byte = -1; } while (0)
+  do { if (EQ (STR, empty_multibyte_string)) \
+      (STR) = empty_unibyte_string; \
+    else if (XSTRING (STR)->u.imm.immbit) \
+      XSTRING (STR)->u.imm.size_byte = STRING_UNIBYTE_IMM_MAX; \
+    else \
+      XSTRING (STR)->u.dat.size_byte = STRING_UNIBYTE_DAT_MAX; \
+  } while (0)
 
 /* Mark STR as a multibyte string.  Assure that STR contains only
    ASCII characters in advance.  */
-#define STRING_SET_MULTIBYTE(STR)  \
-  do { if (EQ (STR, empty_unibyte_string))  \
-      (STR) = empty_multibyte_string;  \
-    else XSTRING (STR)->size_byte = XSTRING (STR)->size; } while (0)
+#define STRING_SET_MULTIBYTE(STR)  \
+  do { if (EQ (STR, empty_unibyte_string))  \
+      (STR) = empty_multibyte_string;  \
+    else if (XSTRING (STR)->u.imm.immbit)  \
+      XSTRING (STR)->u.imm.size_byte = XSTRING (STR)->u.imm.size; \
+    else  \
+      XSTRING (STR)->u.dat.size_byte = XSTRING (STR)->u.dat.size; \
+  } while (0)
 
 /* Get text properties.  */
 #define STRING_INTERVALS(STR)  (XSTRING (STR)->intervals + 0)
@@ -898,12 +909,53 @@
 /* In a string or vector, the sign bit of the `size' is the gc mark bit */
 
 struct Lisp_String
-  {
-    ptrdiff_t size;
-    ptrdiff_t size_byte;
-    INTERVAL intervals; /* text properties in this string */
-    unsigned char *data;
-  };
+{
+  /* Text properties in this string.  Should be the first
+     member since NEXT_FREE_LISP_STRING from alloc.c uses it.  */
+  INTERVAL intervals;
+
+  union {
+    /* GC mark bit and subtype bit are in IMM just by convention - when
+       IMMBIT is 0, the DAT field is used except it's UNUSED field.  */
+    struct {
+      unsigned immbit : 1;
+      unsigned size : BITS_PER_CHAR - 1;
+      unsigned char data[STRING_IMM_SIZE];
+      unsigned size_byte : BITS_PER_CHAR - 1;
+      unsigned gcmarkbit : 1;
+    } imm;
+
+    struct {
+      unsigned immbit : 1;
+      unsigned long size : BITS_PER_LONG - 1;
+      unsigned char *data;
+      unsigned long size_byte : BITS_PER_LONG - 1;
+      unsigned gcmarkbit : 1;
+    } dat;
+  } u;
+};
+
+/* Return the length in bytes of STR.  */
+
+#ifdef GC_CHECK_STRING_BYTES
+
+struct Lisp_String;
+extern ptrdiff_t string_bytes (struct Lisp_String *);
+#define STRING_BYTES(S) string_bytes ((S))
+
+#else /* not GC_CHECK_STRING_BYTES */
+
+static inline
+ptrdiff_t string_bytes (struct Lisp_String *s)
+{
+  if (s->u.imm.immbit)
+    return s->u.imm.size_byte == STRING_UNIBYTE_IMM_MAX ?
+      s->u.imm.size : s->u.imm.size_byte;
+  return s->u.dat.size_byte == STRING_UNIBYTE_DAT_MAX ?
+    s->u.dat.size : s->u.dat.size_byte;
+}
+
+#endif /* not GC_CHECK_STRING_BYTES */
 
 /* Header of vector-like objects.  This documents the layout constraints on
    vectors and pseudovectors other than struct Lisp_Subr.  It also prevents

 « Return to Thread: Proposal: immediate strings