Here's a patch that seems to implement shared substrings.  The
question is how big a performance hit does using bitfields cause?  The
main drawback is that we can't have negative length strings any more. :)
And it's not portable to machines where int is not 32 bits ... or
maybe it is?  DISCLAIMER - this patch is neither official, well
tested, nor even necessarily a particularly good idea.  If someone
has a good program to benchmark this with, I'd be much obliged.
diff -ru STk-3.1/Src/stk.h~ STk-3.1/Src/stk.h
--- STk-3.1/Src/stk.h~	Tue Jul 23 10:00:24 1996
+++ STk-3.1/Src/stk.h	Mon Dec  2 20:05:33 1996
@@ -115,7 +115,7 @@
          struct {char *name; struct obj * (*f)(void *,...);} 	subr;
          struct {struct obj *env; struct obj *code;} 		closure;
          struct {struct obj *code; }				macro;
-	 struct {long dim; char *data;} 			string;
+	 struct {uint sub:1; uint dim:31; char *data;}		string;
          struct {long dim; struct obj **data;} 			vector;
          struct {struct port_descr *p;}				port;
          struct {char *data;} 					keyword;
diff -ru STk-3.1/Src/str.c~ STk-3.1/Src/str.c
--- STk-3.1/Src/str.c~	Mon May 13 16:42:51 1996
+++ STk-3.1/Src/str.c	Mon Dec  2 20:05:32 1996
@@ -70,6 +70,7 @@
   STk_disallow_sigint();
   NEWCELL(z, tc_string);
 
+  z->storage_as.string.sub = 0;
   z->storage_as.string.dim  = len;
   z->storage_as.string.data = (char *) must_malloc(len+1); 
   z->storage_as.string.data[len] = 0;
@@ -80,6 +81,23 @@
   return z;
 }
 
+SCM STk_makesubstr(int len, char *init)
+{
+  SCM  z;
+
+  STk_disallow_sigint();
+  NEWCELL(z, tc_string);
+
+  z->storage_as.string.sub = 1;
+  z->storage_as.string.dim  = len;
+  z->storage_as.string.data = init;
+  z->storage_as.string.data[len] = 0;
+
+  STk_allow_sigint();
+
+  return z;
+}
+
 
 /**** Section 6.7 ****/
 
@@ -182,8 +200,11 @@
   if ((from=STk_integer_value(start))==LONG_MIN) Err(msg ,start);
   if ((to=STk_integer_value(end)) == LONG_MIN)   Err(msg ,end);
 
-  if (0 <= from && from <= to && to <= STRSIZE(string))
-    return STk_makestrg(to - from, CHARS(string)+from);
+  if (0 <= from && from <= to)
+    if (to < STRSIZE(string))
+      return STk_makestrg(to - from, CHARS(string)+from);
+    else if (to == STRSIZE(string))
+      return STk_makesubstr(to - from, CHARS(string)+from);
 
   Err("substring: bad bounds", Cons(start, end));
 }
diff -ru STk-3.1/Src/gc.c~ STk-3.1/Src/gc.c
--- STk-3.1/Src/gc.c~	Tue Jun 11 15:54:16 1996
+++ STk-3.1/Src/gc.c	Mon Dec  2 20:05:32 1996
@@ -322,7 +322,8 @@
           case tc_closure:     break;
           case tc_free_cell:   break;
           case tc_char:	       break;
-	  case tc_string:      free(ptr->storage_as.string.data); break;
+	  case tc_string:      if (!ptr->storage_as.string.sub)
+	    			 free(ptr->storage_as.string.data); break;
           case tc_vector:      free(ptr->storage_as.vector.data); break;
           case tc_eof:         break;
           case tc_undefined:   break;
-- 
David Fox	    http://www.cat.nyu.edu/fox		 xoF divaD
NYU Media Research Lab   fox_at_cat.nyu.edu    baL hcraeseR aideM UYN
Received on Tue Dec 03 1996 - 11:27:48 CET