Substring patch

From: David Fox <fox_at_cat.nyu.edu>
Date: 03 Dec 1996 05:35:33 -0500

Here's a patch that seems to implement shared substrings. The
question is how big a performance hit does using bitfields cause? The
main drawback is that we can't have negative length strings any more. :)
And it's not portable to machines where int is not 32 bits ... or
maybe it is? DISCLAIMER - this patch is neither official, well
tested, nor even necessarily a particularly good idea. If someone
has a good program to benchmark this with I'd be much obliged.

diff -ru STk-3.1/Src/stk.h~ STk-3.1/Src/stk.h
--- STk-3.1/Src/stk.h~ Tue Jul 23 10:00:24 1996
+++ STk-3.1/Src/stk.h Mon Dec 2 20:05:33 1996
@@ -115,7 +115,7 @@
          struct {char *name; struct obj * (*f)(void *,...);} subr;
          struct {struct obj *env; struct obj *code;} closure;
          struct {struct obj *code; } macro;
- struct {long dim; char *data;} string;
+ struct {uint sub:1; uint dim:31; char *data;} string;
          struct {long dim; struct obj **data;} vector;
          struct {struct port_descr *p;} port;
          struct {char *data;} keyword;
diff -ru STk-3.1/Src/str.c~ STk-3.1/Src/str.c
--- STk-3.1/Src/str.c~ Mon May 13 16:42:51 1996
+++ STk-3.1/Src/str.c Mon Dec 2 20:05:32 1996
@@ -70,6 +70,7 @@
   STk_disallow_sigint();
   NEWCELL(z, tc_string);
 
+ z->storage_as.string.sub = 0;
   z->storage_as.string.dim = len;
   z->storage_as.string.data = (char *) must_malloc(len+1);
   z->storage_as.string.data[len] = 0;
@@ -80,6 +81,23 @@
   return z;
 }
 
+SCM STk_makesubstr(int len, char *init)
+{
+ SCM z;
+
+ STk_disallow_sigint();
+ NEWCELL(z, tc_string);
+
+ z->storage_as.string.sub = 1;
+ z->storage_as.string.dim = len;
+ z->storage_as.string.data = init;
+ z->storage_as.string.data[len] = 0;
+
+ STk_allow_sigint();
+
+ return z;
+}
+
 
 /**** Section 6.7 ****/
 
@@ -182,8 +200,11 @@
   if ((from=STk_integer_value(start))==LONG_MIN) Err(msg ,start);
   if ((to=STk_integer_value(end)) == LONG_MIN) Err(msg ,end);
 
- if (0 <= from && from <= to && to <= STRSIZE(string))
- return STk_makestrg(to - from, CHARS(string)+from);
+ if (0 <= from && from <= to)
+ if (to < STRSIZE(string))
+ return STk_makestrg(to - from, CHARS(string)+from);
+ else if (to == STRSIZE(string))
+ return STk_makesubstr(to - from, CHARS(string)+from);
 
   Err("substring: bad bounds", Cons(start, end));
 }
diff -ru STk-3.1/Src/gc.c~ STk-3.1/Src/gc.c
--- STk-3.1/Src/gc.c~ Tue Jun 11 15:54:16 1996
+++ STk-3.1/Src/gc.c Mon Dec 2 20:05:32 1996
@@ -322,7 +322,8 @@
           case tc_closure: break;
           case tc_free_cell: break;
           case tc_char: break;
- case tc_string: free(ptr->storage_as.string.data); break;
+ case tc_string: if (!ptr->storage_as.string.sub)
+ free(ptr->storage_as.string.data); break;
           case tc_vector: free(ptr->storage_as.vector.data); break;
           case tc_eof: break;
           case tc_undefined: break;

-- 
David Fox	    http://www.cat.nyu.edu/fox		 xoF divaD
NYU Media Research Lab   fox_at_cat.nyu.edu    baL hcraeseR aideM UYN
Received on Tue Dec 03 1996 - 11:27:48 CET

This archive was generated by hypermail 2.3.0 : Mon Jul 21 2014 - 19:38:59 CEST