Here's a patch that seems to implement shared substrings. The
question is how big a performance hit does using bitfields cause? The
main drawback is that we can't have negative length strings any more. :)
And it's not portable to machines where int is not 32 bits ... or
maybe it is? DISCLAIMER - this patch is neither official, well
tested, nor even necessarily a particularly good idea. If someone
has a good program to benchmark this with, I'd be much obliged.
diff -ru STk-3.1/Src/stk.h~ STk-3.1/Src/stk.h
--- STk-3.1/Src/stk.h~ Tue Jul 23 10:00:24 1996
+++ STk-3.1/Src/stk.h Mon Dec 2 20:05:33 1996
@@ -115,7 +115,7 @@
struct {char *name; struct obj * (*f)(void *,...);} subr;
struct {struct obj *env; struct obj *code;} closure;
struct {struct obj *code; } macro;
- struct {long dim; char *data;} string;
+ struct {uint sub:1; uint dim:31; char *data;} string;
struct {long dim; struct obj **data;} vector;
struct {struct port_descr *p;} port;
struct {char *data;} keyword;
diff -ru STk-3.1/Src/str.c~ STk-3.1/Src/str.c
--- STk-3.1/Src/str.c~ Mon May 13 16:42:51 1996
+++ STk-3.1/Src/str.c Mon Dec 2 20:05:32 1996
@@ -70,6 +70,7 @@
STk_disallow_sigint();
NEWCELL(z, tc_string);
+ z->storage_as.string.sub = 0;
z->storage_as.string.dim = len;
z->storage_as.string.data = (char *) must_malloc(len+1);
z->storage_as.string.data[len] = 0;
@@ -80,6 +81,23 @@
return z;
}
+SCM STk_makesubstr(int len, char *init)
+{
+ SCM z;
+
+ STk_disallow_sigint();
+ NEWCELL(z, tc_string);
+
+ z->storage_as.string.sub = 1;
+ z->storage_as.string.dim = len;
+ z->storage_as.string.data = init;
+ z->storage_as.string.data[len] = 0;
+
+ STk_allow_sigint();
+
+ return z;
+}
+
/**** Section 6.7 ****/
@@ -182,8 +200,11 @@
if ((from=STk_integer_value(start))==LONG_MIN) Err(msg ,start);
if ((to=STk_integer_value(end)) == LONG_MIN) Err(msg ,end);
- if (0 <= from && from <= to && to <= STRSIZE(string))
- return STk_makestrg(to - from, CHARS(string)+from);
+ if (0 <= from && from <= to)
+ if (to < STRSIZE(string))
+ return STk_makestrg(to - from, CHARS(string)+from);
+ else if (to == STRSIZE(string))
+ return STk_makesubstr(to - from, CHARS(string)+from);
Err("substring: bad bounds", Cons(start, end));
}
diff -ru STk-3.1/Src/gc.c~ STk-3.1/Src/gc.c
--- STk-3.1/Src/gc.c~ Tue Jun 11 15:54:16 1996
+++ STk-3.1/Src/gc.c Mon Dec 2 20:05:32 1996
@@ -322,7 +322,8 @@
case tc_closure: break;
case tc_free_cell: break;
case tc_char: break;
- case tc_string: free(ptr->storage_as.string.data); break;
+ case tc_string: if (!ptr->storage_as.string.sub)
+ free(ptr->storage_as.string.data); break;
case tc_vector: free(ptr->storage_as.vector.data); break;
case tc_eof: break;
case tc_undefined: break;
--
David Fox http://www.cat.nyu.edu/fox xoF divaD
NYU Media Research Lab fox_at_cat.nyu.edu baL hcraeseR aideM UYN
Received on Tue Dec 03 1996 - 11:27:48 CET