[clean-list] Languages Comparison

John van Groningen johnvg@cs.kun.nl
Wed, 30 Jun 2004 17:04:24 +0200


Isaac Gouy wrote:
>Maybe someone knowledgeable could take a look at the amateurish
>implementations on the shootout website.

I have some suggestions to make some of these programs faster, and have
also implemented hash2 and wordfreq.

Array Access (ary):
===================

The inc function indexes the array with -1 when j<0. The following
definition of inc does not have this bug, and is faster:

   inc x y j
      | j>=0  #! yj=y.[j]
              = inc x {y & [j]= yj + x.[j]} (j-1)    
              = y

Sieve of Eratosthenes (sieve):
==============================

The sieve function has a similar indexing bug, instead use:
(this probably doesn't make the program faster or slower)

sieve primes i count rep
    | i>uBound
        | rep==0     = count
                     = sieve {primes&[j]=True \\ j<-[0..uBound]} lBound 0 (rep-1)
        | primes.[i] = sieve (loop primes i i) (i+1) (count+1) rep
                     = sieve primes (i+1) count rep
 
String Concatenation (strcat):
==============================

Make the arguments of the function copyTo strict:

        copyTo :: !.{#Char} !Int !*{#Char} -> .{#Char}

Method Calls (methcall):
========================

Add strictness annotations:

:: Toggler = E.a: { state :: a
                  , activate_ :: ! a -> a
                  , value_ :: ! a -> Bool
                  }

Use a strict record instead of a tuple:

:: TogglerR = ! { t :: !Toggler, max :: !Int, count :: !Int}

makeNToggle b max =
    { state = {t = makeToggle b, max=max, count = 0}
   
    , activate_ = \ {t, max, count} ->
        if (count+1 >= max)
            {t=activate t,max=max, count=0}
            {t=t, max=max, count=count+1}   
           
    , value_ = \ {t} -> value t
    }

Hash Access (hash):
===================

- import StdOverloadedList:

import StdEnv, StdOverloadedList, ArgEnv, LanguageShootout

- add strictness annotation:

:: HexConverter = Hc !Int !Int !.String

- use a strict list, compile with -ou (reuse unique nodes optimization)

:: HashTable a = { nBuckets::Int
                 , table::!.{![!Item a!]}
                 }

hash :: !{#.Char} !(HashTable .a) -> Int
hash key ht=:{nBuckets}
   = (abs (loop key (size key - 1) 0)) rem nBuckets  
   where
   loop k n h
      | n>(-1) = loop k (n-1) (11*h + toInt k.[n])       
               = h

htNew n = { nBuckets = nprime
        , table = {[] \\ i <- [0..nprime-1]}
        }
   where   
   nprime = hd (dropWhile (\x = x < n) primes)        
     
htHasKey :: !{#.Char} !.(HashTable a) -> .Bool             
htHasKey k ht=:{table}= findIn k table.[hash k ht]   

htAdd k v ht=:{table}
   #! i = hash k ht
   #! (b,table) = uselect table i
   = if (findIn k b)
      {ht & table = update ht.table i (addItem k v b [|])}
      {ht & table = update ht.table i [|{key=k,val=v}:b]}
     
findIn k [|] = False
findIn k [|item:ls] = item.key == k || findIn k ls  
  
addItem k v [|] ls` = ls`
addItem k v [|item:ls] ls`
    | item.key == k  
        = [|{item & val=v}:ls++|ls`]
        = addItem k v ls [|item:ls`]  

Spell Checker (spellcheck):
===========================

- Import StdStrictLists:

import StdEnv,StdStrictLists //, SimpleHash

- Use a larger hashtable:
 
Start world
    # dictionary = htNew 40000

- Do not use fend:

    readWords f ht
           # (line, f) = freadline f
        | size line==0
            = ht
               = readWords f (htAdd line 1 ht)

- Use a unique strict list in the hashtable, compile with -ou:

// SimpleHash implementation would normally be imported
  
primes =: [
   53,         97,         193,       389,       769,
   1543,       3079,       6151,      12289,     24593,
   49157,      98317,      196613,    93241,     786433,
   1572869,    3145739,    6291469,   12582917,  25165843,
   50331653,   100663319,  201326611, 402653189, 805306457
   ]

:: Item a = { key::!String
            , val::!a
            }

:: HashTable a = { nBuckets::!Int
                 , table::!.{!.[!Item a!]}
                 }

hash :: !{#.Char} !Int -> Int
hash key nBuckets
   = (abs (loop key (size key - 1) 0)) rem nBuckets  
   where
   loop k n h
      | n>=0
          = loop k (n-1) (5*h + toInt k.[n])        
          = h

htNew n = { nBuckets = nprime
        , table = {[|] \\ i <- [0..nprime-1]}
        }
   where   
   nprime = hd (dropWhile (\x = x < n) primes)        

htHasKey :: !{#.Char} !.(HashTable a) -> .Bool
htHasKey k ht=:{table,nBuckets}= findIn k table.[hash k nBuckets]
where
    findIn k [|item:ls] = item.key == k || findIn k ls
    findIn k [|] = False

htAdd :: !{#.Char} !a !*(HashTable a) -> *HashTable a
htAdd k v ht=:{table,nBuckets}
    #! i = hash k nBuckets
    #! (b,table) = replace table i [|]
    # b = addItem k v b
    = {ht & table = {table & [i] = b}}
where
    addItem :: !{#.Char} .a !*[!u:(Item .a)!] -> *[!v:(Item .a)!], [u<=v]
    addItem k v [|item:ls]
       | item.key <> k
               = [|item : addItem k v ls]
               = [|{item & val=v}:ls]
    addItem k v [|]
        = [|{key=k,val=v}]

Statistical Moments (moments):
==============================

- Use a modified quicksort algorithm to compute the medians,
  instead of heapsort in function median:

      # a = find_medians a

- Add strictness annotations and eliminate common subexpressions:

   loop :: !.{#Real} !Real Real Real Real Real Int -> .(!Real,!Real,!Real,!Real)
   loop a mean adev var skew kurt i
       | i<0
           = (adev,var,skew,kurt)
           #! ai=a.[i]
           # dev = ai - mean
           # dev2=dev*dev
           # dev3=dev2*dev
     	   = loop a mean (adev + abs dev)(var + dev2) (skew + dev3)(kurt + dev3*dev) (i-1)

- Modified quicksort to compute medians:

find_medians :: *{#Real} -> .{#Real}
find_medians a0
    # (n_elements,a) = usize a0
    # k = (n_elements>>1) + (n_elements bitand 1)
    = quick_sort1 0 (n_elements-1) k a
    where
        quick_sort0 :: !Int !Int !Int !*{#Real} -> *{#Real}
        quick_sort0 b e k a
            | k>=b && k<=e
                = quick_sort1 b e k a
                = a

        quick_sort1 b e k a
            | b>=e
                = a
        quick_sort1 b e k a=:{[b]=ab}
            # m=(b+e)>>1
            # (am,a) = a![m]
            = find_large am (b+1) e e b k {a & [m]=ab}
        where
            find_large am l e r b k a
                | l<=e && a.[l]<=am
                    = find_large am (l+1) e r b k a
                    = find_small_or_equal am r b l e k a
           
            find_small_or_equal am r b l e k a
                | r>b && a.[r]>am
                    = find_small_or_equal am (r-1) b l e k a
                | l<r
                    # (al,a)=a![l]
                      (ar,a)=a![r]
                    = find_large am (l+1) e (r-1) b k {a & [l]=ar,[r]=al}
           	| b==r
                    = quick_sort2 (r-1) (r+1) b e k {a & [b]=am}
                    # (ar,a)=a![r]
                    = quick_sort2 (r-1) (r+1) b e k {a & [r]=am,[b]=ar}

            quick_sort2 l r b e k a
                | l-b>=e-r
                    = quick_sort0 b l k (quick_sort0 r e k a)
                    = quick_sort0 r e k (quick_sort0 b l k a)

Lists (lists):
==============

Use an array, like the ocaml version, compile with -ou.

listOps size
    # d1 = iota size
    # (d2,d1) = copy d1
    # d3 = empty size
    # (d2,d3) = headToTailLoop d2 d3
    # (d3,d2) = tailToTailLoop d3 d2
    # d1 = reverseDq d1
    | firstDq d1==size && equal d1 d2
        = lengthDq d1
        = abort "Test Failed!"
  where
    headToTailLoop :: !*(Deque a) !*(Deque a) -> (!*Deque a,!*Deque a)
    headToTailLoop x y
        | emptyDq x   = (x,y)
                      # (z,x) = popFirst x
                      # y = addLast z y
                      = headToTailLoop x y

    tailToTailLoop :: !*(Deque a) !*(Deque a) -> (!*Deque a,!*Deque a)
    tailToTailLoop x y
        | emptyDq x = (x,y)
                    # (z,x) = popLast x
                    # y = addLast z y
                    = tailToTailLoop x y

:: Deque a = { first ::!Int, last ::!Int, size :: !Int, a :: !.{!a} }

empty :: !Int -> *Deque Int
empty n
    # n=inc n
    = { first=0, last=0, size=n,a = createArray n 0 }

iota :: !Int -> *Deque Int
iota n
    #! size=inc n
    = { first=1, last=0, size=size, a = {i \\ i<-[0..n]} }

emptyDq :: !(Deque a) -> Bool
emptyDq {first,last,size} = first==last

lengthDq :: !(Deque a) -> Int
lengthDq {first,last,size} = up (last-first) size

up n s :== n+((n>>31) bitand s) // if (n < 0) (n + s) n

popFirst :: !*(Deque a) -> (!a,!*Deque a)
popFirst {first,last,size,a}
    | first <> last
        #! e = a.[first]
        # first=first+1
        | first<>size
            = (e,{first=first,last=last,size=size,a=a})
            = (e,{first=0,last=last,size=size,a=a})
        = abort "popFirst: empty"

popLast :: !*(Deque a) -> (!a,!*Deque a)
popLast {first,last,size,a}
    | first <> last
        | last>0
            # last = last-1
            #! e = a.[last]
            = (e,{first=first,last=last,size=size,a=a})
            # last = size-1
            #! e = a.[last]
            = (e,{first=first,last=last,size=size,a=a})
        = abort "popLast: empty"

addLast :: !a !*(Deque a) -> *Deque a
addLast elem {first,last,size,a}
    # last`=last+1
    | last` <> size
        | last` <> first
                = {first=first,last=last`,size=size,a={a & [last]=elem}}
                = abort "addLast : Full"
        | 0 <> first
                = {first=first,last=0,size=size,a={a & [last]=elem}}
                = abort "addLast : Full"

firstDq :: !(Deque a) -> a
firstDq {first,last,size,a}
    | first <> last
        = a.[first]
        = abort "popFirst: empty"

copy :: *(Deque a) -> (!*Deque a,!*Deque a)
copy {first,last,size,a}
    | size==0
        = ({first=first,last=last,size=size,a={}},{first=first,last=last,size=size,a=a})
    #! e=a.[0]
    # (a1,a2) = copy_array 0 size (createArray size e) a
    = ({first=first,last=last,size=size,a=a1},{first=first,last=last,size=size,a=a2})
  where
    copy_array :: !Int !.Int !*{!a} !u:{!a} -> (!.{!a},!v:{!a}), [u <= v]
    copy_array i s a1 a2
        | i<s
            #! e=a2.[i]
            = copy_array (i+1) s {a1 & [i]=e} a2
            = (a1,a2)

reverseDq :: *(Deque a) -> *Deque a
reverseDq {first,last,size,a}
    #! w=up (last-first) size // length
    # a = reverse_a ((w>>1)+1) first (up (last-1) size) size a
    = {first=first,last=last,size=size,a=a}
    where
        reverse_a :: !Int !Int !Int !Int !*{!a} -> *{!a}
        reverse_a n i1 i2 size a
            | n==0
                = a
            #! m=min (size-i1) n
            #! m=min (i2+1) m
            # a = reverse_a2 i1 i2 m a
              i1=i1+m
              i2=i2-m
              i1=if (i1>=size) (i1-size) i1
              i2=if (i2<0) (i2+size) i2
            = reverse_a (n-m) i1 i2 size a

        reverse_a2 :: !Int !Int !Int !*{!a} -> *{!a}
        reverse_a2 i1 i2 e1 a
            | i1<e1
                # (v1,a) = a![i1]
                # (v2,a) = a![i2]
                = reverse_a2 (i1+1) (i2-1) e1 {a & [i1]=v2,[i2]=v1}
                = a

equal :: !.(Deque a) !.(Deque a) -> .Bool | == a
equal {first=first1,last=last1,size=size1,a=a1} {first=first2,last=last2,size=size2,a=a2}
    #! len1=up (last1-first1) size1 // length
    #! len2=up (last2-first2) size2 // length
    = len1==len2 && equal_a len1 first1 first2 size1 size2 a1 a2
where
    equal_a :: !Int !Int !Int !Int !Int !.{!a} !.{!a} -> .Bool | == a
    equal_a n i1 i2 s1 s2 a1 a2
        | n==0
            = True
        #! m=min (s1-i1) n
        #! m=min (s2-i2) m
        | equal_a2 i1 i2 (i1+m) a1 a2
            # i1=i1+m
              i2=i2+m
              i1=if (i1>=s1) (i1-s1) i1
              i2=if (i2>=s2) (i2-s2) i2
            = equal_a (n-m) i1 i2 s1 s2 a1 a2
            = False

    equal_a2 :: !Int !Int !Int !.{!a} !.{!a} -> .Bool | == a
    equal_a2 i1 i2 e1 a1 a2
        | i1<e1
            = a1.[i1]==a2.[i2] && equal_a2 (i1+1) (i2+1) e1 a1 a2
            = True

Hashes Part II (hash2):
=======================

Use a unique and strict list, added some hashtable functions,
compile with -ou on linux.

module hash2

import StdEnv, LanguageShootout
import StdOverloadedList

Start = hashOps argi

hashOps n
   # hashtable1 = htNew 10000
   # hashtable2 = htNew 10000
   # hashtable1 = addKeys 0 hashtable1
   # (hashtable1,hashtable2) = repeat_inserts n hashtable1 hashtable2
   # (foo_1_1,hashtable1) = htFind "foo_1" hashtable1
   # (foo_9999_1,hashtable1) = htFind "foo_9999" hashtable1
   # (foo_1_2,hashtable2) = htFind "foo_1" hashtable2
   # (foo_9999_2,hashtable2) = htFind "foo_9999" hashtable2
   =    toString foo_1_1.val+++" "+++
        toString foo_9999_1.val+++" "+++
        toString foo_1_2.val+++" "+++
        toString foo_9999_2.val+++"\n"
  where
    addKeys i ht
         | i>9999
             = ht
             = addKeys (i+1) (htAdd ("foo_"+++toString i) i ht)

    repeat_inserts :: !Int (HashTable Int) (HashTable Int) -> (HashTable Int,!HashTable Int)
    repeat_inserts n hashtable1 hashtable2
        | n>0
            # (hashtable1,hashtable2) = htFold add_val hashtable1 hashtable2
            = repeat_inserts (n-1) hashtable1 hashtable2
            = (hashtable1,hashtable2)

    add_val {key,val=val1} hashtable2
        = htUpdate (\ item=:{val} = {item & val=val+val1}) key 0 hashtable2

// SimpleHash implementation would normally be imported
  
primes =: [
   53,         97,         193,       389,       769,
   1543,       3079,       6151,      12289,     24593,
   49157,      98317,      196613,    93241,     786433,
   1572869,    3145739,    6291469,   12582917,  25165843,
   50331653,   100663319,  201326611, 402653189, 805306457
   ]

:: Item a = { key::!String
            , val::!a
            }

:: SHashTable a = { nBuckets::!Int
                 , table::!.{!.[!Item a!]}
                 }

:: *HashTable a :== SHashTable a

hash :: !{#.Char} !Int -> Int
hash key nBuckets
   = (abs (loop key (size key - 1) 0)) rem nBuckets  
   where
   loop :: !{#Char} !Int !Int -> Int
   loop k n h
      | n>=0
          = loop k (n-1) (11*h + toInt k.[n])       
          = h

htNew n = { nBuckets = nprime
        , table = {[|] \\ i <- [0..nprime-1]}
        }
   where   
   nprime = hd (dropWhile (\x = x < n) primes)        

htFind :: !{#.Char} !(HashTable a) -> (!Item a,HashTable a)              
htFind k ht=:{table,nBuckets}
    # (list,table) = table![hash k nBuckets]
    = (find k list,{ht & table=table})
    where
        find k [|item:ls]
            | item.key == k
                   = item
                   = find k ls

htUpdate :: ((Item a) -> Item a) !{#.Char} a !*(HashTable a) -> *HashTable a
htUpdate f k v ht=:{table,nBuckets}
    #! i = hash k nBuckets
    #! (b,table) = replace table i [|]
    # b = updateItem k v b f
    = {ht & table = {table & [i] = b}}
where
    updateItem :: !{#.Char} a !*[!(Item a)!] ((Item a) -> Item a) -> *[!(Item a)!]
    updateItem k v [|item:ls] f
       | item.key <> k
               = [|item : updateItem k v ls f]
               = [|f item:ls]
    updateItem k v [|] f
        = [|f {key=k,val=v}]

htAdd :: !{#.Char} !a !*(HashTable a) -> *HashTable a
htAdd k v ht=:{table,nBuckets}
    #! i = hash k nBuckets
    #! (b,table) = replace table i [|]
    # b = addItem k v b
    = {ht & table = {table & [i] = b}}
where
    addItem :: !{#.Char} .a !*[!u:(Item .a)!] -> *[!v:(Item .a)!], [u<=v]
    addItem k v [|item:ls]
       | item.key <> k
               = [|item : addItem k v ls]
               = [|{item & val=v}:ls]
    addItem k v [|]
        = [|{key=k,val=v}]

htFold :: ((Item a) -> .(.b -> .b)) !(HashTable a) .b -> (!HashTable a,!.b)
htFold f ht=:{nBuckets,table} s
    # (table,s) = ht_fold 0 nBuckets table s
    = ({ht & table=table},s)
    where
        ht_fold i n table s
            | i<n
                # (list,table) = table![i]
                #! s=ht_fold_list list s
                = ht_fold (i+1) n table s
                = (table,s)

        ht_fold_list [|e:l] s
            #! s = f e s
            = ht_fold_list l s
        ht_fold_list [|] s
            = s

Word Frequency (wordfreq)
=========================

Implemented reading words. Hash table uses a unique and strict list,
added some hashtable functions, compile with -ou on linux.

module wordfreq

import StdEnv,StdStrictLists

Start :: *World -> (!Bool,!*World)
Start world
    # (console,world) = stdio world
    # ht = htNew 2048
    # (ht,console) = read_words_in_file0 console ht
    # items = sort (htToList ht)
    # console = write_items items console
    = fclose console world

read_words_in_file0 :: *File !(HashTable Int) -> (!HashTable Int,!*File)
read_words_in_file0 f ht
    # (s,f) = freads f 4096
    | size s==0
        = (ht,f)
    # (ia,s) = skip_to_alpha_char 0 s
    = read_words_in_string1 ia s f ht

read_words_in_string0 :: Int *{#Char} *File !(HashTable Int) -> (!HashTable Int,!*File)
read_words_in_string0 i s f ht
    # (ia,s) = skip_to_alpha_char i s
    = read_words_in_string1 ia s f ht

read_words_in_string1 :: Int *{#Char} *File !(HashTable Int) -> (!HashTable Int,!*File)
read_words_in_string1 ia s f ht
    # (ina,s) = skip_alpha_chars_and_convert_to_lower_case ia s
    | ia==ina
        = read_words_in_file0 f ht
    #! word = s % (ia,ina-1)
    | ina<4096
        # ht = htUpdate (\ item=:{val} = {item & val=val+1}) word 0 ht
        = read_words_in_string0 ina s f ht
        = read_words_in_file1 word f ht

read_words_in_file1 :: {#Char} *File !(HashTable Int) -> (!HashTable Int,!*File)
read_words_in_file1 word f ht
    # (s,f) = freads f 4096
    | size s==0
        = (htUpdate (\ item=:{val} = {item & val=val+1}) word 0 ht,f)
    # (ia,s) = skip_to_alpha_char 0 s
    | ia<>0
        # ht = htUpdate (\ item=:{val} = {item & val=val+1}) word 0 ht
        = read_words_in_string1 ia s f ht
    # (ina,s) = skip_alpha_chars_and_convert_to_lower_case ia s
    | ina<4096
        #! rest_of_word = s % (0,ina-1)
        # ht = htUpdate (\ item=:{val} = {item & val=val+1}) (word+++rest_of_word) 0 ht
        = read_words_in_string0 ina s f ht
        = read_words_in_file1 (word+++s) f ht

skip_alpha_chars_and_convert_to_lower_case :: !Int !*{#Char} -> (!Int,!*{#Char})
skip_alpha_chars_and_convert_to_lower_case i s
    | i<size s
        #! c=s.[i]
        | c>='a' && c<='z'
            = skip_alpha_chars_and_convert_to_lower_case (i+1) s
        | c>='A' && c<='Z'
            = skip_alpha_chars_and_convert_to_lower_case (i+1) {s & [i]=toChar (toInt c+32)}
            = (i,s)
        = (i,s)

skip_to_alpha_char :: !Int !*{#Char} -> (!Int,!*{#Char})
skip_to_alpha_char i s
    | i<size s
        #! c=s.[i]
        | c<'A' || c>'z' || ( c>'Z' && c<'a')
            = skip_to_alpha_char (i+1) s
            = (i,s)
        = (i,s)

instance < (Item Int) where
        (<) {val=v1,key=k1} {val=v2,key=k2}
            | v1<>v2
                = v1>v2
                = k1>k2

write_items [{key,val}:items] fo
    # s = toString val
    # fo = if (size s<7) (fwrites (createArray (7-size s) ' ') fo) fo
    = write_items items (fo <<< s <<< ' ' <<< key <<< '\n')
write_items [] fo
    = fo


// SimpleHash implementation would normally be imported
  
primes =: [
   53,         97,         193,       389,       769,
   1543,       3079,       6151,      12289,     24593,
   49157,      98317,      196613,    93241,     786433,
   1572869,    3145739,    6291469,   12582917,  25165843,
   50331653,   100663319,  201326611, 402653189, 805306457
   ]

:: Item a = { key::!String
            , val::!a
            }

:: SHashTable a = { nBuckets::!Int
                 , table::!.{!.[!Item a!]}
                 }

:: *HashTable a :== SHashTable a

hash :: !{#.Char} !Int -> Int
hash key nBuckets
   = (abs (loop key (size key - 1) 0)) rem nBuckets  
   where
   loop :: !{#Char} !Int !Int -> Int
   loop k n h
      | n>=0
          = loop k (n-1) (5*h + toInt k.[n])       
          = h

htNew n = { nBuckets = nprime
        , table = {[|] \\ i <- [0..nprime-1]}
        }
   where   
   nprime = hd (dropWhile (\x = x < n) primes)        

htUpdate :: ((Item a) -> Item a) !{#.Char} !a !*(HashTable a) -> *HashTable a
htUpdate f k v ht=:{table,nBuckets}
    #! i = hash k nBuckets
    #! (b,table) = replace table i [|]
    # b = updateItem b k v f
    = {ht & table = {table & [i] = b}}
where
    updateItem :: !*[!(Item a)!] !{#.Char} a ((Item a) -> Item a) -> *[!(Item a)!]
    updateItem [|item:ls] k v f
       | item.key <> k
               = [|item : updateItem ls k v f]
               = [|f item:ls]
    updateItem [|] k v f
        = [|f {key=k,val=v}]

htToList :: !(HashTable a) -> [Item a]
htToList ht=:{nBuckets,table}
    = ht_to_list 0 nBuckets table []
    where
        ht_to_list i n table l
            | i<n
                # (list,table) = table![i]
                #! l=ht_buckets_to_list list l
                = ht_to_list (i+1) n table l
                = l

        ht_buckets_to_list :: *[!Item a!] [Item a] -> [Item a]
        ht_buckets_to_list [|i:is] l
            = ht_buckets_to_list is [i:l]
    ht_buckets_to_list [|] l
            = l

Regards,

John van Groningen