Betr.: [clean-list] Passing commandline arguments to a clean program

Diederik van Arkel vanarkel@knoware.nl
Wed, 8 Sep 2004 13:40:33 +0200


On Sep 7, 2004, at 5:04 PM, Sander van den Berg wrote:

> Thanks for all the help. I finished my little test program. It's
> working fine right now except for the following:
>
> Whenever I try to generate the Character Frequency table of a large 
> text file, I get either heap full or stack errors, even when I 
> increase the heap-size.
>
> I attached the program.
>
> Are there any other things I could improve on the code?

Hi Sander,

Although there are many ways to improve your FreqList-based version
further, such as using strict and unique lists, it is much easier here
to use an array to hold the frequency counts.
The following version reduces heap usage from over 1.5GB (for the
list-based version with Jerzy's improvements and one or two other
fixes) to <64K for my sample (470K) test file.

Besides using an array, it also moves some of the I/O operations to
avoid allocating storage.

Regards,

Diederik van Arkel

====================================

module CharFreq

import StdEnv, ArgEnv

// A frequency table as a list of (character, count) pairs; both tuple
// components are annotated strict.
:: FreqList :== [(!Char,!Int)]
// An unboxed array of Int counters; freq`` indexes it with toInt of the
// character that was read.
:: FreqArray :== {#Int}

// The initial counter table: one Int slot per possible character code
// (256 of them), every slot starting at zero.
iniFreq` :: *FreqArray
iniFreq` = createArray nrOfCodes 0
where
	nrOfCodes = 256

// Count how often each character occurs in `file`.
//
// Characters are read one at a time with sfreadc; for every character the
// counter at index (toInt char) of the unique array `fa` is incremented in
// place. When sfreadc reports failure, the counters are turned into a list
// of (character, count) pairs, one pair per array element.
//
// The character of each pair is derived from the array index rather than
// from a literal range such as ['\0'..'\255']: the Clean report describes
// backslash-digit escapes as octal, which would make '\255' character 173
// and silently drop the counters for codes 174..255.
// NOTE(review): confirm the escape semantics against your compiler version;
// the index-based form is correct either way.
freq`` :: !File !*FreqArray -> *FreqList
freq`` file fa
	# (readok,char,file) = sfreadc file
	| not readok
		// [0..] is unbounded; the array generator ends the comprehension.
		= [(toChar i,n) \\ n <-: fa & i <- [0..]]
	// Strict let: the old count must be read before the destructive update.
	#! x = toInt char
	   n = fa.[x] + 1
	= freq`` file {fa & [x] = n}

// Order a frequency list by its counts (the second tuple component),
// using sortBy with a "less than or equal" comparison on the counts.
freqListSort` :: FreqList -> FreqList
freqListSort` l = sortBy byCount l
where
	byCount (_,m) (_,n) = m <= n

// Open the file named `a` with sfopen in FReadText mode.
// Aborts the program when the name is the empty string or when sfopen
// reports failure; otherwise returns the opened file together with the
// environment returned by sfopen.
openFile :: String *env -> (File,*env) | FileSystem env
openFile "" fs = abort "no filename passed"
openFile a fs
	# (opened,handle,fs) = sfopen a FReadText fs
	| opened	= (handle,fs)
	| otherwise	= abort ("error opening filename " +++ a)
	
// Write one line of the form "Char: <c> freq: <n>" to `f` for every entry
// of the frequency list, in list order, and return the resulting file.
//
// The output expression is kept on an indented continuation line: a
// continuation that starts in column 0 is taken by the layout rule as the
// start of a new top-level definition and does not parse.
mkString` :: !FreqList !*File -> *File
mkString` [] f	= f
mkString` [(c,n):rest] f
	= mkString` rest
		(f <<< "Char: " <<< toString c <<< " freq: " <<< toString n <<< "\n")

// Return element 1 of the argument array `s`.
// Aborts unless the array holds exactly two elements, i.e. exactly one
// argument besides element 0 (presumably the program name; confirm against
// ArgEnv's getCommandLine).
checkParam :: {.{#Char}} -> String
checkParam s
	| size s <> 2	= abort "wrong nr of arguments"
	| otherwise		= s.[1]
	
// Program entry point.
// Writes a header line to the console (the Dutch text means "most frequent
// char"), opens the file named by the single command-line argument, counts
// its characters, and prints the table with the highest counts first.
// Aborts when the console cannot be closed.
Start :: *World -> *World
Start world
	# (out,world)		= stdio world
	# out				= fwrites "meest voorkomende char: \n" out
	# args				= getCommandLine
	# (input,world)		= openFile (checkParam args) world
	// freqListSort` is reversed below so the largest counts come first.
	# ascending			= freqListSort` (freq`` input iniFreq`)
	# out				= mkString` (reverse ascending) out
	# (closed,world)	= fclose out world
	| closed			= world
	| otherwise			= abort "Cannot close console"