--This file, sortbyte.ex is a mod of my previous sortbyte.ex. 
--It contains the code for char_sort and a demo program for testing it.
--Changes include:
--  Enabled proper treatment of short input strings, ie 1 and 2 byte strings
--    among longer strings as inputs. See data generation at end.
--  Simplified logic, as made possible by defined data range, ie  chars
--    are limited to the range  0 to 255 in the input strings.
--  For instance, data[i] = {{13},{0},{255},{77},{66}}
--    0 min allowed item, 255 max allowed item
--Mod of bucket_sort allsorts.ex to provide byte_sort for 0..255 strings
--This program, char_sort requires  ~30..45% less time than shell sort for
--    well spread data.
--Tested with up to 180000 input 6-char strings. Occasional causeway errors
--  start at about this value and become more frequent for greater than 180000.
--  Occasionally, 240,000 works but usually not. These values are highly
--  configuration dependent.
--Shell sort as in sort.e gives causeway errors starting at ~40000 items for
--  my pentium 100/120, DOS 6.22, no disk cache (crashes sooner with smartdrv)
--Indicated time for 10240 6 char random strings  is
--      .27 seconds vs .47 seconds for shell_sort as in sort.e

--AP Adamson, July, 1997                
		----------------------------------------
		-- Sorting Algorithms in Euphoria     --
		----------------------------------------
without warning
without type_check
include machine.e
include sort.e
-- Ask for 1000 clock ticks per second; presumably this is done so that the
-- time() readings taken in all_sorts() below have roughly millisecond
-- resolution (tick_rate is expected to come from machine.e -- confirm).
tick_rate(1000)

type natural(integer x)
-- A natural number: any integer that is zero or positive.
    return 0 <= x
end type

type file_number(integer x)
-- A file/device number: any integer from -1 upwards.
    return -1 <= x
end type

--**************************************

function char_sort(sequence s)
-- Bucket-sort a sequence of byte strings into ascending compare() order.
--
-- Input form:  {{0,255,23,44,...},{...},{...},...}
--   Every element of s is a sequence whose items are integers in 0..255.
--   Elements may have any length, including 0, 1 or 2 items.
-- Returns: a new sequence holding the same elements in sorted order.
--   Elements that compare equal keep their original relative order.
--
-- Method: each string gets a 24-bit tag built from its first three bytes
-- (missing bytes count as 0).  The tag never decreases as strings increase
-- in compare() order, so dividing it by a fixed interval gives the number of
-- the bin the string belongs to.  Each bin is kept sorted by insertion, and
-- the bins are finally read out in order.
    sequence    si, binsSeq, sorted, binSeq
    integer     index, leng, tag, lengSi, interval, numItems, pos

    numItems = length(s)
    if numItems < 2 then
        -- Nothing to reorder.  This also keeps the interval calculation
        -- below from dividing by zero when there is exactly one item.
        return s
    end if

    --Create the binsSeq
    binsSeq = repeat({}, numItems + 2)

    --Prepare constants for index calculation
    --maxTag = 16777215 = #10000 * 255 + #100 * 255 + 255
    --minTag = 0
    interval = floor(16777215 / (numItems - 1)) + 1

    --Populate the bins sequence
    for i = 1 to numItems do
        si = s[i]    --just a convenient name for current string, saves time
        lengSi = length(si)

        --Tag (for a group of inputs) is based on first 3 chars of string.
        if lengSi > 2 then
            tag = #10000 * si[1]    --Faster if single statement is broken up
            tag = tag + #100 * si[2]
            tag = tag + si[3]
        elsif lengSi > 1 then
            tag = #10000 * si[1]
            tag = tag + #100 * si[2]
        elsif lengSi = 1 then
            tag = #10000 * si[1]
        else
            tag = 0     --empty string: compares lower than any other string
        end if

        --Calc the index (for proper bin)
        index = floor(tag / interval) + 1
        binSeq = binsSeq[index]         --Faster if named
        leng = length(binSeq)           --Faster if named

        --Find the first entry that is strictly greater than si.  Using a
        --strict test places si after any equal entries already in the bin.
        pos = leng + 1
        for j = 1 to leng do
            if compare(binSeq[j], si) = 1 then
                pos = j
                exit
            end if
        end for

        if pos > leng then
            --Bin is empty, or si is >= everything in it: add at the end.
            binsSeq[index] = append(binSeq, si)
        else
            binsSeq[index] = binSeq[1..pos-1]
                & prepend(binSeq[pos..leng], si)
        end if
    end for

    --Write from binsSeq to sorted sequence
    sorted = repeat(0, numItems)
    index = 0
    for i = 1 to length(binsSeq) do
        binSeq = binsSeq[i]
        for j = 1 to length(binSeq) do
            index = index + 1
            sorted[index] = binSeq[j]
        end for
    end for
    return sorted
end function   --char_sort

--*******************************************

procedure all_sorts()
-- Demo / benchmark driver for char_sort.
-- Builds a list of random 6-byte strings (bytes 0..255), adds a few
-- duplicates and 1- and 2-byte strings as special cases, then times
-- char_sort() and sort() on that list.  After each sort the result is
-- scanned once: "ERROR" plus the offending index is shown if an item is
-- lower than its predecessor, otherwise "OKOKOKOK" is shown.
    file_number outFn
    natural itemCount, charsPerItem
    atom startTime, elapsed
    sequence data, result, item
    integer total

    outFn = 1  -- open("PRN", "w")
    itemCount = 10240 - 12
    charsPerItem = 6

    -- Random test strings
    data = {}
    for n = 1 to itemCount do
        item = repeat(0, charsPerItem)
        for c = 1 to charsPerItem do
            item[c] = rand(#100) - 1
        end for
        data = append(data, item)
    end for

    -- Special cases: three extra copies of the first string in front,
    -- then repeats of existing strings and some short strings at the end.
    for k = 1 to 3 do
        data = prepend(data, data[1])
    end for
    data = data & {data[2], data[8], data[8],
                   {7}, {7}, {7}, {6,8}, {7,8}, {7,8}}

    printf(outFn,
        "\ntime (sec.) to char_sort %d string items\n", {length(data)})
    puts(1, "\n")

    -- Time char_sort
    startTime = time()
    result = char_sort(data)
    elapsed = time() - startTime
    printf(outFn, "APA char_sort                 %9.4f\n", elapsed)

    -- Check that the char_sort result is in order
    total = length(result)
    for k = 2 to total do
        if compare(result[k-1], result[k]) = 1 then
            puts(1, "ERROR\n")
            ? k
            exit
        elsif k = total then
            puts(1, "OKOKOKOK\n")
        end if
    end for

    --Comment out the following for numItems > 40K.

    -- Time the library sort
    startTime = time()
    result = sort(data)
    elapsed = time() - startTime
    printf(outFn, "Euphoria sort.e shell sort    %9.4f\n", elapsed)

    -- Check that the sort() result is in order
    total = length(result)
    for k = 2 to total do
        if compare(result[k-1], result[k]) = 1 then
            puts(1, "ERROR\n")
            ? k
            exit
        elsif k = total then
            puts(1, "OKOKOKOK")
        end if
    end for
end procedure
--****************************************
-- Run the demo/benchmark when this file is executed.
all_sorts()

