#!./exu -- File Archive Search -- -- Searches the descriptions of over 1500 .zip files on RapidEuphoria.com -- -- The descriptions are stored in a Euphoria EDS database, archive.edb without type_check include file.e include get.e include wildcard.e as wild include database.e include archive.e constant TRUE = 1, FALSE = 0 constant TO_LOWER = 'a' - 'A' constant MAX_HITS = 100 -- max number of matching entries to display constant SCORE = 1, ENTRY = 2 constant OUT_CHUNK_SIZE = 10 -- number of entries to output per table constant P_GEN = 1, P_DOS = 2, P_WIN = 3, P_LNX = 4 atom t0 t0 = time() sequence top_hits top_hits = {} sequence keywords -- keywords entered by user sequence platforms -- platforms selected by user platforms = repeat(FALSE, 4) integer nhits, totalCount nhits = 0 totalCount = 0 integer log_file log_file = -1 -- open("asearch.log", "a") object query query = "nothing yet" function crash(object x) -- in case of fire ... integer errfile errfile = open("ex.err", "a") puts(errfile, "\nquery is: " & query & '\n') close(errfile) -- send an e-mail containing ex.err system("mail -s \"asearch.exu crashed!\" rds@RapidEuphoria.com < ex.err > /dev/null", 2) return 0 end function crash_routine(routine_id("crash")) function lower(sequence s) -- (quickly) convert a line to lower case integer c for i = 1 to length(s) do c = s[i] if c <= 'Z' then if c >= 'A' then s[i] = c + TO_LOWER end if end if end for return s end function procedure stats() -- save some stats for performance analysis printf(log_file, "matched %d of %d, time: %.2f\n\n", {nhits, totalCount, time()-t0}) end procedure procedure html_puts(object text) -- write HTML output puts(1, text) end procedure procedure html_printf(sequence format, object text) -- write HTML output printf(1, format, text) end procedure procedure errMessage(sequence msg) -- issue a fatal error message and quit html_puts("

") html_printf("%s \n\n", {msg}) if log_file != -1 then printf(log_file, "%s\n", {msg}) stats() close(log_file) end if abort(1) end procedure sequence LETTER LETTER = repeat(FALSE, 256) for c = 0 to 255 do if (c >= '0' and c <= '9') or (c >= 'a' and c <= 'z') then LETTER[c+1] = TRUE end if end for function hits(sequence line, sequence word, integer n) -- find out how many matches of word there are in a line. -- if length(word) is <= n, then we need whole-word match, not a substring integer len, white_before, white_after, p atom count count = 0 while TRUE do p = match(word, line) -- should fail 99% of the time if p = 0 then return count end if len = length(word) if (p = 1) or not LETTER[1+line[p-1]] then white_before = TRUE else white_before = FALSE end if if (p + len > length(line)) or not LETTER[1+line[p+len]] then white_after = TRUE else white_after = FALSE end if if len <= n then -- short word - need clear match if white_before and white_after then count += 1 end if else -- long word - could be a substring, but reduce score count += 1 - 0.2 * (not white_before) - 0.2 * (not white_after) end if line = line[p+len..length(line)] end while return count end function procedure AddHit(sequence counts, integer rec_num) -- record the score for a entry -- keep track of the best scores and entries atom score integer p score = 0.0 if length(counts) = 0 then -- no keywords were specified score = 1 else for i = 1 to length(counts) do score += sqrt(counts[i]) end for if score = 0.0 then return end if end if nhits += 1 if length(top_hits) = 0 then top_hits = {{score, rec_num}} else p = 0 for i = length(top_hits) to 1 by -1 do if score <= top_hits[i][SCORE] then p = i exit end if end for if p != MAX_HITS then top_hits = append(top_hits[1..p], {score, rec_num}) & top_hits[p+1..length(top_hits)] if length(top_hits) > MAX_HITS then top_hits = top_hits[1..length(top_hits)-1] end if end if end if end procedure procedure scan(integer rec_num, sequence keywords, sequence platforms) -- scan one entry for all possible keywords sequence counts, href sequence entry, text atom bonus integer p entry = db_record_data(rec_num) if not platforms[entry[A_PLATFORM]+1] then return end if if equal(entry[A_CATEGORY], "hide*") then return end if -- only consider filename, not full path href = entry[A_HREF] p = length(href) while p > 0 and href[p] != '/' do p -= 1 end while href = href[p+1..length(href)] text = {entry[A_TITLE], entry[A_NAME], entry[A_DESCRIPTION], href} counts = repeat(0, length(keywords)) for i = 1 to length(text) do text[i] = lower(text[i]) if i <= 2 then bonus = 1.5 -- hit on title or name else bonus = 1.0 end if bonus *= 1 + sqrt(entry[A_MONEY])/200.0 + (entry[A_YEAR]-1996)*.01 + entry[A_MONTH]*.01/12.0 for j = 1 to length(keywords) do counts[j] += bonus * hits(text[i], keywords[j], 3) end for end for AddHit(counts, rec_num) end procedure procedure search() -- top level search keywords = wild:lower(keywords) -- general lower if db_open("ARCHIVE.EDB", DB_LOCK_SHARED) != DB_OK then errMessage("Can't open ARCHIVE.EDB") end if if db_select_table("archive") != DB_OK then errMessage("Can't open archive table") end if totalCount = db_table_size() for i = 1 to totalCount do scan(i, keywords, platforms) end for end procedure procedure top_link(sequence percent, sequence name, sequence url, sequence w) -- HTML for one top link html_puts("\n") html_puts("\n") html_puts("" & name & "\n") html_puts("\n") html_puts("\n") if length(w) > 0 then html_puts("\n") end if end procedure procedure top_links() -- display the top links html_puts("\n") html_puts("\n") top_link("7", "Home", "index.html", "1") top_link("16", "What Is Euphoria?", "hotnew.htm", "1") top_link("14", "Documentation", "manual.htm", "1") top_link("7", "News", "news.htm", "1") top_link("14", "EUforum", "listserv.htm", "1") top_link("17", "Download Euphoria", "v20.htm", "1") top_link("18", "Instant Registration!", "reg.htm", "") html_puts("\n") html_puts("\n") html_puts("\n") for i = 1 to 6 do html_puts("\n") html_puts("\n") end for html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("
\n") html_puts("
\n") html_puts("\n") html_puts("\n") top_link("23", "Recent User Contributions", "contrib.htm", "1") top_link("12", "The Archive", "archive.htm", "1") top_link("22", "Other Euphoria Web Sites", "othersit.htm", "1") top_link("16", "RDS Development", "contract.htm", "1") top_link("22", "Related Books & Software", "books.htm", "") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") for i = 1 to 4 do html_puts("\n") end for html_puts("
\n") end procedure procedure htmlHeader1() -- First batch of HTML html_puts("Content-type: text/html\n\n") html_puts("\n") html_puts("search results for Euphoria File Archive\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n\n") top_links() html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("Search Results
\n\n") end procedure constant months = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"} procedure htmlHeader2() -- second batch of HTML html_puts("

\n") html_puts("

\n\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("Search Again:\n") html_puts("\n") html_puts("\n") html_puts("DOS  \n") html_puts("WIN  \n") html_puts("LNX  \n") html_puts("GEN\n") html_puts("\n") html_puts("\n") html_puts("Powered by Euphoria\n") html_puts("
\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("Type one or more words.\n") html_puts("
\n") html_puts("
\n") html_puts("
\n") html_puts("

\n") end procedure procedure printTabHead() -- HTML for table of messages html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") end procedure function recent(integer month_num, integer year_num) -- Is the date in the past 6 months (roughly)? integer i, j sequence current_date i = year_num * 12 + month_num current_date = date() j = (current_date[1] + 1900) * 12 + current_date[2] if j - i <= 6 then return TRUE else return FALSE end if end function function strip_hash(sequence desc) -- remove separator between main and update descriptions integer m m = match("## ", desc) if m then desc = desc[1..m-1] & desc[m+3..length(desc)] end if return desc end function procedure display_entry(integer rec_num) -- display an archive entry using HTML sequence entry, thedate, desc entry = db_record_data(rec_num) html_puts("\n\n") html_printf("", entry[A_SIZE]) html_puts("\n") html_puts("\n") if entry[A_MONEY] = 0 then html_puts("\n") else html_printf("\n", entry[A_MONEY]/100) end if html_puts("\n") html_puts("\n") desc = entry[A_DESCRIPTION] if entry[A_UPDATED] then desc = strip_hash(desc) end if html_puts("") html_puts("\n\n") end procedure procedure printResult() -- output the entries that match integer t, tabSize, numLeft, numIteration if length(top_hits) = 0 then errMessage("No match found. Try again.") html_puts("\n\n") return end if puts(1, "

") html_printf("matched %d of %d entries", {nhits, totalCount}) if nhits > MAX_HITS then html_printf(" - displaying the best %d", MAX_HITS) end if html_puts("\n

\n") tabSize = OUT_CHUNK_SIZE -- number of entries in one table for speed if nhits < MAX_HITS then numLeft = nhits -- remaining number of entries still to be printed else numLeft = MAX_HITS end if t = 1 while numLeft > 0 do if numLeft <= tabSize then numIteration = numLeft numLeft = 0 else numIteration = tabSize numLeft -= tabSize end if -- Web browser must receive a complete table before it displays anything printTabHead() for k = 1 to numIteration do display_entry(top_hits[t][ENTRY]) t += 1 end for html_puts("\n

") if entry[A_PLATFORM] = 0 then html_puts("GEN") elsif entry[A_PLATFORM] = 1 then html_puts("DOS") elsif entry[A_PLATFORM] = 2 then html_puts("WIN") else html_puts("LNX") end if html_puts("") html_puts("" & entry[A_TITLE] & "%dK" & entry[A_NAME] & "") thedate = sprintf("%s %d/%02d", {months[entry[A_MONTH]], entry[A_DAY], remainder(entry[A_YEAR], 100)}) if entry[A_UPDATED] then if recent(entry[A_YEAR], entry[A_MONTH]) then html_puts("updated " & thedate) else html_puts(thedate) end if else html_puts(thedate) end if html_puts(" \"this" & " %.2f" & "", db_record_key(rec_num)) & "\"click" & "
" & desc & "\n") -- '\n' meaningful html_puts("
 
\n") end while html_puts("

 
\n") html_puts("") html_puts("

End of Search Results
\n") html_puts("\n\n") end procedure function hex_char(integer c) -- is c a valid hex character? return find(c, "0123456789ABCDEFabcdef") end function function hex_val(integer c) -- return value of a hex character if c >= 'A' and c <= 'F' then return 10 + c - 'A' elsif c >= 'a' and c <= 'f' then return 10 + c - 'a' else return c - '0' end if end function function parse_input(sequence s) -- crack the syntax sent from Web browser: aaa=bbb&ccc=ddd&... -- Convert to {{"aaa", "bbb"}, {"ccc", "ddd"}, ...} left-right pairs integer i, c sequence word_pairs, left_word, right_word word_pairs = {} i = 1 s &= {0,0,0} -- end markers while s[i] != 0 do left_word = "" while 1 do -- build left word c = s[i] if c = '=' or c = '&' or c = 0 then exit end if if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then c = 16 * hex_val(s[i+1]) + hex_val(s[i+2]) i += 2 elsif c = '+' then c = ' ' end if left_word &= c i += 1 end while i += 1 right_word = "" while 1 do -- build right word c = s[i] if c = '&' or c = 0 then exit end if if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then c = 16 * hex_val(s[i+1]) + hex_val(s[i+2]) i += 2 elsif c = '+' then c = ' ' end if right_word &= c i += 1 end while i += 1 word_pairs = append(word_pairs, {left_word, right_word}) end while return word_pairs end function function getKeywords() -- get values from the CGI query string, e.g. -- dos=on&keywords=apple+orange sequence keystring sequence key, pairs, var, val, d query = getenv("QUERY_STRING") if atom(query) then query = getenv("query_string") if atom(query) then errMessage("Internal Error - no query_string") end if end if if log_file != -1 then d = date() d[1] += 1900 printf(log_file, "%d-%d-%d %d:%02d\n%s\n", append(d[1..5], query)) flush(log_file) end if pairs = parse_input(query) keystring = "" for i = 1 to length(pairs) do var = lower(pairs[i][1]) val = pairs[i][2] if equal(var, "dos") then platforms[P_DOS] = TRUE elsif equal(var, "win") then platforms[P_WIN] = TRUE elsif equal(var, "lnx") then platforms[P_LNX] = TRUE elsif equal(var, "gen") then platforms[P_GEN] = TRUE elsif equal(var, "keywords") then keystring = val end if end for keywords = {} if length(keystring) = 0 then return "Enter one or more words for searching. Try again." end if -- make list of keywords from keystring key = "" keystring &= ' ' for i = 1 to length(keystring) do if keystring[i] = ' ' then if length(key) then keywords = append(keywords, key) key = "" end if elsif keystring[i] != '"' then key = append(key, keystring[i]) end if end for if length(keywords) = 0 then return "Type one or more keywords for search. Try again." end if return "" end function sequence msg htmlHeader1() msg = getKeywords() htmlHeader2() if length(msg) > 0 then errMessage(msg) end if flush(1) search() printResult() if log_file != -1 then stats() close(log_file) end if db_close()