#!./exu -- How to use EUforum Search -- -- Searches are not case-sensitive. -- -- Matching messages are displayed in order of score. Logical connectives -- such as "and" and "or" are not supported, but the scoring system always -- favors messages that contain many different search words, over messages -- that contain the same search word many times. For example, if you search -- for: -- object oriented -- -- then messages that contain both "object" and "oriented" (an exact match) -- will always rank higher than messages that contain only one of these words, -- even where there are many occurrences of that one word. -- -- The results will indicate the number of exact matches, versus the -- number of partial matches, and the exact matches will be listed first. -- -- To search for a phrase, put double-quotes around the words in the phrase. -- e.g "Euphoria programming language" -- -- Specifying "Posted by" will limit the search to messages where the -- "From:" line contains the string that you provide. If no keywords are -- specified, the messages by that poster will be sorted most recent first. -- -- If you specify neither "Posted By" nor "Keywords", you'll get a -- chronological listing of messages in the chosen date range, most -- recent message first. -- -- To extract all the messages for a given thread, click on the Subject -- line of any message in that thread. The complete thread will be -- displayed in chronological order. -- -- Recent messages in the search results (past week or so) can also be viewed -- on the EUforum message board. Just click the link -- "view this message in EUforum". This will also make it easier to -- reply to the message. -- -- How it works internally: -- -- Several years of EUforum messages (well over 100 Mb) are stored -- in monthly files on OpenEuphoria.org and are updated immediately -- when a new message is posted. -- -- The search is speeded up by having a master index file of all messages. -- It contains the poster's name and a special "signature" that records -- all the 2-letter combinations that exist in that message. The signature -- consists of 676 bits (26x26). When searching for a keyword, only the -- messages that have all the necessary 2-letter combinations -- contained in that keyword are searched. -- -- The search is also speeded up by keeping a cache of the most recent -- words that have been searched for, with all of the scoring information -- for those words. This is particularly helpful when the user moves -- to the next page of results, and also when he performs a modified -- search using some of the same words as before. without type_check include machine.e include file.e include get.e include wildcard.e as wild include dll.e -- one-byte tags for master index file constant TAG_MONTH = 1, TAG_SIGNATURE = 2, TAG_FROM = 3, TAG_OFFSET = 4 constant SIG_SIZE = 26 -- a..z constant SIG_LEN = floor(SIG_SIZE * SIG_SIZE / 8)+1 constant TRUE = 1, FALSE = 0 constant TO_LOWER = 'a' - 'A' constant SCORE = 1, LOCATION = 2 -- for top_hits constant OUT_CHUNK_SIZE = 5 -- number of messages to output per table constant LINE_WIDTH = 85 -- wrap output lines constant UNKNOWN = 255 constant BIG_VALUE = 1000 -- large value for matching a search word constant EOF = -1 constant COMPRESSED = FALSE -- is monthly data compressed? constant M_CODE="123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -- one-char codes month & year type boolean(integer x) return x = TRUE or x = FALSE end type atom t0 t0 = time() sequence top_hits -- the best messages so far top_hits = {} sequence the_date the_date = date() the_date[1] += 1900 object query query = "" integer fromMonth, fromYear, toMonth, toYear sequence postedBy, keywords boolean thread -- is this a search to find members of a thread? thread = FALSE object first_res integer max_hits -- max number of matching messages to collect max_hits = 25 integer max_per_page max_per_page = 25 integer nhits, npartial, totalCount, scanned nhits = 0 npartial = 0 totalCount = 0 scanned = 0 integer log_file log_file = -1 -- open("esearch.log", "a") function crash(object x) -- in case of fire ... integer errfile errfile = open("ex.err", "a") puts(errfile, "\nquery is: " & query & '\n') close(errfile) -- send an e-mail containing ex.err system("mail -s \"esearch.exu crashed!\" rds@RapidEuphoria.com < ex.err > /dev/null", 2) return 0 end function crash_routine(routine_id("crash")) procedure log_msg(sequence text) -- record a message in the log if log_file != -1 then puts(log_file, text) puts(log_file, '\n') flush(log_file) end if end procedure function lower(sequence s) -- (quickly) convert a line to lower case integer c for i = 1 to length(s) do c = s[i] if c <= 'Z' then if c >= 'A' then s[i] = c + TO_LOWER end if end if end for return s end function function delete_leading_white(sequence s) -- delete leading whitespace while length(s) and find(s[1], " \t\n\r") do s = s[2..length(s)] end while return s end function function delete_trailing_white(sequence s) -- delete trailing whitespace while length(s) and find(s[length(s)], " \t\n\r") do s = s[1..length(s)-1] end while return s end function procedure html_puts(object s) -- write out some HTML puts(1, s) end procedure procedure html_printf(sequence format, object s) -- write out some formatted HTML printf(1, format, s) end procedure procedure stats() -- save some stats for performance analysis log_msg(sprintf("matched %d of %d, scanned %d, time: %.2f\n", {nhits, totalCount, scanned, time()-t0})) end procedure procedure warnMessage(sequence msg) -- issue a message but don't quit html_puts("

") html_printf("%s \n\n", {msg}) flush(1) log_msg(msg) stats() end procedure procedure errMessage(sequence msg) -- issue a fatal error message and quit warnMessage(msg) abort(1) end procedure sequence dat procedure save_words() -- save scoring info to the word cache integer fn, c, run, prev if thread then return -- we don't have the correct score, only 3 months anyway end if for i = 1 to length(keywords) do fn = open("words/word_" & keywords[i], "wb") if fn != -1 then puts(fn, int_to_bytes(length(dat[i]))) run = 0 prev = -1 -- use run-length compression on 0's and UNKNOWN's for j = 1 to length(dat[i]) do c = dat[i][j] if c = 0 or c = UNKNOWN then if prev = c then run += 1 if run = 256 then puts(fn, 255) puts(fn, c) run = 1 end if else if run then puts(fn, run) end if puts(fn, c) run = 1 end if else if run then puts(fn, run) end if puts(fn, c) run = 0 end if prev = c end for if run then puts(fn, run) end if close(fn) end if end for end procedure function terminate(integer x) -- handle SIG_TERM termination request from server warnMessage("Sorry, server was busy - try again\n") save_words() abort(1) return 0 end function -- set up termination handler atom terminate_addr terminate_addr = call_back(routine_id("terminate")) constant SIGTERM = 15 atom libc libc = open_dll("libc.so") integer signal if libc != 0 then signal = define_c_proc(libc, "signal", {C_INT, C_INT}) if signal != -1 then c_proc(signal, {SIGTERM, terminate_addr}) end if end if integer current_month, current_year integer current_file current_month = -1 current_year = -1 current_file = -1 sequence current_name function msg_open(sequence filename) -- open a monthly message file for reading if COMPRESSED then system("zcat " & filename & ".gz > msgtemp.TXT", 2) return open("msgtemp.TXT", "r") else return open(filename, "r") end if end function function open_month(integer year, integer month) -- return file handle for a given month's data if year = current_year and month = current_month then return current_file end if if current_file > 0 then close(current_file) end if current_name = sprintf("%4d%02d.TXT", {year, month}) current_file = msg_open("../mlist/" & current_name) if current_file <= 0 or getc(current_file) = -1 then current_file = open("../mlist/current.txt", "r") if current_file <= 0 then errMessage("Couldn't open current.txt, or " & current_name) end if end if current_month = month current_year = year return current_file end function constant BASE_MONTH = 1996*12 + 6 procedure AddHit(sequence counts, sequence location, integer chars_per_line) -- record the score for a message -- keep track of the best scores and messages atom score integer p, byte_count, age byte_count = location[4] * chars_per_line score = 0 if length(keywords) = 0 then -- we don't sort by message content score = 1 else if byte_count < 20 then return -- some garbage messages end if age = location[1]*12 + location[2] - BASE_MONTH -- score per word, counts[i] has max 254 for i = 1 to length(counts) do if counts[i] then -- first priority: number of *unique* words matched: score += BIG_VALUE * (counts[i] > 0) -- second priority: total number of words matched, -- full vs. partial matches, -- adjusted for message length, -- and slightly favoring newer messages score += sqrt(counts[i] / (4000+10*byte_count)) * (1 + .001 * age) end if end for if score = 0 then return end if if thread then score = BIG_VALUE + 1 -- same for all messages end if end if nhits += 1 if score < length(counts) * BIG_VALUE then npartial += 1 end if if length(top_hits) = 0 then top_hits = {{score, location}} else p = 0 for i = length(top_hits) to 1 by -1 do if thread then if score <= top_hits[i][SCORE] then -- earliest first p = i exit end if else if score < top_hits[i][SCORE] then -- latest first p = i exit end if end if end for if p != max_hits then top_hits = append(top_hits[1..p], {score, location}) & top_hits[p+1..length(top_hits)] if length(top_hits) > max_hits then top_hits = top_hits[1..length(top_hits)-1] end if end if end if end procedure sequence LETTER LETTER = repeat(FALSE, 256) for c = 0 to 255 do if (c >= '0' and c <= '9') or (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') then LETTER[c+1] = TRUE end if end for function scan(sequence keywords, integer year, integer month, integer offset, integer line_count) -- scan one message for all possible keywords -- return the scores for the various words - (10,7,5,0) per occurrence integer f, n, c, p, p2, len boolean white_before, white_after object line sequence tline sequence counts object word atom count f = open_month(year, month) if seek(f, offset) != 0 then errMessage(sprintf("seek to %d failed in %d/%d", {offset, year, month})) end if scanned += 1 counts = repeat(0, length(keywords)) line = gets(f) if match("X-EUFORUM:", line) then -- ignore it line = gets(f) end if for i = 1 to line_count do if atom(line) then exit end if n = length(line) -- convert line to lower case - in-lined for speed for j = 1 to n-1 do c = line[j] if c <= 'Z' then if c >= 'A' then c += TO_LOWER line[j] = c end if end if end for for j = 1 to length(counts) do -- count number of occurrences of word - in-lined for speed word = keywords[j] count = 0 tline = line while TRUE do if atom(word) then exit end if p = match(word, tline) -- should fail 99% of the time if thread then p2 = match("subject:", line) if p2 = 1 then -- subject line if p > 8 then return {1} -- always just one word else return {0} -- no point in continuing end if else p = 0 end if end if if p = 0 then exit end if len = length(word) if p = 1 then white_before = length(tline) = length(line) else white_before = not LETTER[1+tline[p-1]] end if if (p + len > length(tline)) or not LETTER[1+tline[p+len]] then white_after = TRUE else white_after = FALSE end if if white_before and white_after then count += 10 -- full credit: complete word match elsif len > 3 then -- short words must match complete words if white_before or white_after then count += 7 -- prefix or suffix else count += 5 -- middle of a substring end if end if tline = tline[p+len..$] end while counts[j] += count end for line = gets(f) end for return counts end function function sig_match(sequence word0, sequence signature) -- return TRUE if word matches the message signature integer index, p, bit for j = 1 to length(word0) do -- check if bit is ON index = word0[j] p = floor(index/8)+1 bit = power(2, and_bits(index, 7)) if and_bits(signature[p], bit) = 0 then return FALSE end if end for return TRUE end function integer master master = open("../mlist/master", "rb") if master = -1 then errMessage("Couldn't open master file") end if constant WORD_CACHE_SIZE = 1000 -- retain this many word data files procedure trim_cache() -- remove surplus word data files integer oldest, n sequence word_files sequence oldest_date word_files = dir("words") n = length(word_files) while n > WORD_CACHE_SIZE + 2 do -- find minimum date oldest = 0 oldest_date = {9999} for i = 1 to length(word_files) do if match("word_", word_files[i][D_NAME]) then if compare(oldest_date, word_files[i][D_YEAR..D_SECOND]) > 0 then oldest = i oldest_date = word_files[i][D_YEAR..D_SECOND] end if end if end for if oldest then system("rm \"words/" & word_files[oldest][D_NAME] & '"', 2) word_files[oldest][D_NAME] = "" n -= 1 else exit end if end while end procedure constant NUM_MESSAGES = 80000 -- at least this many in database procedure search() -- top level search integer month, year, prev_year, offset, c, run, j boolean skip, look_for integer fn, m, firstm, lastm, line_count, chars_per_line, len, message_num integer a, b, index sequence lc, off, res, word sequence from, signature sequence keywords0, possible, s keywords = wild:lower(keywords) -- general lower keywords0 = repeat({}, length(keywords)) for i = 1 to length(keywords) do word = keywords[i] for k = 1 to length(word)-1 do if word[k] >= 'a' and word[k] <= 'z' and word[k+1] >= 'a' and word[k+1] <= 'z' then -- a 2-letter combo - record the index a = remainder(word[k], SIG_SIZE) b = remainder(word[k+1], SIG_SIZE) index = a * SIG_SIZE + b keywords0[i] = append(keywords0[i], index) end if end for end for postedBy = lower(postedBy) firstm = fromYear*12+fromMonth lastm = toYear*12+toMonth off = repeat(0, 4) lc = repeat(0, 3) if first_res = 1 then html_puts("Searching ...\n") end if prev_year = -1 -- load up word score data files from previous searches dat = repeat(0, length(keywords)) for i = 1 to length(dat) do if thread then fn = -1 -- can't use saved scores log_msg("THREAD") else fn = open("words/word_" & keywords[i], "rb") end if if fn != -1 then log_msg("cache HIT: " & keywords[i]) s = get_bytes(fn, 4) if length(s) = 4 then len = bytes_to_int(s) dat[i] = repeat(UNKNOWN, len) j = 1 while j <= len do c = getc(fn) if c = EOF then log_msg(sprintf(" - incomplete#1: %d/%d", {j, len})) exit -- truncated end if if c = 0 or c = UNKNOWN then run = getc(fn) if run = EOF then log_msg(sprintf(" - incomplete#2: %d/%d", {j, len})) exit end if if c = 0 then dat[i][j..j+run-1] = 0 end if j += run else dat[i][j] = c j += 1 end if end while else -- corrupt file dat[i] = repeat(UNKNOWN, NUM_MESSAGES) end if close(fn) else if not thread then log_msg("cache MISS: " & keywords[i]) end if dat[i] = repeat(UNKNOWN, NUM_MESSAGES) end if end for message_num = 0 while 1 do c = getc(master) if c = EOF then exit end if if c = TAG_MONTH then -- month header month = getc(master) year = getc(master) + 1900 c = getc(master) if c = EOF then errMessage("unexpected EOF") end if m = year*12+month if m >= firstm and m <= lastm then skip = FALSE if first_res = 1 and year != prev_year then html_printf("%d ...\n", year) flush(1) prev_year = year end if else skip = TRUE end if end if if c != TAG_FROM then errMessage("expected TAG_FROM") end if from = gets(master) c = getc(master) if c != TAG_SIGNATURE then errMessage("expected TAG_SIGNATURE") end if signature = get_bytes(master, SIG_LEN) c = getc(master) if c != TAG_OFFSET then errMessage("expected TAG_OFFSET") end if -- read offset off[1] = getc(master) off[2] = getc(master) off[3] = getc(master) off[4] = getc(master) -- read line count lc[1] = getc(master) lc[2] = getc(master) lc[3] = getc(master) chars_per_line = getc(master) message_num += 1 if not skip then -- date is ok totalCount += 1 -- number of messages in date range if length(postedBy) = 0 or match(postedBy, from) then -- poster is ok possible = repeat(0, length(keywords)) look_for = FALSE for i = 1 to length(keywords) do if message_num > length(dat[i]) then dat[i] &= repeat(UNKNOWN, 500 + message_num - length(dat[i])) end if if dat[i][message_num] = UNKNOWN then if sig_match(keywords0[i], signature) then -- must scan for this word in the message possible[i] = keywords[i] look_for = TRUE else -- no scan, score is 0 dat[i][message_num] = 0 end if end if end for offset = bytes_to_int(off) line_count = lc[1] + 256 * lc[2] + 65536 * lc[3] if look_for then -- we have to scan this message for certain words res = scan(possible, year, month, offset, line_count) for i = 1 to length(keywords) do if sequence(possible[i]) then -- record what we learned if res[i] >= UNKNOWN then res[i] = UNKNOWN-1 -- score per word is max 254 end if dat[i][message_num] = res[i] else res[i] = dat[i][message_num] end if end for else -- no words to scan for res = repeat(0, length(keywords)) for i = 1 to length(keywords) do res[i] = dat[i][message_num] end for end if AddHit(res, {year, month, offset, line_count}, chars_per_line) end if end if end while -- save word data information save_words() end procedure procedure top_link(sequence percent, sequence name, sequence url, sequence w) -- HTML for one top link html_puts("\n") html_puts("\n") html_puts("" & name & "\n") html_puts("\n") html_puts("\n") if length(w) > 0 then html_puts("\n") end if end procedure procedure top_links() -- display the top links html_puts("\n") html_puts("\n") top_link("7", "Home", "index.html", "1") top_link("16", "What Is Euphoria?", "hotnew.htm", "1") top_link("14", "Documentation", "manual.htm", "1") top_link("7", "News", "news.htm", "1") top_link("14", "EUforum", "listserv.htm", "1") top_link("17", "Download Euphoria", "v20.htm", "1") top_link("18", "Donate", "reg.htm", "") html_puts("\n") html_puts("\n") html_puts("\n") for i = 1 to 6 do html_puts("\n") html_puts("\n") end for html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("
\n") html_puts("
\n") html_puts("\n") html_puts("\n") top_link("23", "Recent User Contributions", "contrib.htm", "1") top_link("12", "The Archive", "archive.htm", "1") top_link("22", "Other Euphoria Web Sites", "othersit.htm", "1") top_link("16", "RDS Development", "contract.htm", "1") top_link("22", "Related Books & Software", "books.htm", "") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") for i = 1 to 4 do html_puts("\n") end for html_puts("
\n") end procedure procedure htmlHeader1() -- First batch of HTML html_puts("Content-type: text/html\n\n") html_puts("\n") html_puts("search results for EUforum\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n\n") top_links() html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("Search Results
\n") end procedure constant month_codes = "123456789ABC" constant months = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"} sequence keystring procedure htmlHeader2() -- second batch of HTML sequence selected html_puts("

\n") html_puts("

\n\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") puts(1, "\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("EUforum Search\n") html_puts("\n") html_puts("\n") html_puts("Powered by Euphoria\n") html_puts("
\n") html_puts("Starting:
\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("Ending:
\n") html_puts("\n") html_puts("\n") html_puts("
\n") html_puts("Posted by:
\n") html_puts("\n
\n") html_puts("Keywords:
\n") html_puts("
\n") html_puts("
\n\n") html_puts("
\n\n") html_puts("

\n") end procedure function from_line(sequence low_line) -- Is this the FROM: line? integer f, b f = match("from:", low_line) if f = 0 then return FALSE end if b = find('>', low_line) if b = 0 then return TRUE end if return f <= 4 and f < b end function boolean made_bold function insert_bold(sequence line, sequence keyword, integer small) -- mark all occurrences of keyword with ... -- note: after inserting ... we'll have a glitch if -- a subsequent keyword happens to be 'b' integer p, p1 integer len boolean good, white_before, white_after sequence low_line low_line = lower(line) len = length(keyword) p = 1 while 1 do p1 = match(keyword, low_line[p..length(low_line)]) if p1 = 0 then exit end if p += p1 - 1 if len <= small then if p = 1 or not LETTER[1+low_line[p-1]] then white_before = TRUE else white_before = FALSE end if if p+len > length(low_line) or not LETTER[1+low_line[p+len]] then white_after = TRUE else white_after = FALSE end if if white_before and white_after then good = TRUE else good = FALSE p += len end if else good = TRUE end if if good then made_bold = TRUE line = line[1..p-1] & "" & line[p..p+len-1] & "" & line[p+len..length(line)] low_line = lower(line) p += 3+len+4 end if end while return line end function boolean found_subject sequence subject procedure subject_underline(sequence line, integer year, integer month) -- add the href to the subject line integer start integer m1, m2, y1, y2 if line[length(line)] = '\n' then line = line[1..length(line)-1] end if start = find(':', line)+1 -- must be there while start <= length(line) do if not find(line[start], " \t\n\r") then exit end if start += 1 end while html_puts(line[1..start-1]) html_puts("") if thread then html_puts("") end if html_puts(line[start..length(line)]) if thread then html_puts("") end if html_puts("\n") end procedure function blank_plus(sequence s) -- replace blanks by plus signs -- and various other characters by hex codes sequence new_s new_s = "" for i = 1 to length(s) do if find(s[i], " \t\r") then new_s &= '+' elsif s[i] = '?' then new_s &= "%3F" elsif s[i] = '&' then new_s &= "%26" elsif s[i] = '%' then new_s &= "%25" elsif s[i] = '+' then new_s &= "%2B" elsif s[i] = '#' then new_s &= "%23" else new_s &= s[i] end if end for return new_s end function function delete_re(sequence s) -- remove all "Re:" etc integer r sequence sl sl = lower(s) while TRUE do r = match("re:", lower(s)) if r then s = s[1..r-1] & s[r+3..length(s)] else exit end if end while return s end function function set_subject(sequence line) -- set the subject words, minus any "Re:" etc. integer substart if not found_subject then substart = match("subject:", lower(line)) if substart = 1 then found_subject = TRUE subject = line[substart+8..length(line)] subject = delete_re(subject) subject = delete_leading_white(delete_trailing_white(subject)) subject = blank_plus(subject) return TRUE else return FALSE end if end if return FALSE -- never reached??? end function boolean found_from procedure show_bold(sequence text, integer year, integer month) -- display one line with keywords in bold sequence low_line sequence line boolean sub sub = set_subject(text) line = "" for i = 1 to length(text) do -- replace < and > if text[i] = '<' then line &= "<" elsif text[i] = '>' then line &= ">" else line &= text[i] end if end for low_line = lower(line) if not thread then if length(postedBy) and not found_from and from_line(low_line) then line = insert_bold(line, postedBy, 0) found_from = TRUE else for k = 1 to length(keywords) do line = insert_bold(line, keywords[k], 3) end for end if end if if sub then subject_underline(line, year, month) else html_puts(line) end if end procedure procedure printTabHead() -- HTML for table of messages html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") html_puts("\n") end procedure procedure printResult() -- output the entries that match integer count, t, tabSize, numLeft, numIteration, e, m integer prev_t, chop, fn, year, month, offset, line_count, fscore atom score boolean no_hit_in_msg object line, euforum_num sequence s, match_type if length(top_hits) = 0 then errMessage("No match found. Try again.") html_puts("\n\n") return end if no_hit_in_msg = FALSE html_puts("

") if thread then if nhits = 1 then s = "" else s = "s" end if html_printf("%d message%s in thread", {nhits,s}) else if length(keywords) > 1 then html_puts("exactly ") end if html_printf("matched %d", nhits-npartial) if npartial > 0 then html_printf(", partially matched %d", npartial) end if html_printf(" of %d messages in date range", totalCount) end if tabSize = OUT_CHUNK_SIZE -- number of entries in one table for speed if nhits - first_res + 1 < max_per_page then numLeft = nhits - first_res + 1 else numLeft = max_per_page end if if nhits > max_per_page then html_printf(" - displaying results %d to %d", {first_res, first_res + numLeft - 1}) end if html_puts("\n

\n") t = first_res count = 0 while numLeft > 0 do if numLeft <= tabSize then numIteration = numLeft numLeft = 0 else numIteration = tabSize numLeft -= tabSize end if -- do a set in one table so browser can start printing it printTabHead() for k = 1 to numIteration do count += 1 found_from = FALSE found_subject = FALSE score = top_hits[t][SCORE] year = top_hits[t][LOCATION][1] month = top_hits[t][LOCATION][2] fn = open_month(year, month) if fn <= 0 then return end if offset = top_hits[t][LOCATION][3] if seek(fn, offset) != 0 then errMessage(sprintf("seek to %d failed in %d/%d", {offset, year, month})) end if line_count = top_hits[t][LOCATION][4] line = gets(fn) euforum_num = {} m = match("X-EUFORUM: ", line) if m then if year*12 + month >= the_date[1]*12 + the_date[2] - 3 then -- fairly recent, might be in EUforum line = line[m+11..length(line)] euforum_num = value(line) if euforum_num[1] = GET_SUCCESS then euforum_num = euforum_num[2] e = open(sprintf("../EUforum/m%d.html", euforum_num), "r") if e != -1 then close(e) else euforum_num = {} -- message not available anymore end if end if end if line = gets(fn) -- skip to real first line end if fscore = floor(score/BIG_VALUE) if fscore = length(keywords) then match_type = "exact match" else match_type = "partial match" end if if atom(euforum_num) then html_printf("

\n\n", euforum_num) else html_printf("\n") end if if remainder(count, 2) = 1 then html_puts("\n") prev_t = t t += 1 end for html_puts("\n
" & " # %d", t) if length(keywords) > 1 then html_printf("  %s %d/%d", {match_type, fscore, length(keywords)}) end if html_printf("view this message in " & "" & "EUforum  
" & " # %d", t) if length(keywords) > 1 then html_printf("  %s %d/%d", {match_type, fscore, length(keywords)}) end if html_puts("
")
	    else
		html_puts("
")
	    end if
	    
	    made_bold = FALSE
	    for i = 1 to line_count do
		if atom(line) then
		    exit
		end if
		while length(line) > LINE_WIDTH do
		    chop = LINE_WIDTH
		    for j = 1 to 7 do
			if find(line[chop], " \t\n\r,.)]") then
			    exit
			end if
			chop -= 1
		    end for
		    if chop = LINE_WIDTH-7 then
			chop = LINE_WIDTH
		    end if
		    
		    show_bold(line[1..chop], year, month)
		    html_puts('\n')
		    line = line[chop+1..length(line)]
		end while
		show_bold(line, year, month)
		line = gets(fn)
	    end for
	    if not made_bold then
	        no_hit_in_msg = TRUE -- something is wrong
	    end if
	    html_puts("
\n") end while html_puts("

 
\n") html_puts("") count = nhits - first_res + 1 if nhits > max_per_page then -- more than one page of results chop = match("&first_res=", query) if chop then -- remove old one while TRUE do query = query[1..chop-1] & query[chop+1..length(query)] if chop > length(query) or query[chop] = '&' then exit end if end while end if count -= max_per_page html_puts("

") if first_res > max_per_page then html_puts("<= Previous %d", {first_res - max_per_page, max_per_page}) html_puts(" Results\n") end if if count > 0 then -- more results coming after this page html_puts("     max_per_page then html_printf("&first_res=%d>Next %d", {first_res + max_per_page, max_per_page}) else html_printf("&first_res=%d\">Final %d", {first_res + max_per_page, count}) end if html_puts(" Results =>") end if html_puts("
\n") else html_puts("
End of Search Results
\n") end if html_puts("\n

 \n") if no_hit_in_msg and not thread and length(keywords) then -- probably a word-wrap issue, could be on any page of results -- viewed by the user -- system("echo \"" & query & "\" > ex.err", 2) -- system( -- "mail -s \"esearch.exu no_hit_in_msg!\" rds@RapidEuphoria.com < ex.err > /dev/null", -- 2) end if end procedure function hex_char(integer c) -- is c a valid hex character? return find(c, "0123456789ABCDEFabcdef") end function function hex_val(integer c) -- return value of a hex character if c >= 'A' and c <= 'F' then return 10 + c - 'A' elsif c >= 'a' and c <= 'f' then return 10 + c - 'a' else return c - '0' end if end function function parse_input(sequence s) -- crack the syntax sent from Web browser: aaa=bbb&ccc=ddd&... -- Convert to {{"aaa", "bbb"}, {"ccc", "ddd"}, ...} left-right pairs integer i, c sequence word_pairs, left_word, right_word word_pairs = {} i = 1 s &= {0,0,0} -- end markers while s[i] != 0 do left_word = "" while 1 do -- build left word c = s[i] if c = '=' or c = '&' or c = 0 then exit end if if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then c = 16 * hex_val(s[i+1]) + hex_val(s[i+2]) i += 2 elsif c = '+' then c = ' ' end if left_word &= c i += 1 end while i += 1 right_word = "" while 1 do -- build right word c = s[i] if c = '&' or c = 0 then exit end if if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then c = 16 * hex_val(s[i+1]) + hex_val(s[i+2]) i += 2 elsif c = '+' then c = ' ' end if right_word &= c i += 1 end while i += 1 word_pairs = append(word_pairs, {left_word, right_word}) end while return word_pairs end function function getKeywords() -- get values from the CGI query string, e.g. -- fromMonth=6&fromYear=1&toMonth=3&toYear=7&postedBy=rds&keywords=apple+orange sequence key, pairs, var, val boolean inquote query = getenv("QUERY_STRING") if atom(query) then query = getenv("query_string") if atom(query) then errMessage("Internal Error - no query_string") end if end if log_msg(sprintf("%d-%d-%d %d:%02d\n%s", append(the_date[1..5], query))) pairs = parse_input(query) -- defaults in case of corrupted query string fromMonth = 6 fromYear = 1996 toMonth = the_date[2] toYear = the_date[1] keystring = "" postedBy = "" first_res = 1 for i = 1 to length(pairs) do var = lower(pairs[i][1]) val = pairs[i][2] if equal(var, "frommonth") and length(val) then if val[1] >= 'A' then fromMonth = val[1] - 'A' + 10 else fromMonth = val[1] - '0' end if elsif equal(var, "fromyear") and length(val) then if val[1] >= 'A' then fromYear = 1995 + val[1] - 'A' + 10 else fromYear = 1995 + val[1] - '0' end if elsif equal(var, "tomonth") and length(val) then if val[1] >= 'A' then toMonth = val[1] - 'A' + 10 else toMonth = val[1] - '0' end if elsif equal(var, "toyear") and length(val) then if val[1] >= 'A' then toYear = 1995 + val[1] - 'A' + 10 else toYear = 1995 + val[1] - '0' end if elsif equal(var, "postedby") then postedBy = val while length(postedBy) > 0 and (postedBy[1] = ' ' or postedBy[1] = '\t') do postedBy = postedBy[2..length(postedBy)] end while elsif equal(var, "thread") then thread = TRUE elsif equal(var, "keywords") then keystring = val elsif equal(var, "first_res") then first_res = value(val) if first_res[1] = GET_SUCCESS then first_res = first_res[2] max_hits += first_res-1 else first_res = 1 end if end if end for if fromYear = 1996 and fromMonth < 6 then fromMonth = 6 end if keywords = {} if fromYear * 100 +fromMonth > toYear*100 + toMonth then return "Starting month/year must be earlier or equal to ending month/year" end if -- make list of keywords from keystring key = "" if thread then -- just one string to search for (and it might contain double-quotes) if length(keystring) and keystring[1]='"' then keystring = keystring[2..$] end if if length(keystring) and keystring[$] = '"' then keystring = keystring[1..$-1] end if if length(keystring) then keywords = {keystring} else keywords = {} end if else keystring &= ' ' inquote = FALSE for i = 1 to length(keystring) do if keystring[i] = '"' then inquote = not inquote elsif not inquote and keystring[i] = ' ' then if length(key) then keywords = append(keywords, key) key = "" end if elsif not find(keystring[i], "`'&") then key = append(key, keystring[i]) end if end for keystring = keystring[1..length(keystring)-1] end if return "" end function sequence msg htmlHeader1() msg = getKeywords() htmlHeader2() if length(msg) > 0 then errMessage(msg) end if flush(1) search() log_msg(sprintf("search completed, time: %.2f", time()-t0)) printResult() flush(1) stats() if not thread and remainder(the_date[5], 10) = 0 then trim_cache() end if