#!./exu
-- How to use EUforum Search
--
-- Searches are not case-sensitive.
--
-- Matching messages are displayed in order of score. Logical connectives
-- such as "and" and "or" are not supported, but the scoring system always
-- favors messages that contain many different search words, over messages
-- that contain the same search word many times. For example, if you search
-- for:
--     object oriented
--
-- then messages that contain both "object" and "oriented" (an exact match)
-- will always rank higher than messages that contain only one of these words,
-- even where there are many occurrences of that one word.
--
-- The results will indicate the number of exact matches, versus the
-- number of partial matches, and the exact matches will be listed first.
--
-- To search for a phrase, put double-quotes around the words in the phrase.
--     e.g. "Euphoria programming language"
--
-- Specifying "Posted by" will limit the search to messages where the
-- "From:" line contains the string that you provide. If no keywords are
-- specified, the messages by that poster will be sorted most recent first.
--
-- If you specify neither "Posted By" nor "Keywords", you'll get a
-- chronological listing of messages in the chosen date range, most
-- recent message first.
--
-- To extract all the messages for a given thread, click on the Subject
-- line of any message in that thread. The complete thread will be
-- displayed in chronological order.
--
-- Recent messages in the search results (past week or so) can also be viewed
-- on the EUforum message board. Just click the link
-- "view this message in EUforum". This will also make it easier to
-- reply to the message.
--
-- How it works internally:
--
-- Several years of EUforum messages (well over 100 MB) are stored
-- in monthly files on OpenEuphoria.org and are updated immediately
-- when a new message is posted.
--
-- The search is sped up by having a master index file of all messages.
-- It contains the poster's name and a special "signature" that records
-- all the 2-letter combinations that exist in that message. The signature
-- consists of 676 bits (26x26). When searching for a keyword, only the
-- messages that have all the necessary 2-letter combinations
-- contained in that keyword are searched.
--
-- The search is also sped up by keeping a cache of the most recent
-- words that have been searched for, with all of the scoring information
-- for those words. This is particularly helpful when the user moves
-- to the next page of results, and also when he performs a modified
-- search using some of the same words as before.

without type_check

include machine.e
include file.e
include get.e
include wildcard.e as wild
include dll.e

-- one-byte tags for master index file
constant TAG_MONTH     = 1
constant TAG_SIGNATURE = 2
constant TAG_FROM      = 3
constant TAG_OFFSET    = 4

-- signature geometry: one bit per 2-letter combination
constant SIG_SIZE = 26                                  -- a..z
constant SIG_LEN  = floor((SIG_SIZE * SIG_SIZE) / 8) + 1 -- bytes per signature

constant TRUE  = 1
constant FALSE = 0

constant TO_LOWER = 'a' - 'A'   -- added to an upper-case letter to lower it

-- indexes into an element of top_hits
constant SCORE    = 1
constant LOCATION = 2

constant OUT_CHUNK_SIZE = 5     -- number of messages to output per table
constant LINE_WIDTH     = 85    -- wrap output lines
constant UNKNOWN        = 255
constant BIG_VALUE      = 1000  -- large value for matching a search word
constant EOF            = -1
constant COMPRESSED     = FALSE -- is monthly data compressed?

-- one-char codes month & year
constant M_CODE = "123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

type boolean(integer x)
-- accepts exactly the two values TRUE and FALSE
    return find(x, {TRUE, FALSE}) != 0
end type

-- start time of this run, used when reporting elapsed time
atom t0
t0 = time()

-- the best messages so far
sequence top_hits
top_hits = {}

-- current date; date() reports the year as an offset from 1900,
-- so convert it to a full year
sequence the_date
the_date = date()
the_date[1] = the_date[1] + 1900

object query
query = ""

integer fromMonth, fromYear, toMonth, toYear
sequence postedBy, keywords

boolean thread -- is this a search to find members of a thread?
thread = FALSE

object first_res

integer max_hits -- max number of matching messages to collect
max_hits = 25

integer max_per_page
max_per_page = 25

-- counters reported by stats()
integer nhits, npartial, totalCount, scanned
nhits = 0
npartial = 0
totalCount = 0
scanned = 0

-- file handle used by log_msg(); -1 means logging is switched off
integer log_file
log_file = -1 -- open("esearch.log", "a")

function crash(object x)
-- in case of fire ...
-- Crash handler, registered with crash_routine() right after this
-- definition. Appends the current query to ex.err, then mails ex.err
-- to the maintainer. The parameter x is not used. Always returns 0.
    integer errfile

    errfile = open("ex.err", "a")
    if errfile != -1 then
        -- open() gives -1 when the file cannot be opened; writing to
        -- that handle would raise a second error inside the crash
        -- handler itself, so only write when the open succeeded
        puts(errfile, "\nquery is: " & query & '\n')
        close(errfile)
    end if
    -- send an e-mail containing ex.err
    system("mail -s \"esearch.exu crashed!\" rds@RapidEuphoria.com < ex.err > /dev/null", 2)
    return 0
end function

crash_routine(routine_id("crash"))

procedure log_msg(sequence text)
-- record a message in the log
-- Writes text followed by a newline to log_file and flushes it.
-- Does nothing while log_file is -1.
    if log_file != -1 then
        puts(log_file, text)
        puts(log_file, '\n')
        flush(log_file)
    end if
end procedure

function lower(sequence s)
-- (quickly) convert a line to lower case
-- Returns s with every element in 'A'..'Z' shifted to 'a'..'z';
-- all other elements are left as they are.
    integer c

    for i = 1 to length(s) do
        c = s[i]
        -- the upper bound is tested first, as in the original fast path
        if c <= 'Z' then
            if c >= 'A' then
                s[i] = c + TO_LOWER
            end if
        end if
    end for
    return s
end function

function delete_leading_white(sequence s)
-- delete leading whitespace
-- Returns s without its leading run of space, tab, newline and
-- carriage-return elements. Scans with an index and slices once,
-- instead of re-slicing the sequence for every character removed.
    integer first

    first = 1
    while first <= length(s) and find(s[first], " \t\n\r") do
        first += 1
    end while
    -- when s is all whitespace, first = length(s)+1 and the slice is empty
    return s[first..length(s)]
end function

function delete_trailing_white(sequence s)
-- delete trailing whitespace
-- Returns s without its trailing run of space, tab, newline and
-- carriage-return elements. Scans with an index and slices once,
-- instead of re-slicing the sequence for every character removed.
    integer last

    last = length(s)
    while last >= 1 and find(s[last], " \t\n\r") do
        last -= 1
    end while
    -- when s is all whitespace, last = 0 and the slice is empty
    return s[1..last]
end function

procedure html_puts(object s)
-- write out some HTML
-- s is sent unchanged to file number 1 (standard output).
    puts(1, s)
end procedure

procedure html_printf(sequence format, object s)
-- write out some formatted HTML
-- format and s are passed straight to printf() on file number 1.
    printf(1, format, s)
end procedure

procedure stats()
-- save some stats for performance analysis
-- Logs the hit count, total count, scanned count and the time
-- elapsed since t0 through log_msg().
    log_msg(sprintf("matched %d of %d, scanned %d, time: %.2f\n",
                    {nhits, totalCount, scanned, time()-t0}))
end procedure

procedure warnMessage(sequence msg)
-- issue a message but don't quit
    html_puts("
") html_printf("%s \n