#!/bin/sh
#
# Standard sh program for building queryable MG collections.
#
# Usage: edit the configuration variables below (at minimum `source` and
# `text`), then run this script.  It feeds the collection text through
# mg_passes twice, builds the auxiliary dictionaries and weights, removes
# the intermediate files that mgquery does not need, and finally records
# how it was invoked in <text>.lastbuild.
#
# While a build is in progress (or if one aborted part-way) the marker
# file <text>.rebuilding exists in the collection directory.

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Set no_tags to -G to exclude SGML tags from the index.
no_tags=

# Change stem_case to change stemming/casefolding:
#   -s 0 = no casefold or stemming
#   -s 1 = casefold, no stemming
#   -s 2 = stemming, no casefold
#   -s 3 = stemming and casefold
stem_case=1

# If MG 1.2 is located somewhere else, change this appropriately.
PATH=/usr/local/mg-1.2/bin:$PATH

# Set source to the collection text generation command + arguments.
source=

# Set text to the name of the collection.
text=

# Change dir to the explicit directory of the collection if the script is
# not in the same directory as the collection, or if the script may not be
# executed from the collection directory.
dir=.

# ---------------------------------------------------------------------------
# Sanity checks
# ---------------------------------------------------------------------------

# Refuse to run with an unconfigured script: an empty `text` would create
# and delete files named ".rebuilding", ".text.dict", ... and an empty
# `source` would feed mg_passes no input at all.
: "${source:?edit this script and set source to the text generation command}"
: "${text:?edit this script and set text to the collection name}"

# ---------------------------------------------------------------------------
# Build
# ---------------------------------------------------------------------------

# Clear the umask so generated files are created readable and writable by
# everyone; future users can then regenerate the collection without having
# to chown or chmod.
umask 000

# Exit if any of the following fail, and print commands as they run.
set -ex

# `set -e` alone only looks at the LAST command of a pipeline, so a failing
# text generator would go unnoticed.  Enable pipefail where the shell has
# it; the probe runs in a subshell so shells without it are unaffected.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

cd -- "${dir}"

# Marker file: present for as long as the build has not completed.
touch -- "${text}.rebuilding"

# First mg_passes run (-T1 -I1) over the generated collection text.
# ${source} and ${no_tags} are deliberately left unquoted: `source` holds a
# command plus its arguments and must be word-split, and an empty `no_tags`
# must vanish rather than become an empty argument.
# shellcheck disable=SC2086
${source} \
  | mg_passes ${no_tags} -s "${stem_case}" -T1 -I1 -t 1 -d . -f "${text}"

mg_compression_dict -d . -f "${text}"
mg_perf_hash_build -d . -f "${text}"

# Second mg_passes run (-T2 -I2); the generator is executed again.
# shellcheck disable=SC2086
${source} \
  | mg_passes ${no_tags} -s "${stem_case}" -T2 -I2 -t 1 -d . -f "${text}"

mg_weights_build -d . -f "${text}"
mg_invf_dict -d . -f "${text}"
mg_fast_comp_dict -d . -f "${text}"

# All build steps succeeded: drop the in-progress marker.
rm -- "${text}.rebuilding"

# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------

# Remove files not needed by mgquery.  Each file is removed with its own rm
# so that, under `set -e`, the script stops at the first one that cannot be
# removed.
for suffix in \
  text.dict \
  invf.dict \
  invf.idx \
  text.stats \
  text.idx \
  weight \
  invf.chunk \
  invf.chunk.trans \
  invf.dict.hash \
  trace
do
  rm -- "${text}.${suffix}"
done

# Create the .lastbuild file for future reference: the script name followed
# by the arguments it was invoked with.
echo "$0" "$@" > "${text}.lastbuild"