#!/usr/bin/perl
#
#           RiSearch
#
# web search engine, version 0.99.08
# (c) Sergej Tarasov, 2000-2001
#
# Homepage: http://risearch.org/
# email: risearch@risearch.org
# Last modified: 11.11.2002


# Turn on autoflush for the currently selected output handle so that each
# print is sent immediately.
$| = 1;

# CGI response header; everything printed after this is the HTML body.
print "Content-Type: text/html\n\n";

# Load the site configuration.
# NOTE(review): names used below but not defined in this file ($CAP_LETTERS,
# $LOW_LETTERS, $numbers, $res_num, $min_length, %stop_words, the index file
# paths, hash(), esc2char(), ...) are presumably provided here - confirm.
require './config.cgi';

# Current local date as YYYY.MM.DD; get_query() uses it as the log file name.
# $HOUR and $MINUTE are assigned but not used anywhere in this file.
($DAY, $MONTH, $YEAR, $HOUR, $MINUTE) = (localtime)[3,4,5,2,1];
$date = sprintf "%04d.%02d.%02d", $YEAR+1900, $MONTH+1, $DAY;

# tr/// does not interpolate variables, so the transliteration code is built
# as a string and compiled with a string eval.  Both closures take a
# REFERENCE to a scalar and modify the referenced string in place.
#
# $remove_non_alphabetic: with the /c and /s flags every run of characters
# that is NOT in the listed set (the operators - + !, ASCII letters and the
# configured extra letters/digits) is replaced by a single space.
$code = "\${\$_[0]} =~ tr/-+!a-zA-Z$CAP_LETTERS$LOW_LETTERS$numbers/ /cs;";
$remove_non_alphabetic = eval "sub { $code }";

# $to_lower_case: maps A-Z and the configured capital letters onto their
# lower-case counterparts.  The result of the eval is not checked; if the
# configured letter lists make the code invalid the variable stays undefined.
$code = "\${\$_[0]} =~ tr/A-Z$CAP_LETTERS/a-z$LOW_LETTERS/;";
$to_lower_case = eval "sub { $code }";

# Load all template sections into %templates, then parse the request.
&read_template("template.htm");
&get_query();

# Only touch the index when at least one usable search term was supplied.
if (scalar(@query) != 0) {
    &get_results();
    &boolean();
    # The first value returned by times() is the user CPU time of this
    # process; if the call dies, the error text is shown in its place.
    eval { ($search_time,undef,undef,undef) = times };
    if ($@) { $search_time = $@ }
}

# Page assembly: header, then exactly one of "results", "no_results" or
# "empty_query", then the footer.
print &print_template("header");
if (scalar(@query) != 0) {
    if ($rescount>0) {
        print &print_template("results_header");
        &print_results();
        print &print_template("results_footer");
    } else {
        print &print_template("no_results");
    }
} else {
    print &print_template("empty_query");
}
print &print_template("footer");

#===================================================================

# Parse the CGI request and fill the globals the rest of the script works on.
#
# Input:  QUERY_STRING (GET) or the request body on STDIN (POST).
# Sets:
#   $ndquery   - the "query" field, still URL-encoded (re-used in page links)
#   $query     - the decoded query, lower-cased, entities expanded and finally
#                passed through $remove_non_alphabetic
#   @query     - the search terms: stop words and terms shorter than
#                $min_length are dropped, operator characters are stripped
#   @wholeword - 1 for every term that contained "!"
#   @querymode - 2 (AND) or 1 (NOT) per term; left unset for plain OR terms
#   $stpos     - offset of the first result to show, a non-negative integer
#   $stype     - search type, "AND" unless the form says otherwise
#   %zones     - the non-zero numeric "z" fields that were submitted
#
# Everything read here is untrusted and several of the values are written
# back into HTML links later on, so they are validated before being stored.
sub get_query {
    if ($ENV{'REQUEST_METHOD'} eq 'GET') {
        $query = $ENV{'QUERY_STRING'};
    }
    elsif ($ENV{'REQUEST_METHOD'} eq 'POST') {
        read(STDIN, $query, $ENV{'CONTENT_LENGTH'});
    }

    @formfields = split /&/, $query;

    $stype = "AND";
    foreach my $field (@formfields) {
        if ($field =~ /^query=(.*)/) { $ndquery = $1 }
        if ($field =~ /^stpos=(.*)/) { $stpos = $1 }
        if ($field =~ /^stype=(.*)/) { $stype = $1 }
        # Zones are array indexes: accept digits only, and never zone 0.
        if ($field =~ /^z=(\d+)\z/)  { $zones{$1}++ if ($1 != 0) }
    }

    # A non-numeric start position would turn the paging loop's $i++ into a
    # magic string increment that never terminates; negative or fractional
    # values make no sense either.  Anything but plain digits becomes 0.
    $stpos = 0 unless defined($stpos) && $stpos =~ /^\d+\z/;

    # The search type is echoed into links; only letters can be harmless.
    $stype = "AND" unless $stype =~ /^[A-Za-z]*\z/;

    $query = urldecode($ndquery);

    # $ndquery is echoed into links as well.  Percent-encode every character
    # that is not already URL-safe; decoding the result gives the same text,
    # so following a page link repeats exactly the same search.
    if (defined $ndquery) {
        $ndquery =~ s/([^A-Za-z0-9_.~%+-])/sprintf("%%%02X", ord($1))/ge;
    }

    &$to_lower_case(\$query);
    $query =~ s/(&[a-zA-Z0-9#]*?;)/&esc2char($1)/eg;

    # Log only the first page of a search.  Line breaks are flattened so that
    # one query is always one log line; a log file that cannot be opened is
    # skipped silently instead of printing to a closed handle.
    if ( ($stpos == 0) && ($create_log eq "YES") ) {
        my $log_line = $query;
        $log_line =~ tr/\r\n/ /;
        if (open(my $log, '>>', "log/$date")) {
            print {$log} "$log_line\n";
            close($log);
        }
    }

    &$remove_non_alphabetic(\$query);
    @query = ();
    foreach my $term (split / /, $query) {
        next if exists($stop_words{$term});
        push @query, $term if length($term) >= $min_length;
    }

    for (my $n = 0; $n < scalar(@query); $n++) {
        if ($query[$n] =~ /\!/)  { $wholeword[$n] = 1 }   # WholeWord
        $query[$n] =~ s/[\! ]//g;
        if ($stype eq "AND")     { $querymode[$n] = 2 }   # AND
        if ($query[$n] =~ /^\-/) { $querymode[$n] = 1 }   # NOT
        if ($query[$n] =~ /^\+/) { $querymode[$n] = 2 }   # AND
        $query[$n] =~ s/^[\+\- ]//g;                      # drop one leading operator
    }
}
#===================================================================

# Look up every term of @query in the on-disk index.
#
# For each term the hash table (HASH) gives the offset of a bucket in
# HASHWORDS; every bucket entry points to a word (SITEWORDS) and to that
# word's posting list (WORD_IND).  A word matches when it contains the term,
# or equals it when the term was marked as whole-word.
#
# Sets:
#   @allres           - $allres[$j] holds the 4-byte document references
#                       found for $query[$j], exactly as stored in WORD_IND
#   @res              - all of the above concatenated (duplicates possible)
#   $query_statistics - gets one " term-count\n" line appended per term
#
# Calls my_die() when an index file cannot be opened.  FINFO is deliberately
# left open because boolean() and print_results() read from it afterwards;
# the other index handles are closed before returning.  Truncated index
# files end the affected lookup instead of being read as garbage.
sub get_results {
    open(HASH, '<', $HASH) or &my_die("Died: could not open $HASH");
    binmode(HASH);
    open(HASHWORDS, '<', $HASHWORDS) or &my_die("Died: could not open $HASHWORDS");
    binmode(HASHWORDS);
    open(SITEWORDS, '<', $SITEWORDS) or &my_die("Died: could not open $SITEWORDS");
    open(FINFO, '<', $FINFO) or &my_die("Died: could not open $FINFO");
    open(WORD_IND, '<', $WORD_IND) or &my_die("Died: could not open $WORD_IND");
    binmode(WORD_IND);

    @allres = ();

    for (my $j = 0; $j < scalar(@query); $j++) {
        my $term = $query[$j];
        @{$allres[$j]} = ();

        # Scheme 1 hashes the whole term, any other scheme its first 4 chars.
        my $prefix_len = ($INDEXING_SCHEME == 1) ? length($term) : 4;
        my $hash_value = &hash(substr($term, 0, $prefix_len));
        my $buf;

        # Hash slot -> offset of the bucket in HASHWORDS.
        seek(HASH, $hash_value * 4, 0);
        next unless read(HASH, $buf, 4) == 4;
        seek(HASHWORDS, unpack("N", $buf), 0);

        # The bucket starts with its entry count, followed by that many
        # (word offset, posting-list offset) pairs read sequentially.
        next unless read(HASHWORDS, $buf, 4) == 4;
        my $entries = unpack("N", $buf);

        for (my $i = 0; $i < $entries; $i++) {
            last unless read(HASHWORDS, $buf, 8) == 8;
            my ($wordpos, $filepos) = unpack("NN", $buf);

            seek(SITEWORDS, $wordpos, 0);
            my $word = <SITEWORDS>;
            $word = "" unless defined $word;
            $word =~ s/\x0A//;
            $word =~ s/\x0D//;

            # Whole-word terms must equal the indexed word; blanking the word
            # makes the substring test below fail for every non-empty term.
            if ( ($wholeword[$j] == 1) && ($word ne $term) ) { $word = "" }
            next unless index($word, $term) >= 0;

            # Posting list: a count followed by that many 4-byte references.
            seek(WORD_IND, $filepos, 0);
            next unless read(WORD_IND, $buf, 4) == 4;
            my $count = unpack("N", $buf);
            my $got   = read(WORD_IND, $buf, $count * 4);
            $got = 0 unless defined $got;

            # Only take references that were read completely.
            my $complete = int($got / 4);
            for (my $k = 0; $k < $complete; $k++) {
                push @{$allres[$j]}, substr($buf, $k * 4, 4);
            }
        }
    }

    close(HASH);
    close(HASHWORDS);
    close(SITEWORDS);
    close(WORD_IND);

    @res = ();
    for (my $j = 0; $j < scalar(@query); $j++) {
        push @res, @{$allres[$j]};
        my $found_number = scalar @{$allres[$j]};
        $query_statistics .= " $query[$j]-$found_number\n";
    }
}
#===================================================================

# Combine the per-term hit lists into the final result list.
#
# Reads @query, @querymode, @allres, @res, $stype, %zones and @zone; writes
# @res, $zones_regexp and $rescount.
#
#   - a NOT term (mode 1) removes its documents from @res
#   - an AND term (mode 2) keeps only documents that are also in its list
#   - for an "OR" search duplicates are removed at the end
#   - if zones were requested, only documents whose record matches one of
#     the selected zone patterns are kept
sub boolean {
    for (my $i = 0; $i < scalar(@query); $i++) {
        if ($querymode[$i] == 1) {               # NOT
            my %excluded;
            @excluded{@{$allres[$i]}} = ();
            @res = grep { !exists $excluded{$_} } @res;
        }

        if ($querymode[$i] == 2) {               # AND
            my %in_res = map { $_ => 1 } @res;
            my %isect;
            foreach my $ref (@{$allres[$i]}) {
                $isect{$ref} = 1 if $in_res{$ref};
            }
            @res = keys %isect;
        }
    }

    if ($stype eq "OR") {
        my %unique = map { $_ => 1 } @res;
        @res = keys %unique;
    }

    if (scalar keys %zones != 0) {
        # A zone number without a configured pattern must be ignored: an
        # undefined or empty entry would add an empty alternative to the
        # regexp, and an empty alternative matches every document.
        my @patterns = grep { defined($_) && length($_) }
                       map  { $zone[$_] } keys %zones;
        $zones_regexp = join("|", @patterns);

        if (@patterns) {
            # NOTE(review): the zone entries are used as regular expressions,
            # not as literal strings - presumably intended, confirm in config.
            my $zone_re = qr/$zones_regexp/i;
            my @in_zone;
            foreach my $item (@res) {
                seek(FINFO, unpack("N", $item), 0);
                my $head;
                read(FINFO, $head, 100);
                $head = "" unless defined $head;
                # A record is one line; cut at the line end so that a short
                # record is not matched against the text of its successor.
                $head =~ s/[\r\n].*//s;
                push @in_zone, $item if $head =~ $zone_re;
            }
            @res = @in_zone;
        }
    }

    $rescount = scalar(@res);
}
#===================================================================

# Print the current page of results and build the page navigation.
#
# Reads @res, $stpos, $res_num, $rescount, $ndquery, $stype and %zones.
# For every result on the page the record is read from FINFO, split on "::"
# into $url, $size, $title and $description, and the "results" template is
# printed.  The navigation HTML (PREV, up to 20 page numbers, NEXT) is left
# in $next_results; it is empty when everything fits on one page.
# $zones_str receives the "&z=..." part of the links.  Returns 1.
sub print_results {
    # The request values below end up inside an unquoted HREF attribute.
    # Percent-encoding everything that is not URL-safe keeps the link intact
    # and makes it impossible to inject markup; "%" and "+" are kept because
    # $ndquery is already URL-encoded.
    my $link_safe = sub {
        my ($value) = @_;
        $value = "" unless defined $value;
        $value =~ s/([^A-Za-z0-9_.~%+-])/sprintf("%%%02X", ord($1))/ge;
        return $value;
    };

    # Work with a real non-negative integer: a non-numeric start position
    # would make $i++ a string increment and the loop below endless.
    my $start = int($stpos);
    $start = 0 if $start < 0;

    for (my $i = $start; $i < $start + $res_num; $i++) {
        last if $i >= scalar(@res);       # also stops when $start is past the end
        $strpos = unpack("N", $res[$i]);
        seek(FINFO, $strpos, 0);
        $dum = <FINFO>;
        ($url, $size, $title, $description) = split(/::/, $dum);
        print &print_template("results");
    }

    $zones_str = "";
    foreach my $zone (keys %zones) {
        $zones_str .= '&z=' . $link_safe->($zone);
    }

    if ($rescount <= $res_num) { $next_results = ""; return 1 }

    # Page links are shown in groups of 20 pages.
    my $mhits = 20 * $res_num;
    my $pos2  = $start - $start % $mhits;   # first result of the current group
    my $pos1  = $pos2 - $mhits;             # first result of the previous group
    my $pos3  = $pos2 + $mhits;             # first result of the next group

    my $link_head = "<A HREF=search.cgi?query=" . $link_safe->($ndquery) . "&stpos=";
    my $link_tail = "&stype=" . $link_safe->($stype) . $zones_str;

    my $prev = "";
    if ($pos1 >= 0) {
        $prev = " " . $link_head . $pos1 . $link_tail . ">PREV</A> \n";
    }

    # A next group exists only if at least one result lies beyond this one.
    my $next = "";
    if ($pos3 < $rescount) {
        $next = " " . $link_head . $pos3 . $link_tail . ">NEXT</A> \n";
    }

    $next_results = $prev . " |\n";
    for (my $i = $pos2; $i < $pos3; $i += $res_num) {
        last if $i >= $rescount;
        my $page_number = $i / $res_num + 1;
        if ($i != $start) {
            $next_results .= $link_head . $i . $link_tail . ">" . $page_number . "</A> |\n";
        }
        else {
            $next_results .= $page_number . " |\n";   # current page: no link
        }
    }
    $next_results .= $next;
    return 1;
}
#===================================================================

# Load a template file and store its sections in %templates.
#
# A section is everything between
#     <!-- RiSearch::NAME::start -->   and   <!-- RiSearch::NAME::end -->
# and is stored as $templates{NAME}.  Text outside of sections is ignored.
#
# $filename - path of the template file
# Always returns 1.  If the file cannot be opened "Could not find template"
# is printed and %templates is left as it was.
sub read_template {
    my ($filename) = @_;

    my $content = "";
    # Three-argument open: the name is used literally, so characters such as
    # ">" or "|" or surrounding blanks in it can never change the open mode.
    if (open(my $fh, '<', $filename)) {
        local $/;                         # slurp the whole file at once
        my $data = <$fh>;
        $content = $data if defined $data;
        close($fh);
    }
    else {
        print "Could not find template";
    }

    while ($content =~ m|<!-- RiSearch::([^:]+?)::start -->(.*?)<!-- RiSearch::\1::end -->|gs) {
        $templates{$1} = $2;
    }
    return 1;
}
#===================================================================

# Return one template section with its placeholders filled in.
#
# $part - name of the section in %templates
#
# Placeholders: %query%, %search_time%, %query_statistics%, %stpos% (shown
# as $stpos + 1), %url%, %title%, %size%, %description%, %rescount%,
# %next_results%, %rand_number% (a random integer from 0 to 255) and
# %right_form(...)%, which is replaced by the result of right_form().
# Anything else between percent signs is left untouched.
#
# All placeholders are replaced in a single pass, so text that was inserted
# is never scanned again: a page title containing "%size%" stays as it is.
# Returns an empty string for an unknown section.
sub print_template {
    my $part = shift;
    my $template = $templates{$part};
    return "" unless defined $template;

    my %value_for = (
        query            => $query,
        search_time      => $search_time,
        query_statistics => $query_statistics,
        stpos            => $stpos + 1,
        url              => $url,
        title            => $title,
        size             => $size,
        description      => $description,
        rescount         => $rescount,
        next_results     => $next_results,
        rand_number      => int(rand(256)),
    );
    my $names = join("|", keys %value_for);

    # $1 is set for a plain placeholder, $2 for the right_form() arguments.
    $template =~ s{%(?:($names)|right_form\((.*?)\))%}{
        defined($1) ? (defined($value_for{$1}) ? $value_for{$1} : "")
                    : &right_form($2)
    }egs;

    return $template;
}
#===================================================================

# Decode a URL-encoded form value.
#
# $val - the encoded string
# Returns the decoded string: every "+" becomes a blank and every %XX
# escape becomes the byte with that hexadecimal code.  Hex digits are
# accepted in upper and lower case; a "%" that is not followed by two hex
# digits is left as it is.
sub urldecode {
    my ($val) = @_;
    $val =~ s/\+/ /g;
    $val =~ s/%([0-9A-Fa-f]{2})/pack('C', hex($1))/ge;
    return $val;
}
#===================================================================

# Report a fatal error on the page and stop the script.
#
# The message, followed by a newline, is printed to the current output
# handle; the script then dies without a message of its own.
sub my_die {
    my $message = shift;
    print "$message\n";
    die;
}
#===================================================================

# Choose the word form that agrees with the number of results ($rescount).
#
# $words - three double-quoted forms separated by commas: "one","few","many"
#
# Returns
#   - the third form when $rescount ends in 11 to 14,
#   - otherwise the first form when it ends in 1,
#   - otherwise the second form when it ends in 2, 3 or 4,
#   - otherwise the third form.
# An argument that does not have this shape yields an empty string.
sub right_form {
    my ($words) = @_;

    # Without this check a failed match would leave $1..$3 holding whatever
    # the caller's last successful match captured.
    return "" unless defined($words)
                  && $words =~ /"([^"]+)","([^"]+)","([^"]+)"/;
    my ($one, $few, $many) = ($1, $2, $3);

    my $last_two = $rescount % 100;
    my $last_one = $rescount % 10;

    return $many if $last_two > 10 && $last_two < 15;
    return $one  if $last_one == 1;
    return $few  if $last_one > 1 && $last_one < 5;
    return $many;
}
#===================================================================
