#! /usr/bin/perl
# Written by Jon Dehdari 2004
# Perl 5.8
# Stemmer and Syntax Parser for Persian
# The license is the GPL (www.fsf.org)
#
# The format of the resolve.txt file is as follows:
# 1. Mokassar: 'ktb ktAb' OR 'ktb ktAb_+PL'
# 2. Preparsed (speed): 'krdn kr_+dn'
# 3. Don't stem: 'bArAn bArAn'
# 4. Stop word: 'u '

use strict;
use utf8;
# NOTE(review): 'use warnings' is not enabled in this script; turning it on
# would only add diagnostics on STDERR, but the rest of the file has not been
# checked against it, so it is left off here.
#use diagnostics;
#binmode(STDOUT, ":utf8");
use LWP::Simple qw(!head);   # everything except head(), which is excluded from the import
use CGI qw(:standard);       #must use this full line, not just CGI

# Upper bound on the size of a POST body that CGI.pm will accept.
$CGI::POST_MAX = 20000;

# Direct method call instead of the indirect-object form 'new CGI', which
# perl can mis-parse.
my $query = CGI->new;

# Form parameters supplied by the caller (any of them may be undefined).
my $input_type    = param('input_type');
my $width         = param('width');
my $remove_stops  = param('remove_stops');
my $use_web_page  = param('use_web_page');
my $web_page      = param('web_page');
my $use_file      = param('use_file');
my $uploaded_file = param('uploaded_file');
my $text_from     = param('text_from');

# Hard-coded values kept for debugging without a form submission.
#my $input_type = "1";
#my $text_from = "\u{d986}\u{d8a7}\u{d986}\u{d987}\u{d8a7}";
#my $text_from = "\u{0646}\u{0627}\u{0646}\u{0647}\u{0627}";
#my $text_from = "نانها";
#my $text_from = "قدومي بفارغ الصبر";
#my $text_from = "CmA nmidAnid.";
#my $use_web_page = "false";
#my $web_page = "false";
#my $remove_stops = "false";
#my $use_file = "false";
#my $uploaded_file = "false";

# File-scoped working variables; they are declared here and not used within
# this section of the script.
my %unicode2roman;
my $text_from_new;
my @charx;
my $charx;
my $input_rtl;
my $resolve_file;
my %resolve;
my @resolve;
my $resolve;
my $ar_chars = "EqHSTDZLVU";
#my $longvowel = "Aui]";

# Flag the input as right-to-left when the input type contains any of the
# digits 0, 1, 2, 3, 5 or 7.
if (defined $input_type && $input_type =~ /[012357]/) {
    $input_rtl = "true";
}

### Checks for stupid width inputs and corrects them
# The width comes straight from the request, so anything that is not a plain
# run of ASCII digits (missing, negative, "50abc", ...) is treated as 0 and
# then raised to the minimum below instead of being passed through as-is.
if (!defined $width || $width !~ /\A[0-9]+\z/) {
    $width = 0;
}
if ($width > 800) {
    $width = "800";
}
if ($width < 20) {
    $width = "20";
}

#if ($remove_stops eq "true") {$resolve_file = "resolve_no_stops.txt"; }
#else ($remove_stops ne "true") {$resolve_file = "resolve.txt"; }
$resolve_file = "resolve.txt";

# Emit the HTTP header: input type 4 is answered as ISO-8859-1, every other
# input type (including a missing one) as windows-1256.
if (defined $input_type && $input_type eq 4) {
    print $query->header( -charset => 'ISO-8859-1');
}
#if ($input_type eq 1) { print $query->header( -charset => 'UTF-8'); }
else {
    print $query->header( -charset => 'windows-1256');
}

# Start of the page output; this print statement is not closed here and its
# argument list continues on the following lines of the file.
print( '', "\n", '', "\n", "\n", # "