#!/usr/bin/perl
#
# rdx2latex.pl
# converts .rdx (extended RD format) file to .tex
#
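# Usage: perl rdx2latex.pl filename.rdx [preamble_file] > filename.tex
#
# If preamble_file is given, its contents are printed in place of the
# default preamble.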
#
# Version 0.3
#
# Yusuke Kubota
# Last Modified: *November 22 2008*
#

# Current input line.  This is a package global on purpose: the conv_*
# subroutines below read it and may blank it out.
$line = "";

# Text captured from an ESCAPE{...} marker on the current line.
# (This was misspelled "$espace", which left the variable actually used
# by the main loop uninitialised.)
$escape = "";

# One flag per environment: 1 while the converter is inside that
# environment, 0 otherwise.  Keys prefixed emb_/emb2_ (and 'xlist')
# track lists embedded inside other lists.
%IN_ENV =
    ('framing' => 0,
     'shadow' => 0,
     'verbatim' => 0,
     'quote' => 0,
     'emb_itemize' => 0,
     'itemize' => 0,
     'emb_enumerate' => 0,
     'enumerate' => 0,
     'xlist' => 0,
     'exe' => 0,
     'emb2_itemize' => 0,
     'emb2_enumerate' => 0,
     );

# Not read anywhere in the visible code; its only assignment is
# commented out at the end of the main loop.
$prevline = "";

# Forward declarations, so the subs defined at the end of the file can be
# called before their definitions (print_preamble is called without
# parentheses below, which requires this).
sub conv_env;
sub conv_emb2_list;
sub conv_emb_list;
sub conv_list;
sub print_preamble;


# If there is a second argument, use it as the preamble.
# Otherwise, use the default preamble.

if (@ARGV == 2) {
    # $ARGV[1] is the scalar element (@ARGV[1] is a one-element slice).
    # Three-argument open keeps characters such as '>' or '|' in the file
    # name from being interpreted as an open mode, and a failure is
    # reported instead of silently producing a document with no preamble.
    open(my $preamble, '<', $ARGV[1])
        or die "rdx2latex.pl: cannot open preamble file '$ARGV[1]': $!\n";
    while (my $preamble_line = <$preamble>) {
        print $preamble_line;
    }
    close($preamble);
} else {
    print_preamble;
}


# Main conversion loop: read the .rdx input line by line, let the conv_*
# subs open/close LaTeX environments (they print directly and keep state
# in %IN_ENV), rewrite inline markup with the substitutions below, and
# print the resulting line.  Finally emit the document footer.

# The input file name is mandatory.
if (!@ARGV) {
    die "Usage: perl rdx2latex.pl filename.rdx [preamble_file] > filename.tex\n";
}

# Three-argument open with an explicit check: a missing or unreadable
# input used to yield a silently empty document.
open(my $main, '<', $ARGV[0])
    or die "rdx2latex.pl: cannot open input file '$ARGV[0]': $!\n";

# List converters in the order they must run on every line: second-level
# embedded lists first, then first-level embedded lists, then top-level
# lists.  Each entry is [converter, label pattern, %IN_ENV key].
#
# The label patterns are single-quoted on purpose.  In a double-quoted
# string "\." collapses to ".", which made the regex match ANY character
# after the digit (so a line such as "12 apples" opened an enumerate).
my @list_rules = (
    [ \&conv_emb2_list, '^    [0-9]\. ', 'emb2_enumerate' ],
    [ \&conv_emb2_list, '^    - ',       'emb2_itemize'   ],
    [ \&conv_emb_list,  '^  [0-9]\. ',   'emb_enumerate'  ],
    [ \&conv_emb_list,  '^  - ',         'emb_itemize'    ],
    [ \&conv_emb_list,  '^  @',          'xlist'          ],
    [ \&conv_list,      '^[0-9]\. ',     'enumerate'      ],
    [ \&conv_list,      '^- ',           'itemize'        ],
    [ \&conv_list,      '^@',            'exe'            ],
);

# Runs every list converter against the current (global) $line.
my $convert_lists = sub {
    for my $rule (@list_rules) {
        my ($converter, $label, $env) = @$rule;
        $converter->($label, $env);
    }
};

# Read line by line into the global $line (the conv_* subs rely on it)
# instead of slurping the whole file into a list first.
while (defined($line = <$main>)) {

    # A final line without a newline would otherwise get "\end{...}" or
    # "\end{document}" glued onto it.
    $line .= "\n" unless $line =~ /\n\z/;

# if you want to let some string not processed by rdx2latex.pl
# then mark it like this: ESCAPE{target string}
#
# Only the first ESCAPE{...} on a line is handled.  The capture is taken
# only when the substitution actually matched; $1 is not meaningful
# otherwise.  The braces are escaped because newer Perl versions reject
# or warn about an unescaped literal "{" in a pattern.

    if ($line =~ s/ESCAPE\{(.*?)\}/XXXXX/) {
        $escape = $1;
    } else {
        $escape = undef;
    }

## non-list environments

    conv_env('[]', 'framing');
    conv_env('>>', 'quote');
    conv_env('~~', 'verbatim');
    conv_env('##', 'shadow');

# for verbatim: print the line untouched and blank it so that none of the
# substitutions below apply to it
    if ($IN_ENV{verbatim} == 1) {
        print $line;
        $line = "";
    }

# 2nd embedding, embedded list environments, list environments

    $convert_lists->();

# replace list identifiers

    if ($IN_ENV{'xlist'} == 1) { $line =~ s/^  @(.*)/   \\ex\\label{$1}/g; }
    if ($IN_ENV{'exe'} == 1) { $line =~ s/^@(.*)/ \\ex\\label{$1}/g; }

    if ($IN_ENV{'emb_itemize'} == 1) { $line =~ s/^  - /   \\item /g; }
    if ($IN_ENV{'itemize'} == 1) { $line =~ s/^- / \\item /g; }

    if ($IN_ENV{'emb_enumerate'} == 1) { $line =~ s/^  [0-9]\. /   \\item /g; }
    if ($IN_ENV{'enumerate'} == 1) { $line =~ s/^[0-9]\. / \\item /g; }

    if ($IN_ENV{'emb2_itemize'} == 1) { $line =~ s/^    - /   \\item /g; }

    if ($IN_ENV{'emb2_enumerate'} == 1) { $line =~ s/^    [0-9]\. /   \\item /g; }

# replace specific strings
#
# The order of the substitutions below matters (e.g. bold "**" must be
# handled before emph "*", "||" before "|", citations before refs).
# Captures are referenced as $1/$2 in the replacement part; "\1" is
# legacy syntax there.

    # Ellipses
    $line =~ s/(^|[^.])\.\.\.($|[^.])/$1\\ldots$2/g;

    # Fix double quotes
#  $line =~ s/(^|\s)\"/\1``/g;
#  $line =~ s/\"(\W|$)/''\1/g;

    # emphasis (bold)
    $line =~ s/\*\*(.*?)\*\*/\\textbf{$1}/g;

    # emphasis (emph)
    $line =~ s/\*(.*?)\*/\\emph{$1}/g;

    # semantics (double brackets)
    $line =~ s/\[\[(.*?)\]\]/\\sem[]{$1}/g;

    # || for hfill
    # (the old replacement also referenced a second capture group that
    # the pattern does not have; it always expanded to nothing)
    $line =~ s/\|\| +([^\n\r\f\t]+)/\\hfill{\\mbox{$1}}/g;

    # verbatim
    $line =~ s/\|(.*?)\|/\\verb\|$1\|/g;

    # italic (for linguistic expression)
    $line =~ s/"(.*?)"/\\textit{$1}/g;

    # section headers

    $line =~ s/^=== (.*)/\\subsubsection{$1}/g;
    $line =~ s/^== (.*)/\\subsection{$1}/g;
    $line =~ s/^= (.*)/\\section{$1}/g;

    # paragraphs

    $line =~ s/^# (.*)/\\paragraph{$1}/g;

    # &
#  $line =~ s/&/\\&/g;

    # label
#  $line =~ s/<<(.*?)>>/\\label{\1}/g;

    # citet, etc.
    $line =~ s/<>([0-9a-zA-Z\-:_]*)/\\citet{$1}/g;
    $line =~ s/<s>([0-9a-zA-Z\-:_]*)/\\cites{$1}/g;
    $line =~ s/<p>([0-9a-zA-Z\-:_]*)/\\citep{$1}/g;

    # ref
    $line =~ s/<([0-9a-zA-Z\-]*?)>/\\ref{$1}/g;

    # ex
#  $line =~ s/^(\s)*@(.*)/\1 \\ex\\label{\2}/g;

    # subscript
    $line =~ s/\_\_([a-z])/\$\_$1\$/g;

    # Fix single quotes
    $line =~ s/(^|\s)'/$1`/g;

    # url -- in the markdown format: [description](<http:/...>)
    $line =~ s/\[(.*?)\]\(<(.*?)>\)/$1 (\\url{$2})/g;

# put back the escaped string (only if this line had an ESCAPE marker, so
# a literal "XXXXX" in ordinary text is left alone)

    $line =~ s/XXXXX/$escape/ if defined $escape;

    print $line;

#  $prevline = $line;

}

close($main);

# The input may end while a list is still open (no trailing blank line).
# The list converters close their environments on a blank line, so feed
# them one; otherwise the output would contain \begin{...} without a
# matching \end{...}.
my @list_envs = map { $_->[2] } @list_rules;
if (grep { $IN_ENV{$_} == 1 } @list_envs) {
    $line = "\n";
    $convert_lists->();
    print $line;
}

# Footer
print "\\end{document}\n";


# conv_env(INMKR, OUTMKR)
#
# Toggles a non-list environment.  When the current line (the global
# $line, compared without its trailing newline) consists of exactly the
# marker INMKR, prints "\begin{OUTMKR}" if %IN_ENV says we are outside
# that environment, or "\end{OUTMKR}" otherwise, flips $IN_ENV{OUTMKR}
# accordingly, and empties $line so the marker itself is not processed
# any further.  Any other line is left untouched.

sub conv_env {
    my ($marker, $env) = @_;

    my $text = $line;
    chomp $text;

    # Not a marker line: nothing to do.
    return unless $text eq $marker;

    my $opening = ($IN_ENV{$env} == 0);
    my $command = $opening ? "begin" : "end";

    $IN_ENV{$env} = $opening ? 1 : 0;
    print "\\" . $command . "{" . $env . "}\n";
    $line = "";

    return;
}


# conv_list(INLBL, OUTMKR)
# conv_emb_list(INLBL, OUTMKR)
# conv_emb2_list(INLBL, OUTMKR)
#
# convert list environments (conv_emb_list is for lists embedded one
# level deep, conv_emb2_list for lists embedded two levels deep)
#
# each changes the values of keys in the hash table %IN_ENV appropriately
# for keeping track of whether one is inside or outside of the
# environment in question

# conv_emb2_list and conv_emb_list should really be merged into one
# subroutine

# conv_emb2_list(INLBL, OUTMKR)
#
# Opens/closes a list embedded two levels deep (items indented by four
# spaces).  INLBL is a regex matching an item label of this list; OUTMKR
# is the %IN_ENV key, and the LaTeX environment name is OUTMKR without
# its "emb2_" prefix.
#
# Reads the global $line (never modifies it) and prints directly:
#   - "\begin{...}" when the line carries the label and the list is not
#     open yet;
#   - "\end{...}" when the list is open and the line is blank, starts a
#     top-level item, or starts a first-level embedded item.
# The last case used to be missing, so an item that returned to the
# first embedding level ("  - ...") was emitted inside the innermost
# list.

sub conv_emb2_list {
    my ($INLBL, $OUTMKR) = @_;

    (my $OUTMKR_PRT = $OUTMKR) =~ s/^emb2_//;

    my $ln = $line;
    chomp $ln;

    if ($ln =~ /$INLBL/ && $IN_ENV{$OUTMKR} == 0) {
        $IN_ENV{$OUTMKR} = 1;
        print "    \\begin{" . $OUTMKR_PRT . "}\n";
    }
    elsif ($IN_ENV{$OUTMKR} == 1
           && ($line =~ /^(?:  )?(?:- |[0-9]\. |@)/   # item at indent 0 or 2
               || $line =~ /^$/)) {                   # blank line
        $IN_ENV{$OUTMKR} = 0;
        print "    \\end{" . $OUTMKR_PRT . "}\n";
    }

    return;
}


# conv_emb_list(INLBL, OUTMKR)
#
# Opens/closes a list embedded one level deep (items indented by two
# spaces).  INLBL is a regex matching an item label of this list; OUTMKR
# is the %IN_ENV key, and the LaTeX environment name is OUTMKR without
# its "emb_" prefix (keys without that prefix, e.g. 'xlist', are used
# as-is).
#
# Reads the global $line (never modifies it) and prints directly:
#   - "\begin{...}" when the line carries the label and the list is not
#     open yet;
#   - "\end{...}" when the list is open and the line is blank, starts a
#     top-level item, or is two-space-indented text starting with a
#     capital letter or digit.
# A line that is itself an item of this list never closes it.  Without
# that guard the "two spaces + digit" test also matched "  2. ..." and
# closed every embedded enumerate at its second item.

sub conv_emb_list {
    my ($INLBL, $OUTMKR) = @_;

    (my $OUTMKR_PRT = $OUTMKR) =~ s/^emb_//;

    my $ln = $line;
    chomp $ln;

    my $is_own_item = ($ln =~ /$INLBL/) ? 1 : 0;

    if ($is_own_item && $IN_ENV{$OUTMKR} == 0) {
        $IN_ENV{$OUTMKR} = 1;
        print "  \\begin{" . $OUTMKR_PRT . "}\n";
    }
    elsif (!$is_own_item
           && $IN_ENV{$OUTMKR} == 1
           && ($line =~ /^(- |[0-9]\. |@)/      # top-level item
               || $line =~ /^$/                 # blank line
               || $line =~ /^  [A-Z0-9]/)) {    # indented plain text
        $IN_ENV{$OUTMKR} = 0;
        print "  \\end{" . $OUTMKR_PRT . "}\n";
    }

    return;
}

# conv_list(INLBL, OUTMKR)
#
# Opens/closes a top-level list.  INLBL is a regex matching an item
# label; OUTMKR is both the %IN_ENV key and the LaTeX environment name.
# Reads the global $line without modifying it.  Prints "\begin{OUTMKR}"
# plus a newline when a labelled line arrives while the list is closed,
# and "\end{OUTMKR}" (no newline of its own) when a line consisting of
# just a newline arrives while the list is open.

sub conv_list {
    my ($label_re, $env) = @_;

    my $text = $line;
    chomp $text;

    my $state = $IN_ENV{$env};

    if ($state == 0 && $text =~ /$label_re/) {
        $IN_ENV{$env} = 1;
        print "\\begin{" . $env . "}\n";
    }
    elsif ($state == 1 && $line eq "\n") {
        $IN_ENV{$env} = 0;
        print "\\end{" . $env . "}";
    }
}
    

# print_preamble
#
# Prints the built-in LaTeX preamble to the currently selected output
# handle: an 11pt article using the times and gb4e packages, up to and
# including \begin{document}.  Takes no arguments.

sub print_preamble {

    # One entry per output line; single quotes keep the backslashes literal.
    my @preamble_lines = (
        '\documentclass[11pt]{article}',
        '',
        '\usepackage{times}',
        '\usepackage{gb4e}',
        '',
        '\begin{document}',
    );

    print join("", map { $_ . "\n" } @preamble_lines);
}
