% -*- latex -*-
\documentclass{seminar}
\usepackage{lsalike}
\usepackage{longtable}
\usepackage{cmbright}
\usepackage{path}
\usepackage[all,ps,dvips]{xy}
\SelectTips{cm}{}
\CompileMatrices
\special{papersize=11in, 8.5in}
\newenvironment{indentLeft}
  {\begin{list}{}{\rightmargin 0pt\leftmargin 0.5in}\item[]}
  {\end{list}}
\newenvironment{indentLeftLess}
  {\begin{list}{}{\rightmargin 0pt\leftmargin 0.275in}\item[]}
  {\end{list}}
\def\pt{\phantom`}
\def\ps{\phantom*}
\def\hx{\hskip1ex\relax}
\def\gap{\vrule height0.4pt depth0pt width0.4cm}
\newcommand{\ip}[2]{#1 <\!\!< #2}

\slideframe{none}
\begin{document}
\begin{slide}\thispagestyle{empty}
\begin{center}
\begin{large}Improving the Efficiency of Parsing\\
with Discontinuous Constituents\end{large}\\[12pt]
Mike Daniels and Detmar Meurers\\
The Ohio State University\\[12pt]
NLULP 2002\\
28 July 2002
\end{center}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Introduction\end{large}\end{center}
\begin{itemize}
\item Background: Discontinuous Constituents
\begin{itemize}
\item ongoing development of HPSG linearization grammar\\ (Reape 1993,
  Kathol 2000, M\"uller 1999)\nocite{reape:93,kathol:00,smueller:99}
\item increasing use of discontinuous constituents throughout
  syntactic frameworks and treebanks (NEGRA \cite{skut:ea:97},
    VERBMOBIL \cite{hinrichs-et-al-treebanks-vm00})
\end{itemize}
\item Parsing of Generalized ID/LP grammars (GIDLPs)
\begin{itemize}
\item Context-free grammars with two requirements relaxed:
\begin{itemize}
\item that the RHS be totally ordered
\item that the LHS symbol dominate a contiguous series of terminals
\end{itemize}
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Introduction (Cont.)\end{large}\end{center}
\begin{itemize}
\item Goals
\begin{itemize}
\item Efficient parsing of GIDLPGs.
\item Recognize and take advantage of linear/context-free subgrammars.
\end{itemize}
\item Method
\begin{itemize}
\item In general, parsing with grammars licensing discontinuous
  constituents requires an exponential number of edges.
\item We focus on methods of limiting the number of edges required for
  parsing with such grammars, keeping other issues as simple as
  possible (for example, using atomic categories).
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Grammar Format\end{large}\end{center}
\begin{flushleft}
  Based on the Linear Specification Language \cite{goetz:penn:97,suhre:99}.\\
  We have \textbf{lexical entries} of the form $A \to t$ and
  \textbf{grammar rules} of the form $A \to \alpha; L$, where:\\
\begin{itemize}
\item $\alpha$ is a \textbf{list of non-terminals}.
\item $L$ is a set of \textbf{linearization constraints}:
\begin{itemize}
\item $A < B$ \textbf{(precedence)}: The terminals dominated by $A$ all
  occur to the left of the terminals dominated by $B$.
\item $\ip{A}{B}$ \textbf{(immediate precedence)}: The rightmost
  terminal dominated by $B$ occurs immediately to the left of the
  leftmost terminal dominated by $B$.
\item $[A]$ \textbf{(isolation)}: $A$ dominates an uninterrupted
  sequence of terminals in the input.
\end{itemize}
\end{itemize}
\end{flushleft}
\end{slide}

\begin{slide}
\begin{center}\begin{large}RHS Order\end{large}\end{center}
\begin{itemize}
\item Even though the right-hand side of a grammar rule may be
  partially ordered, we still require it to be stated as a list.
\item This ordering of the non-terminals in $\alpha$ is used to guide
  parsing; it need not reflect surface order.
\item For example, the rule\\[6pt]
  s $\to$ verb$_1$ nom$_2$ acc$_3$; $\{$2 $<$ 1, 3 $<$ 1$\}$\\[6pt]
  requires that the verb be parsed first, even though it must follow
  the other two constituents.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Sample Grammar\end{large}\end{center}
\begin{tabular}{l|l|l}
1&\multicolumn{2}{l}{s $\to$ verb$_1$ nom$_2$ acc$_3$; $\{2 < 1, 3 <
  1\}$}\\
2&\multicolumn{2}{l}{s $\to$ verb$_1$ nom$_2$; $\{2 < 1\}$}\\
3&\multicolumn{2}{l}{s $\to$ conj$_1$ s$_2$ s$_3$; $\{\ip{2}{1},
  \ip{1}{3}, [2], [3]\}$}\\
4&\multicolumn{2}{l}{acc $\to$ adj$_1$ acc$_2$; $\{~\}$}\\
5&\textit{nalas} $\to$ nom & `Nala' (a proper name)\\
6&\textit{nagaram} $\to$ acc & `city'\\
7&\textit{agacchat} $\to$ verb & `went'\\
8&\textit{caiva} $\to$ conj & `and then'\\
9&\textit{avadat} $\to$ verb & `spoke'\\
10&\textit{ruciram} $\to$ adj & `shining'\\
\end{tabular}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Example\end{large}\end{center}
\begin{flushleft}
(1) \begin{tabular}[t]{*7{l@{\hskip6pt}}}
ruciram & nalas & nagaram & agacchat & caiva & nalas & avadat\\
shining & nala & city & went & and then & nala & spoke\\
\multicolumn{7}{l}{`Nala went to the shining city and then Nala spoke.'}\\
\end{tabular}
\end{flushleft}
\begin{center}
\begin{small}
\begin{tabular}{c}
\xymatrix@C-2.2pc@R-1.6pc{
&&&&&&&&\ar@{-}[ddllllll]\ar@{-}[dddrrr]\ar@{-}[ddddd]\txt{s}\\
\\
&&\ar@{-}[dll]\ar@{-}[ddd]\ar@{-}[dddrrrr]\ar@{-->}@/^1ex/[dddrrrrrr]\txt{[s]}\\
\ar@{-}[dd]\ar@{-}[ddrrrr]\ar@{-->}@/^1ex/[ddrrrrrr]\txt{acc} &&&&&&&&&&&%
\ar@{-}[ddl]\ar@{-}[ddr]\txt{[s]}\\
\\
\ar@{-}[d]\txt{adj} && \ar@{-}[d]\ar@{-->}@/^2ex/[rrrr]\txt{nom} &&
\ar@{-}[d]\txt{acc} && \ar@{-}[d]\txt{verb} &&
\ar@{-}[d]\ar@{-->}@/^1ex/[uurrr]\txt{conj} &&
\ar@{-}[d]\ar@{-->}@/^1ex/[rr]\txt{nom} && \ar@{-}[d]\txt{verb}\\
{\txt{ruciram}} && {\txt{nalas}} && {\txt{nagaram}} &&
{\txt{agacchat}} && {\txt{ca}} && {\txt{nalas}} && {\txt{avadat}} \\
}
\end{tabular}
\end{small}\\
\begin{tiny}
(Solid lines represent dominance, while dashed arrows represent the
precedence relations enforced by the grammar.)
\end{tiny}
\end{center}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Parsing Framework\end{large}\end{center}
\begin{itemize}
\item Based on Earley's Algorithm: a top-down, goal-directed approach.
\item Also a bottom-up approach: driven by lexical information.
\item Modified to start with lexical seeding and to use an active
  chart.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Edge Coverage: Earley\end{large}\end{center}
\begin{itemize}
\item An active chart contains edges encoding in-process as
  well as completed parses of a (sub)string.
\item Traditionally, edge coverage is modelled by an interval.
\item For instance, $\langle$[0,2], S, [VP]$\rangle$ might represent
  the edge for\\\textit{The dog} in
  \textit{$_0$The$_1$dog$_2$sleeps$_3$}.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Edge Coverage: GIDLP\end{large}\end{center}
\begin{itemize}
\item GIDLP grammars allow for discontinuity, and so a single interval
  is no longer sufficient: a constituent may dominate any subset of
  the input.
\item Following Johnson (1985)\nocite{johnson:85parsing} and Reape
  (1991)\nocite{reape:91parsing}, we represent edge coverage with
  bitvectors.
\item Each active bit represents the inclusion of the word at that
  position; the left edge of the input corresponds to the left edge of
  the vector.
\end{itemize}
\end{slide}


\begin{slide}
\begin{center}\begin{large}Encoding Bitvectors\end{large}\end{center}
\begin{itemize}
\item We want to represent the coverage of the discontinuous
  accusative noun phrase \textit{ruciram nagaram} in the example given
  earlier:
\begin{tabular}[t]{*7{c@{\hskip6pt}}}
\textbf{ruciram} & nalas & \textbf{nagaram} & agacchat & caiva & nalas
& avadat\\
yes & no & yes & no & no & no & no\\
\end{tabular}
\item The following are equivalent in terms of descriptive power:
\begin{itemize}
\item Interval lists (Johnson 1985): \path|[[0,1],[2,3]]|
\item Bit lists (Reape 1991): \path|[1,0,1,0,0,0,0]|
\item Binary numbers: 0000101$_2$ (= 5$_{10}$)\\
\begin{small}
(Notice that the least significant bit of the number corresponds to the
left-most word in the input.)
\end{small}
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Computing With Bitvectors\end{large}\end{center}
\begin{itemize}
\item Parsing requires a number of bitwise operations: overlap,
  lbound, suffix, and so on. (See Appendix A in the handout for a full
  list.)
\item The nature of current hardware allows bitwise operations to be
  computed in parallel in a single processor instruction.
\item Thus it takes just as long to, say, compute the bitwise-AND of
  two 20-bit numbers as it does for two 30-bit numbers.
\item In general, for a processor that has a word size of $n$,
  arithmetic computation time is proportional to the base-$n$
  logarithm of the inputs. Operations on any list-based representation
  will take time proportional to the length of the list itself.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Edge Constraints\end{large}\end{center}
\begin{itemize}
\item We also use bitvectors to encode \textbf{constraints} on
  coverage; bitvectors used this way are called bitmasks.
\item We store a negative and positive bitmask on each edge,
  representing different aspects of precedence information.\\
  Assume we've found a noun as the third word of a sentence:
\begin{itemize}
\item Negative: If we know that verbs follow nouns, we shouldn't look
  for verbs in the first or second position.
\item Positive: If we know that verbs immediately follow nouns, then
  any verb we find must cover at least the fourth position.
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Prediction\end{large}\end{center}
\begin{itemize}
\item Given an edge with a non-empty RHS, consider the first element
  of that RHS and see if any rules have that category as their LHS.
\begin{indentLeft}
  \textit{Consider the edge}\\[6pt]
  $s : s_2\, s_3; \{\ip{s_2}{0010000}, \ip{0010000}{s_3}, [s_2],
  [s_3]\}$,\\
  Coverage = 0010000, NMask = 0010000\\[6pt]
  \textit{One qualifying rule in the grammar is number 3:}\\[6pt]
  $s \to c_1\, s_2\, s_3;\{\ip{2}{1}, \ip{1}{3}\}$.\\
\end{indentLeft}
\pagebreak
\item Compute the masks for each such rule by considering each order
  constraint in turn:
\begin{itemize}
\item For precedence constraints, have we already found one of the
  appropriate categories? If so, apply a left- or right-mask, as
  appropriate.
\begin{indentLeftLess}
  \textit{Here, we're predicting $s_2$, which is mentioned in the
    constraint $\ip{s_2}{0010000}$. Since the predicted element needs
    to precede a known location, we augment the negative mask with
    suffix(lbound(0010000)) = 1110000.}
\end{indentLeftLess}
\pagebreak
\item For immediate precedence, also augment the positive mask with
  the bit adjacent to the appropriate bound of the position.
\begin{indentLeftLess}
  \textit{With the constraint $\ip{s_2}{0010000}$, we use the bit
    adjacent to the left bound of 0010000: we augment the positive
    mask with 0001000.}
\end{indentLeftLess}
\end{itemize}
\item Make sure the negative mask has enough ``holes'' for the RHS of
  the current rule to fit into.
\begin{indentLeft}
  \textit{The negative mask 1110000 does in fact have enough space for
    the three elements of the rule we're predicting.}
\end{indentLeft}
\item Enter the resulting edge into the chart.
\begin{indentLeft}
  \textit{We end up with} $s : c\, s_2\, s_3; \{\ip{s_2}{c},
  \ip{c}{s_3}\},$\\
  Coverage = 0000000, NMask = 1110000,\\ PMask = 0100000\\
\end{indentLeft}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Completion\end{large}\end{center}
\begin{itemize}
\item Given an active and a passive edge:
\begin{indentLeft}
  \textit{Active:} $s : s_2 s_3; \{\ip{s_2}{0010000}, \ip{0010000}{s_3},
  [s_2], [s_3]\},$\\
  Coverage = 0010000, NMask = 0010000\\[6pt]
  \textit{Passive:}\\ $s : ~; \{~\}$, Coverage = 0001110\\
\end{indentLeft}
\begin{itemize}
\item Check that the active edge's negative mask does not overlap with
  the passive edge's coverage.
\begin{indentLeftLess}
  \textit{0010000 does not overlap with 0001110.}\\
\end{indentLeftLess}
\pagebreak
\item If we're about to create a passive edge: check that the passive
  edge's coverage is bitwise-implied by the active edge's positive
  mask.
\begin{indentLeftLess}
  \textit{Nothing to check here, since we'll still need to find $s_3$.}\\
\end{indentLeftLess}
\item If the active edge contains an isolation constraint, make sure
  the terminals it would govern are contiguous.
\begin{indentLeftLess}
\textit{0001110 is indeed contiguous.}\\
\end{indentLeftLess}
\pagebreak
\item Compute the new coverage and update the LP constraints.
\begin{indentLeftLess}
  \textit{Combining 0001110 and 0010000 yields 0011110. The LP
    constraints $\ip{s_2}{0010000}$ and $[s_2]$ are discarded, as
    they no longer refer to unfound elements.}\\
\end{indentLeftLess}
\item Add the resulting edge to the chart.
\begin{indentLeftLess}
  $s : s_3; \{\ip{0010000}{s_3}, [s_3]\},$\\
  Coverage = 0010000, NMask = 0010000\\
\end{indentLeftLess}
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Summary\end{large}\end{center}
\begin{itemize}
\item Grammar format generalizes ID/LP grammars to allow for
  discontinuous constituents.
\item Parser generalizes Earley's algorithm to allow for discontinuous
  constituents; also allows the processing order of the RHS to be
  specified, thereby generalizing head-driven parsing.
\item Coverage constraints encoded as bitvectors provide an efficient,
  compiled representation of linear precedence information.
\item Like Earley's algorithm, our generalized version can efficiently
  process context-free and linear fragments.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Outlook\end{large}\end{center}
\begin{itemize}
\item Add global precedence and isolation statements.
\item Move to complex categories.
\item Large-scale grammar development and evaluation: automatic
  extraction from the NEGRA corpus, and recoding the grammar encoded
  in the Babel program \cite{smueller:96babel}.
\end{itemize}
\end{slide}

\begin{slide}
\begin{center}\begin{large}Contact Information\end{large}\end{center}
\begin{flushleft}
Mike Daniels/Detmar Meurers\\
Department of Linguistics\\
222 Oxley Hall\\
1712 Neil Avenue\\
Columbus, OH 43210-1298\\
\verb+{daniels,dm}@ling.osu.edu+\\
\end{flushleft}
\end{slide}

\begin{slide}
\bibliographystyle{mwd2}
\bibliography{hpsg}
\end{slide}
\end{document}