% Compounds - version 2.0 - Submitted
\documentclass[12pt]{article}
\usepackage{a4}
\usepackage{eaclsub}
\usepackage{examples}

\makeatother

% Double ("semantic") brackets, each built by overlapping two square brackets
% with a negative kern.
\newcommand{\lsembrac}{\mbox{[ \kern-0.5em [}}
\newcommand{\rsembrac}{\mbox{] \kern-0.5em ]}}
% \sem{X}: X enclosed in semantic brackets; the inner braces keep the whole
% construct in one group.
\newcommand{\sem}[1]{{\lsembrac #1 \rsembrac}}

% Used as in, e.g., \sem{\phi}^{\rm DPL}_{M,w,t}

% Consequence symbol: a vertical bar overlapped with \approx.
%   \csmodel     -- the bare symbol, for running text
%   \csmodels    -- the same symbol padded by 0.5em on each side, for
%                   inference patterns
%   \notcsmodels -- the padded symbol with a slash struck through it
\newcommand{\csmodel}{\mbox{$\mid$ \kern-0.5em $\approx$}}
\newcommand{\csmodels}{\hspace*{0.5em}\csmodel\hspace*{0.5em}}


\newcommand{\notcsmodels}{\hspace*{0.5em}\mbox{$\mid$ \kern-0.5em $\approx$ \kern-1em $/$}\hspace*{0.5em}}


% \happyface: a small smiley, drawn as a \Large circle with a \smile and two
% dots (the eyes) overlaid on it by means of negative \hspace and \raisebox.
% All font-size switches stay inside the \mbox, which is a group, so the macro
% never changes the text size of the surrounding context (e.g. when it is used
% in a footnote or under \small).  A fixed 0.05in gap follows the face.
\newcommand {\happyface} {\mbox{\Large
$\bigcirc$\small\raisebox{-0.02in}{\hspace*{-0.17in}$\smile$}
\raisebox{0.06in}{\hspace*{-0.17in}.\hspace*{0.03in}.}}%
\hspace{0.05in}}

%%\parindent = 0pt
%%\parskip = \smallskipamount
%\pagestyle{empty}
%%\sloppy


%%\topmargin 0 pt            %   Nominal distance from top of paper to top of 
% page
%%\textheight 649.2 pt       %   Height of text (including footnotes and 
% figures)
%%\oddsidemargin  0 mm
%%\evensidemargin 0 mm
%%\textwidth 161 mm          % Width of text line.
%%\parindent 0 mm
%%\setlength{\parskip}{\baselineskip}

 % Discourse Representation Structure (DRS) boxes, drawn as one-column tables.
 %   \drs{U}{C}    -- box with U above a horizontal rule and C below it
 %   \ddrs{C}      -- centred box with double vertical rules and one compartment
 %   \topdrs{U}{C} -- same layout as \drs
 % The \\[-8pt] rows tighten the padding around the lower compartment.
 \def\drs#1#2{\begin{tabular}{|l|}\hline #1 \\ \hline \\[-8pt]
                 #2\\[-8pt] \\ \hline \end{tabular} }

 \def\ddrs#1{\begin{tabular}{||c||}\hline \\[-8pt]
                 #1\\[-8pt] \\ \hline \end{tabular} }

 % The space after #2 is deliberate: it reproduces the space that \topdrs
 % places between its second argument and the following row break.
 \def\topdrs#1#2{\drs{#1}{#2 }}

 % DRS connectives.  Every macro takes universe/conditions pairs for its
 % operand boxes and typesets them with \drs around (or after) a symbol.
 % Shared helpers:
 %   \drsinfix{SYM}{U1}{C1}{U2}{C2} -- box, SYM, box (no enclosing \mbox)
 %   \drsprefix{SYM}{U}{C}          -- SYM followed by a box, inside an \mbox
 \def\drsinfix#1#2#3#4#5{\drs{#2}{#3} \ #1 \ \drs{#4}{#5}}
 \def\drsprefix#1#2#3{\mbox{#1 \ \drs{#2}{#3}}}

 % Binary connectives; \modimp and \dis are additionally wrapped in an \mbox.
 \def\proof#1#2#3#4{\drsinfix{$\vdash$}{#1}{#2}{#3}{#4}}
 \def\modimp#1#2#3#4{\mbox{\drsinfix{$\Box$}{#1}{#2}{#3}{#4}}}
 \def\imp#1#2#3#4{\drsinfix{$\Rightarrow$}{#1}{#2}{#3}{#4}}
 \def\dis#1#2#3#4{\mbox{\drsinfix{$\vee$}{#1}{#2}{#3}{#4}}}
 % Unary operators: possibility, necessity and negation.
 \def\pos#1#2{\drsprefix{$\Diamond$}{#1}{#2}}
 \def\nec#1#2{\drsprefix{$\Box$}{#1}{#2}}
 \def\nega#1#2{\drsprefix{$\neg$}{#1}{#2}}
 % \pred{LABEL}{U}{C}: LABEL, a colon, then the box.
 \def\pred#1#2#3{\mbox{#1\ :\drs{#2}{#3}}}

 % \raute{TOP}{BOTTOM}: draws a diamond (rhombus) in a picture environment
 % with \unitlength set to 2em.  The four \line segments join the corners
 % (1,1), (2,0), (1,-1) and (0,0).  TOP is centred horizontally at (1,0.15)
 % and BOTTOM at (1,-0.5), both aligned on their bottom edge.
 % The picture is declared as 2x2 units with origin (0,0), so the lower half
 % of the diamond is drawn below the declared picture area.
 \def\raute#1#2{\setlength{\unitlength}{2 em}
        \begin{picture}(2,2)(0,0)
        \put (1,1){\line(1,-1){1}}
        \put (1,1){\line(-1,-1){1}}
        \put (0,0){\line(1,-1){1}}
        \put (1,-1){\line(1,1){1}}
        \put (1,0.15){\makebox(0,0)[b]{#1}}
        \put (1,-0.5){\makebox(0,0)[b]{#2}}
        \end{picture}}

 % \dup{U1}{C1}{LOWER}{UPPER}{U2}{C2}: two \drs boxes joined by a \raute
 % diamond, set as a single-cell centred table inside an \mbox.
 %   #1,#2 -- universe and conditions of the left box
 %   #4    -- text for the upper position in the diamond (set in \scriptsize)
 %   #3    -- text for the lower position in the diamond
 %   #5,#6 -- universe and conditions of the right box
 % The negative \hspace values pull the diamond towards the neighbouring boxes.
 % NOTE(review): presumably a DRT duplex condition -- confirm with the authors.
 \def\dup#1#2#3#4#5#6{\mbox{\begin{tabular}{c}
                        \drs{#1}{#2} \hspace{-1.3 em} 
                                \raute{\scriptsize #4}{#3}
                        \hspace{-0.6 em}
                        \drs{#5}{#6} \\
                     \end{tabular}}}

 % \xdup: variant of \dup with the same argument order, but without the
 % enclosing \mbox, without \scriptsize on the upper diamond text, and with a
 % 1mm (\hspace*) overlap on the left instead of 1.3em.
 \def\xdup#1#2#3#4#5#6{\begin{tabular}{c}
                        \drs{#1}{#2}\hspace*{-1mm}\raute{#4}{#3}
                        \hspace{-0.6 em}
                        \drs{#5}{#6} \\
                     \end{tabular}}


% Shorthands that open (\b..) and close (\e..) common environments.
\newcommand{\ben}{\begin{enumerate}}
\newcommand{\een}{\end{enumerate}}
\newcommand{\bit}{\begin{itemize}}
\newcommand{\eit}{\end{itemize}}
\newcommand{\bqt}{\begin{quote}}
\newcommand{\eqt}{\end{quote}}
% Vertical gap (alias for \bigskip).
\newcommand{\spc}{\bigskip}
% \var{X}: X in emphasised type, kept in an unbreakable box.
\newcommand{\var}[1]{\mbox{\em #1}}
% Double angle brackets (math mode).
\newcommand{\linfon}{\langle \! \langle}
\newcommand{\rinfon}{\rangle \! \rangle}
% \infon{A}{B}{C}: A,B;C enclosed in double angle brackets; usable in text
% because the math is wrapped in an \mbox.
\newcommand{\infon}[3]{\mbox{$\linfon {#1},{#2};{#3}\rinfon$}}
% \prop{A}{B}{C}: the bracketed expression [A | B |= C].
\newcommand{\prop}[3]{\mbox{$[{#1}\,|\,{#2}\models {#3}]$}}
% \satisfies: a double vertical bar immediately followed by a text hyphen.
% The math part lives in its own \mbox, so the macro never opens or closes the
% caller's math mode and can be used in text, inline math and display math.
\newcommand{\satisfies}{\mbox{$\| \!$-}}
% \use: alias for \underline.
\newcommand{\use}{\underline}
% Interpretation brackets [[ and ]], tightened with \! (math mode only).
\newcommand{\linterp}{[ \! [}
\newcommand{\rinterp}{] \! ]}
% \interp{A}{X}{B}: [[X]] with A as a leading subscript and B as a trailing
% subscript (math mode only).
\newcommand{\interp}[3]{_{#1} \linterp {#2} \rinterp _{#3}}
% \interps{X}: \interp with the subscripts fixed to s and g,lex.
\newcommand{\interps}[1]{\interp{s}{#1}{g,lex}}

%%\newenvironment{Bibliography}{\section*{References} 
%%\begin{enumerate}}{\end{enumerate}}

%%\renewcommand{\bibitem}[0]{\item[] \hspace*{-\leftmargin}}

%% An Example:

% \topdrs{}{\imp{x}{you\_buy(x) \\ chess\_set(x)}
%       {y}{you\_get(y) \\ spare\_pawn(y)}}


% \hidden{X}: swallows its argument, so X stays in the source but is not typeset.
\newcommand\hidden[1]{}


\title{Integrating Symbolic and Statistical Representations: \\The
Lexicon Pragmatics Interface}
\author{Ann Copestake \\
{\small Center for the Study 
of Language and Information,} \\ {\small Stanford University,}\\
{\small Ventura
Hall,}\\ {\small Stanford, CA 94305,}\\ {\small USA}\\ {\small {\tt
aac@csli.stanford.edu}}
\And
Alex Lascarides\\
{\small Centre for Cognitive Science {\em and}}\\ {\small Human
Communication Research Centre,}\\ {\small University of Edinburgh,}\\
{\small 2, Buccleuch Place,}\\ {\small Edinburgh, EH8 9LW,}\\ {\small
Scotland, UK}\\ {\small {\tt alex@cogsci.ed.ac.uk}}}

\submissiontype{main}
\wordcount{3172}
\conference{none}
\id{id2}
\subject{lexicon, discourse/pragmatics}
\summary{We describe a formal framework for interpretation 
of words and compounds in a discourse
context which integrates a symbolic lexicon/grammar,
word-sense probabilities, and a 
pragmatic component.  The approach is motivated by the need to handle
productive word use. In this paper, we concentrate
on compound nominals.  We discuss the inadequacies
of approaches which consider compound interpretation
as either wholly lexico-grammatical or wholly pragmatic, and provide
an alternative integrated account.
}

\begin{document}

\maketitle
\bibliographystyle{acl}


\section{Introduction}

When words have multiple senses, these 
may have very different frequencies.  For example, the first
two senses of the noun {\it diet} given in WordNet are:
\begin{enumerate}
\item (a prescribed selection of foods)\\
  \verb+=>+ fare -- (the food and drink that are regularly consumed)
\item \verb+=>+ legislature, legislative assembly, general assembly, law-makers
\end{enumerate}
Most English speakers will share the intuition that the first sense is
much more common than the second, and that this is (partly)
a property of the
word and not its denotation, since near-synonyms occur with much
greater frequency.  Frequency differences are also found between senses
of derived forms (including morphological derivation, zero-derivation
and compounding).  For example, {\it canoe} is less frequent as a verb
than as a noun, and the induced action use (e.g., {\it they canoed the
  kids across the lake}) is much less frequent than the intransitive form
(with location PP) ({\it they canoed across the lake}).\footnote{Here 
and below we base our frequency judgements on semi-automatic analysis
of the written portion
of the tagged British National Corpus ({\sc bnc}).}  
A derived form may become established with one
meaning, but this does not preclude other uses in
sufficiently marked contexts (e.g., Bauer's \shortcite{bauer} 
example of {\it garbage man} with an interpretation
analogous to {\it snowman}).

Because of the difficulty of resolving lexical ambiguity, it is usual
in {\sc nlp} applications to exclude `rare' senses from the lexicon,
and to explicitly list frequent forms, rather than to derive
them.  But this increases errors due to unexpected
vocabulary, especially for highly productive derivational processes.
For this and other reasons it is preferable to assume some generative
devices in the lexicon \cite{Pust:95}.  Briscoe and Copestake
\shortcite{B+C} argue that a differential estimation of the productivity
of derivation processes allows an approximation of the
probabilities of previously unseen derived uses.  If more probable senses are
preferred by the system, 
the proliferation of senses that results from unconstrained use of
lexical rules or other generative devices is effectively
controlled.  An interacting
issue is the granularity of meaning of derived forms.  If the lexicon
produces a small number of very underspecified senses for a wordform,
the ambiguity problem is apparently reduced, but pragmatics may have
insufficient information with which to resolve meanings, or may find
impossible interpretations.

\begin{quotation}
We argue here that by utilising probabilities, 
a language-specific component can offer hints to a
pragmatic module in order to prioritise and control the application of
real-world reasoning to disambiguation.  The objective is an architecture
utilising a general-purpose lexicon with 
domain-dependent probabilities.  The particular issues we consider here are the
integration of the statistical and symbolic components, and
the division of labour between semantics
and pragmatics in determining meaning.  
We concentrate on (right-headed) compound nouns, since these raise
especially difficult problems for {\sc nlp} system architecture 
\cite{ksj}.
\end{quotation}

\section{The grammar of compound nouns}

Within linguistics, attempts to classify nominal
compounds using a small fixed set of meaning relations (e.g.,
\newcite{levi}) are usually thought to have failed, because there
appear to be exceptions to any classification.  Compounds are
attested with meanings which can only be
determined contextually.  Downing \shortcite{downing} discusses 
{\it apple juice seat}, uttered in a context in which it identifies
a place-setting with a glass of apple juice.  Even for
compounds with established meanings, context can force an alternative
interpretation \cite{bauer}.

These problems led to analyses in which
the relationship between the
parts of a compound is undetermined by the grammar,
e.g., \newcite{dowty79}, \newcite{bauer}.
Schematically this is equivalent to the following 
rule, where $R$ is undetermined (to simplify exposition,
we ignore the quantifier for $y$):
\begin{ex}
\label{rule}
\begin{tabular}{cccc}
N0 & $\longrightarrow$ & N1 & N2 \\
$\lambda x [ P(x) \wedge Q(y) \wedge R(x,y)]$ 
& &$\lambda y [Q(y)]$ & $\lambda x [P(x)]$
\end{tabular}
\end{ex}
Similar approaches have been adopted in {\sc nlp} with further
processing using domain restrictions to resolve the interpretation
(e.g., \newcite{Hobbs:90}).

However, this is also unsatisfactory, because (\ref{rule})
overgenerates and ignores systematic properties of various classes of
compounds.  Overgeneration is apparent when we consider translation of German
compounds, since many do not correspond 
straightforwardly to English compounds (e.g.,
Figure~\ref{germ}).
\begin{figure}
\begin{center}
\begin{tabular}{lll}
Arzttermin & *doctor appointment & doctor's appointment\\ \hline\\
Terminvorschlag & * date proposal & proposal for a date\\
Terminvereinbarung & * date agreement & agreement on a date\\ \hline\\
Januarh\"{a}lfte & * January half & half of January\\
Fr\"{u}hlingsanfang & * spring beginning & beginning of spring 
\end{tabular}
\end{center}
\caption{Some German compounds with non-compound translations}
\label{germ}
\end{figure}
Since these exceptions are English-specific they 
cannot be explained via pragmatics.
Furthermore they are not simply due to lexical idiosyncrasies:
for instance, {\it Arzttermin}/*{\it doctor appointment}
is representative of many compounds with human-denoting first elements,
which require a possessive in English. 
So we get {\it blacksmith's hammer} and not *{\it blacksmith hammer} to mean
`hammer of a type conventionally associated with a blacksmith'
(also {\it driver's cab}, {\it widow's allowance} etc).
This is not the usual possessive: compare (((his blacksmith)'s)
hammer) with (his (blacksmith's hammer)).  
Adjective
placement is also restricted: {\it three English blacksmith's hammers}/
*{\it three blacksmith's English hammers}.
We treat these
as a subtype of noun-noun compound with the possessive analysed as 
a case marker.

In another subcategory of compounds, the head provides the
predicate (e.g., {\it dog catcher}, {\it bottle crusher}).  Again, there
are restrictions: it is not usually possible to form
a compound with an agentive predicate taking
an argument that normally requires a preposition
(contrast {\it water seeker} with {*\it water looker}).  
Stress assignment also demonstrates
inadequacies in (\ref{rule}): compounds which have the
interpretation `Y made of X' (e.g., {\it nylon rope}, {\it oak table})
generally have main stress on the righthand noun, in contrast to most other
compounds \cite{lib+spr}.  Stress sometimes
disambiguates meaning: e.g., with righthand stress {\it cotton bag} has
the interpretation {\it bag made of cotton} while with leftmost stress an
alternative reading, {\it bag for cotton}, is available. 
Furthermore, ordering of elements is restricted: e.g., {\it cotton garment bag}/
{*\it garment cotton bag}.

The rule in (\ref{rule}) is therefore theoretically inadequate, 
because it predicts that all noun-noun compounds are acceptable.  
Furthermore, it gives no hint of likely interpretations, leaving an immense 
burden to pragmatics.  

We therefore take a position which is intermediate between the two
extremes outlined above.  We assume that
the grammar/lexicon delimits the range of compounds and
indicates conventional interpretations, but that some compounds may
only be resolved by pragmatics and that non-conventional contextual
interpretations are always available.  We define a
number of schemata which encode conventional meanings.  These cover
the majority of compounds, but for the remainder the interpretation is
left unspecified, to be resolved by pragmatics.

Space limitations
preclude detailed discussion
but Figures~\ref{schhier} and~\ref{schemata}
show a partial default inheritance hierarchy of
schemata (cf., \newcite{jones}).\footnote{We formalise this with
typed default feature structures \cite{pdu}.  
Schemata can be regarded formally as lexical/grammar rules
(lexical rules and grammar rules being very similar in our
framework) 
but inefficiency due to
multiple interpretations is avoided in the implementation by using
a form of packing.}  
Multiple schemata may apply to a single compound: for example, {\it
  cotton bag} is an instantiation of the {\bf made-of} schema, the
{\bf non-derived-purpose-patient} schema and also the {\bf general-nn} schema.
Each applicable schema corresponds to a different sense: so {\it
  cotton bag} is ambiguous rather than vague.  The interpretation of
the hierarchy is that the use of a more general schema implies that
the meanings given by specific subschemata are excluded, and thus we
have the following interpretations for {\it cotton bag}:
\begin{enumerate}
\item $\lambda x [ {\rm cotton}(y) \wedge {\rm bag}(x) \wedge
  \mbox{{\rm made-of}}(y,x)]$
\item $\lambda x [ {\rm cotton}(y) \wedge {\rm bag}(x) \wedge
{\rm TELIC({\it bag})}(y,x)]$ $=$ 
$\lambda x [ {\rm cotton}(y) \wedge {\rm bag}(x) \wedge {\rm contain}(y,x)]$
\item $\lambda x [ R(y,x) \wedge \neg(\mbox{\rm made-of}(y,x) \vee 
{\rm contain}(y,x) \vee \ldots)]$
\end{enumerate}
The predicate made-of is to be interpreted as 
material constituency (e.g.~\newcite{link}).
We follow \newcite{Johnston+Busa} in using Pustejovsky's \shortcite{Pust:95} 
concept of telic role to encode the purpose of an artifact.
These schemata give minimal indications of compound semantics:
it may be desirable to provide more information
\cite{Jetal:95}, but we will not discuss that here. 
\begin{figure}
\setlength{\unitlength}{0.8in}
\begin{center}
\begin{picture}(7,2.9)(0,0.3)
\thicklines
\put(3.5,3){\makebox(0,0){\framebox{\bf general-nn}}}
\put(3.3,2.8){\line(-4,-1){2.6}}
\put(3.4,2.8){\line(-5,-4){0.8}}
\put(3.6,2.8){\line(5,-4){0.8}}
\put(3.7,2.8){\line(4,-1){2.6}}
%
\put(0.5,2){\makebox(0,0){\bf possessive}}
\put(0.4,1.9){\line(-1,-1){0.3}}
\put(0.5,1.9){\line(0,-1){0.3}}
\put(0.6,1.9){\line(1,-1){0.3}}
\put(2.5,2){\makebox(0,0){\framebox{\bf made-of}}}
%
\put(4.5,2){\makebox(0,0){\bf purpose-patient}}
\put(4.6,1.9){\line(1,-1){0.7}}
\put(4.4,1.9){\line(-1,-1){0.7}}
\put(6.5,2){\makebox(0,0){\bf deverbal}}
\put(6.4,1.9){\line(-1,-1){0.7}}
\put(6.5,1.9){\line(0,-1){0.3}}
\put(6.6,1.9){\line(1,-1){0.3}}
%
\thinlines
\put(2.5,1.8){\line(0,-1){0.3}}
%
\put(2.5,1.4){\makebox(0,0){\it cardboard box}}
\put(5.5,1){\makebox(0,0){\framebox{\bf deverbal-pp}}}
\put(3.5,1){\makebox(0,0){\framebox{\bf non-derived-pp}}}
%
\thinlines
\put(5.5,0.8){\line(0,-1){0.3}}
\put(3.5,0.8){\line(0,-1){0.3}}
%
\put(3.5,0.4){\makebox(0,0){\it linen chest}}
\put(5.5,0.4){\makebox(0,0){\it ice-cream container}}
\end{picture}
\end{center}
\caption{Fragment of hierarchy of noun-noun compound schemata.
The boxed nodes indicate actual schemata: other
nodes are included for convenience in expressing generalisations.}
\label{schhier}
\vspace{0.2in}
\begin{center}
\begin{tabular}{lllll}
\hline
{\large\bf general-nn} & N0 & {\tt ->} & N1 & N2 \\
& $\lambda x [ P(x) \wedge Q(y) \wedge R(x,y)]$ & &$\lambda y [Q(y)]$ & $\lambda x [P(x)]$\\
&  R $=$ /general-nn & & anything & anything \\
        &                  & & /stressed & \\ \hline
{\large\bf made-of} & R $=$ made-of && substance & physobj \\
         & &&&                 /stressed  \\ \hline
{\large\bf purpose-patient} &  R $=$ TELIC(N2) && anything & artifact \\ \hline
\end{tabular}
\end{center}
\caption{Details of schemata for noun-noun compounds.  
/ indicates that the value to its right is default information.}
\label{schemata}
\end{figure}


Established compounds may have idiosyncratic interpretations or
inherit from one or more schemata (though compounds with multiple
established senses due to ambiguity in the relationship 
between constituents rather than
lexical ambiguity
are fairly unusual). 
But established compounds may also have unestablished
interpretations,
although, as discussed in \S\ref{lexpref}, these will have minimal
probabilities.  In contrast, an unusual compound, such as {\it
  apple-juice seat}, may only be compatible with {\bf general-nn}, and
would be assigned the most underspecified interpretation.
As we will
see in \S\ref{dice}, this means
pragmatics must find a contextual interpretation.  Thus, for any
compound there may be some context in which it can be interpreted, but
in the absence of a marked context, only compounds which
instantiate one of the subschemata are acceptable.

\section{Encoding Lexical Preferences}
\label{lexpref}

In order to help pragmatics select between the multiple possible
interpretations, we utilise probabilities.  For an established form,
derived or not,
these depend straightforwardly on the frequency of
a particular sense.  For example, 
in the {\sc bnc}, {\it diet} has probability of about 0.9 of
occurring in the food sense and 0.005 in the legislature sense
(the remainder are metaphorical extensions, e.g., {\it diet of crime}).
Smoothing is necessary to avoid giving a
zero probability for possible senses which are not found in a
particular corpus. 
For derived forms, the applicable
lexical rules or schemata determine possible senses \cite{B+C}. 
Thus for known compounds, probabilities of established senses
depend on corpus frequencies but
a residual probability
is distributed between unseen interpretations licensed by schemata,
to allow for novel uses.  This distribution is weighted to allow for
productivity differences between schemata.  For unseen
compounds, all probabilities depend
on schema productivity.  Compound schemata range from the non-productive
(e.g., the verb-noun pattern exemplified by {\it pickpocket}), to
the almost fully productive (e.g., made-of) with many 
schemata being 
intermediate (e.g., has-part: {\it 4-door car}
is acceptable but the apparently similar {*\it sunroof car} is not).

We use the following estimate for productivity (adapted from \newcite{B+C}):
\[\mbox{Prod(cmp-schema)} = \frac{M+1}{N} \] 
(where $N$ is the number of pairs of senses
which match the schema input and $M$ is the number of attested two-noun
output forms --- we ignore compounds with more than two nouns for simplicity).
Formulae for calculating the unseen probability mass 
and for allocating it differentially according to schema
productivity are shown in Figure~\ref{formulae}.
Finer-grained, more accurate 
productivity estimates can be obtained by considering
subsets of the possible inputs --- this allows for some real-world effects
(e.g.,  the made-of schema is unlikely for liquid/physical-artifact compounds).
\begin{figure}
\begin{center}
$\mbox{Unseen-prob-mass(cmp-form)} =
\frac{\mbox{number-of-applicable-schemata(cmp-form)}}{freq(\mbox{cmp-form}) +
  \mbox{number-of-applicable-schemata(cmp-form)}}$\\[0.2in]
Estimated-freq(interpretation$_{i}$ with cmp-form$_{j}$) $=$\hfill\mbox{}\\
\mbox{}\hfill$\mbox{Unseen-prob-mass(cmp-form$_{j}$)} 
\times \frac{Prod(cs_{i})}{\sum_{k=1}^{n} Prod(cs_{k})}$
\end{center}
(where $cs_{1} \ldots cs_{n}$ are the compound schemata needed to
derive the $n$ unattested entries for the form$_{j}$) 
\caption{Probabilities for unseen compounds: adapted from
Briscoe and Copestake~(1996)}
% can't use \cite because this is a float
\label{formulae}
\end{figure}

Lexical (and syntactic) probabilities should be combined 
to give an overall probability for a logical form ({\sc lf}).
But we will ignore this here and assume pragmatics has to distinguish
between alternatives which differ only in the sense assigned to one
compound.
Figure~\ref{cottonbag} shows possible interpretations
for {\it cotton bag} with associated probabilities.
Pragmatics screens the
{\sc lf}s for acceptability. 
If a {\sc lf} contains an
underspecified element (e.g., arising from {\bf general-nn}), this 
must be instantiated by pragmatics
from the discourse context.
\begin{figure}
\begin{examples}
\item   \label{cottonbag-sentence}
\begin{subexamples}
\item   Mary put a skirt in a cotton bag
\item
\topdrs{$e,x,y,z,w,t,\mbox{{\em now}}$}{$\mbox{{\em mary}}(x)$,
  $\mbox{{\em skirt}}(y)$, 
  $\mbox{{\em cotton}}(w)$, $\mbox{{\em bag}}(z)$,\\
$\mbox{{\em put}}(e,x,y,z)$, $\mbox{{\em hold}}(e,t)$, $t\prec
\mbox{{\em now}}$,\\
$\mbox{{\em made-of}}(z,w)$}
\hspace{0.5in}
$P = 0.84$
\item
\topdrs{$e,x,y,z,w,t,\mbox{{\em now}}$}{$\mbox{{\em mary}}(x)$,
  $\mbox{{\em skirt}}(y)$, 
  $\mbox{{\em cotton}}(w)$, $\mbox{{\em bag}}(z)$,\\
$\mbox{{\em put}}(e,x,y,z)$, $\mbox{{\em hold}}(e,t)$, $t\prec
\mbox{{\em now}}$,\\
$\mbox{{\em contain}}(z,w)$}
\hspace{0.5in}
$P = 0.14$
\item
\topdrs{$e,x,y,z,w,t,\mbox{{\em now}}$}{$\mbox{{\em mary}}(x)$,
  $\mbox{{\em skirt}}(y)$, 
  $\mbox{{\em cotton}}(w)$, $\mbox{{\em bag}}(z)$,\\
$\mbox{{\em put}}(e,x,y,z)$, $\mbox{{\em hold}}(e,t)$, $t\prec
\mbox{{\em now}}$,\\
$\mbox{{\em R}}_c(z,w), \mbox{{\em R}}_c=?$,\\ 
$\neg(\mbox{{\em made-of}}(z,w) \vee \mbox{{\em contain}}(z,w)\vee\ldots)$}
\hspace{0.5in}
$P = 0.02$
\end{subexamples}
\end{examples}
\caption{Sample of input to pragmatics (simplified).   
{\sc lf}s are encoded in {\sc drt}.
Probabilities are based
on productivity figures for fabric/container
compounds in the {\sc bnc}, using WordNet as a source of semantic categories.}
\label{cottonbag}
\end{figure}

\section{{\sc sdrt} and the Resolution of Underspecified Relations}
\label{dice}

In this section, we'll give a brief
overview of the theory of discourse that
we'll assume:
Segmented Discourse Representation Theory
({\sc sdrt}) (e.g., \newcite{Asher1993}) and the 
accompanying pragmatic component Discourse in Commonsense
Entailment ({\sc dice}) \cite{LA1993}.
This framework has already
been successful in accounting for other phenomena on the 
interface between the lexicon and
pragmatics, e.g., \newcite{AL1995}, \newcite{LC1995}, \newcite{LCB1996}.

{\sc sdrt} is an extension of {\sc drt}
\cite{KR1993}, where discourse is
represented as a recursive set of {\sc
  drs}s representing the clauses, linked together
with rhetorical relations such as {\em Elaboration}
and {\em Contrast}, cf. \newcite{Hobbs1985}, \newcite{Polanyi1985}.
Building an {\sc sdrs} 
involves computing a rhetorical relation between 
the representation of the current clause and the
{\sc sdrs} built so far.  {\sc dice} 
specifies
how various background knowledge resources interact to
provide clues about which rhetorical relation holds.

The rules in {\sc dice} include default conditions of the form $P>Q$,
which means {\em If P, then normally Q}.  For example, {\tt
  Elaboration} states: if $\beta$ is to be attached to $\alpha$ with a
rhetorical relation, where $\alpha$ is part of the discourse structure
$\tau$ already (i.e., $\langle\tau,\alpha,\beta\rangle$ holds), and
$\beta$ is a subtype of $\alpha$---which by {\tt Subtype} means that
$\beta$'s event is a subtype of $\alpha$'s, and the individual filling
some role $\theta_i$ in $\beta$ is a subtype of the one filling the
same role in $\alpha$---then normally, $\alpha$
and $\beta$ are attached together with {\em Elaboration}
\cite{AL1995}.  The {\tt 
  Coherence Constraint on Elaboration} states that an elaborating event must be
temporally included in the elaborated event.
\begin{itemize}
\item   {\tt Subtype:} 
$(\theta_i(e_\alpha,\gamma_1) \wedge \theta_i(e_\beta,\gamma_2) \wedge
\mbox{{\em e-condn}}_\beta\sqsubseteq \mbox{{\em e-condn}}_\alpha
\wedge \gamma_2\sqsubseteq \gamma_1) \rightarrow \mbox{{\em
    Subtype}}(\beta,\alpha)$ 
\item   {\tt Elaboration:} $(\langle\tau,\alpha,\beta\rangle \wedge
  \mbox{{\em Subtype}}(\beta,\alpha)) > \mbox{{\em Elaboration}}(\alpha,\beta)$
\item   {\tt Coherence Constraint on Elaboration:} $\mbox{{\em
      Elaboration}}(\alpha,\beta) \rightarrow e_\beta\subseteq e_\alpha$
\end{itemize}

A distinctive feature of {\sc sdrt} is that if the {\sc dice} axioms
yield a nonmonotonic conclusion that the discourse relation is $R$,
and information that's necessary for the coherence
of $R$ 
isn't already in the constituents connected with $R$ (e.g.,
$\mbox{{\em Elaboration}}(\alpha,\beta)$ is nonmonotonically inferred,
but $e_\beta\subseteq e_\alpha$ is not in $\alpha$ or in $\beta$),
then this content can be added to the constituents
in a constrained manner through a
process known as {\sc sdrs} {\em Update}.  
Informally, $\mbox{{\em
    Update}}(\tau,\alpha,\beta)$ is an {\sc sdrs}, which includes
(a) the discourse
context $\tau$, plus (b) the new information $\beta$, and (c) an
attachment of $\beta$ to $\alpha$ (which is part of $\tau$)
with a rhetorical relation $R$
that's computed via {\sc dice}, 
where (d) the content of $\tau$,
$\alpha$ and $\beta$ are modified so that the coherence constraints on
$R$ are met.\footnote{
If $R$'s coherence constraints can't be inferred, then the logic
underlying {\sc dice} guarantees that $R$ won't be nonmonotonically inferred.}
Note that this is more complex than {\sc drt}'s notion
of update.
{\em Update} models how
interpreters are
allowed and expected to fill in certain gaps in what the speaker
says.

An {\sc sdrs} $S$ is well-defined (written $\downarrow
S$) if there are no conditions of the form $x=?$ (i.e., there are no
unresolved anaphoric elements), and every constituent is attached with
a rhetorical relation.
A discourse is incoherent if $\neg \downarrow
\mbox{{\em Update}}(\tau,\alpha,\beta)$ holds for every available
attachment point $\alpha$ in $\tau$. 

For example, the representations of (\ref{discourse}a,b) (in
simplified form) are respectively $\alpha$ and $\beta$:
\begin{examples}
\item   \label{discourse}
\begin{subexamples}
\item    Mary put her clothes into various large bags.
\item   [$\alpha$]
\singlespace{\topdrs{$x,Y,Z,e_\alpha,t_\alpha,n$}{$\mbox{{\em mary}}(x)$,
  $\mbox{{\em clothes}}(Y)$, $\mbox{{\em bag}}(Z)$, \\
$\mbox{{\em put}}(e_\alpha,x,Y,Z)$, $\mbox{{\em
    hold}}(e_\alpha,t_\alpha)$, $t_\alpha\prec n$}}
\item   She put her skirt into the bag made out of cotton.
\item   [$\beta$]
\singlespace{\topdrs{$x,y,z,w,e_\beta,t_\beta,n,u,B$}{$\mbox{{\em mary}}(x)$,
  $\mbox{{\em skirt}}(y)$, $\mbox{{\em bag}}(z)$, $\mbox{{\em
      cotton}}(w)$, $\mbox{{\em made-of}}(z,w)$,\\
$u=?$, $B(u,z)$, $B=?$,\\
$\mbox{{\em put}}(e_\beta,x,y,z)$, $\mbox{{\em
    hold}}(e_\beta,t_\beta)$, $t_\beta\prec n$}}
\end{subexamples}
\end{examples}
In words, the conditions in $\beta$ require the
object denoted by the definite description to
be linked by some `bridging' relation $B$ (possibly identity,
cf. \newcite{Sandt1992}) to an object $u$
identified in the discourse context \cite{AL1996}.
In {\sc sdrt}, the values
of $u$ and $B$ are computed as a byproduct of 
{\sc sdrt}'s {\em Update} function
(cf. \newcite{Hobbs1979}); one specifies $u$ and $B$ by inferring the
relevant new semantic content arising
from $R$'s coherence constraints, where $R$ is the rhetorical relation
inferred via the {\sc dice} axioms.
If one cannot resolve the conditions $u=?$ or $B=?$ through {\sc sdrs}
update, then by the above definition of well-definedness on {\sc
  sdrs}s
the discourse is incoherent (and
we have presupposition failure).  

For reasons of space, we gloss over the details given in \newcite{AL1996}
for specifying $u$ and $B$
through the {\sc sdrt} update
procedure.  However, the axiom {\tt Assume Coherence} below is
derivable from the
axioms given there.  First some notation: let
$\beta[C]$ mean that $\beta$ contains condition $C$, and
assume that $\beta[C/C']$ stands for the {\sc
  sdrs} which is the same as $\beta$, save that the condition $C$ in
$\beta$ is replaced by $C'$.  Then in
words, {\tt Assume Coherence} stipulates that if the
discourse can be coherent only if the anaphor $u$ is resolved to $x$
and $B$ is resolved to the specific relation $P$, then one {\em
  monotonically} assumes
that they are resolved this way:
\begin{itemize}
\item   {\tt Assume Coherence:}\\
$(\downarrow\mbox{{\em Update}}(\tau,\alpha,\beta[u=?,B=?/u=x,B=P])
\wedge\\
\hspace*{0.2in}(C'\neq (u=x \wedge B=P) \rightarrow \neg\downarrow\mbox{{\em
    Update}}(\tau,\alpha,\beta[u=?,B=?/C']))) \rightarrow\\
\hspace*{0.3in}(\mbox{{\em Update}}(\tau,\alpha,\beta) \leftrightarrow
\mbox{{\em
    Update}}(\tau,\alpha,\beta[u=?,B=?/u=x,B=P]))$
\end{itemize}
Intuitively, it should be clear that in (\ref{discourse}a,b) $\neg
\downarrow\mbox{{\em Update}}(\alpha,\alpha,\beta)$ holds, unless the
bag in (\ref{discourse}b) is one of the bags mentioned in
(\ref{discourse}a)---i.e., $u=Z$ and $B=\mbox{{\em member-of}}$.
For otherwise the events in (\ref{discourse}) are too `disconnected'
to support any rhetorical
relation.  On the other hand, assigning
$u$ and $B$ these values allows us to use {\tt Subtype} and {\tt
  Elaboration} to infer {\em Elaboration} (because
skirt is a kind of clothing, and the bag in (\ref{discourse}b) is one
of the bags in (\ref{discourse}a)).  So {\tt Assume Coherence}, {\tt
  Subtype} and {\tt Elaboration} yield that (\ref{discourse}b)
elaborates (\ref{discourse}a) and the bag in (\ref{discourse}b) is one
of the bags in (\ref{discourse}a).

Applying {\sc sdrt} to compounds encodes the effects of pragmatics on
the compounding relation.  For example,
to reflect the fact that compounds such as {\em apple juice seat},
which are compatible only
with {\bf general-nn}, are acceptable
only when context resolves the compound relation, we assume
that the {\sc drs} conditions
produced by this schema are: $R_c(y,x)$, $R_c=?$, and
$\neg(\mbox{{\em made-of}}(y,x) \vee \mbox{{\em contain}}(y,x)
\vee\ldots)$.
By the above definition of
well-definedness on {\sc sdrs}s, the compound is coherent only if
we can resolve $R_c$ to a particular
relation via
the {\sc sdrt} {\em Update} function, which in turn is
determined by {\sc dice}.  Rules such as {\tt Assume Coherence} 
serve to specify the necessary compound relation, so long as context
provides enough information.


\section{Integrating Lexical Preferences and Pragmatics}
\label{comb}

We now extend {\sc sdrt} and {\sc dice}
to handle the probabilistic information given in \S\ref{lexpref}.  We
want the pragmatic component to utilise this knowledge,
while still maintaining sufficient flexibility 
that less frequent senses are favoured in certain
discourse contexts.

Suppose that the new information
to be integrated with the discourse context 
is ambiguous between $\beta_1,\ldots,\beta_n$.
Then we assume that exactly one of $\mbox{{\em 
    Update}}(\tau,\alpha,\beta_i)$, $1\leq i\leq n$, holds.
We gloss this complex disjunctive formula as $\mbox{{\em
    !}}\bigvee_{1\leq i\leq n}(\mbox{{\em
    Update}}(\tau,\alpha,\beta_i))$.  Let
$\beta_k\succ \beta_j$ mean that the probability of {\sc drs}
  $\beta_k$ is greater than that of $\beta_j$.  
Then the rule schema below ensures that the most frequent possible
  sense that produces discourse coherence is (monotonically) favoured:
\begin{itemize}
\item   {\tt Prefer Frequent Senses:}\\
$(\mbox{{\em
    !}}\bigvee_{1\leq i\leq n}(\mbox{{\em
    Update}}(\tau,\alpha,\beta_i)) \wedge \downarrow\mbox{{\em
    Update}}(\tau,\alpha,\beta_j) \wedge\\
\hspace*{0.2in}(\beta_k\succ \beta_j \rightarrow
\neg\downarrow\mbox{{\em Update}}(\tau,\alpha,\beta_k)))\rightarrow\\
\hspace*{0.2in}\mbox{{\em Update}}(\tau,\alpha,\beta_j)$
\end{itemize}
{\tt Prefer Frequent Senses} is a declarative rule for disambiguating
constituents in a discourse
context.  But from a procedural perspective it captures the following strategy: try to attach the
{\sc drs} based on the most probable senses first; if it works you're
done; if not, try the next most probable sense, and so on.

Let's examine the interpretation of compounds.
Consider (\ref{discourse1}):
\begin{examples}
\item   \label{discourse1}
\begin{subexamples}
\item   Mary sorted her clothes into various large bags.
\item   She put her skirt in the cotton bag.
\end{subexamples}
\end{examples}
Let's consider the representation $\beta'$ 
of (\ref{discourse1}b) with
the highest probability: i.e., the one where cotton bag means {\em bag
  made of cotton}.  
Then similarly to (\ref{discourse}), {\tt Assume
  Coherence}, {\tt Subtype} and {\tt Elaboration} are used to infer that 
{\em the cotton bag} is one of the bags mentioned in
(\ref{discourse1}a) and {\em Elaboration} holds.
Since this updated {\sc sdrs}
is well-defined, {\tt Prefer Frequent Senses} ensures
that it's true.  And so {\em
  cotton bag} means {\em bag made from cotton} in this context.

Contrast this with (\ref{discourse2}):
\begin{examples}
\item   \label{discourse2}
\begin{subexamples}
\item   Mary sorted her clothes into various bags made
  from plastic.
\item   She put her skirt into the cotton bag.
\end{subexamples}
\end{examples}
$\mbox{{\em Update}}(\alpha,\alpha,\beta')$ is not well-defined
because the {\em cotton bag} cannot be one of the bags in 
(\ref{discourse2}a).
On the other hand, $\mbox{{\em Update}}(\alpha,\alpha,\beta'')$ is
well-defined, where $\beta''$ is the {\sc drs} 
where {\em cotton bag} means {\em bag containing
  cotton}.   This is because one can now assume this bag is one of the
bags mentioned in (\ref{discourse2}a), and therefore {\em Elaboration}
can be inferred as before.  So {\tt Prefer Frequent
  Senses} ensures that $\mbox{{\em Update}}(\alpha,\alpha,\beta'')$
holds but $\mbox{{\em Update}}(\alpha,\alpha,\beta')$ does not.

% {\tt Prefer Frequent Senses} predicts {\em diet} has its food sense in
% (\ref{disagree}) (assuming $\mbox{{\em
%    Update}}(\emptyset,\emptyset,\alpha) = \alpha$), but it has the
% law-maker sense in (\ref{diet-discourse}), because 
% {\sc sdrt}'s coherence
% constraints on {\em Contrast} (\cite{Asher1993})---which is the
% relation required for {\em Update}
% because of the cue word {\em but}---can't be met when
% {\em diet} means {\em food}.
% \begin{examples}
% \item   \label{diet-discourse}
% \begin{subexamples}
% \item   US citizens voted in favour of the President's bill in the
% referendum. 
% \item   But the diet disagreed with the president.
% \end{subexamples}
% \end{examples}
% above para is redundant - delete it
% Comment---Ann---I have commented this out, because you're right that
% there's not enough space for it.  But I thought it might be
% important to remind them that this isn't just a story about how to
% choose senses for compounds; it's a story about how to choose senses
% generally.  If there's enough room in the full paper (should this be
% accepted!!), then I guess I would like this to go back in.

In general, pragmatic reasoning is computationally expensive, even in
very restricted domains.  But the account of disambiguation we've offered
circumscribes pragmatic reasoning as much as possible.  All
nonmonotonic reasoning remains packed into the definition of
$\mbox{{\em Update}}(\tau,\alpha,\beta)$, where one needs pragmatic
reasoning anyway for inferring rhetorical relations.  {\tt Prefer
  Frequent Senses} is a monotonic rule: it doesn't increase the load
on nonmonotonic reasoning, and it doesn't introduce extra pragmatic
machinery peculiar to the task of disambiguating word senses.  Indeed,
this rule offers a way of checking whether fully specified relations
between compounds are acceptable, rather than relying on (expensive)
pragmatics to compute them.

We have mixed stochastic and symbolic reasoning.
Hobbs {\em et al.}\ \shortcite{Hobbs:90} also mix numbers and rules
by means of
weighted abduction.  However, the theories differ in several important
respects.  First, our pragmatic
component has no access to word forms and syntax (and so it's not
language specific), whereas
Hobbs {\em et al.}'s 
rules for pragmatic interpretation can access these knowledge sources.
Second, our probabilities encode the frequency of
word senses associated with word forms.
In contrast, the weights that guide abduction 
correspond to a wider variety of information, and do not
necessarily correspond to word sense/form frequencies.  Indeed, it is
unclear what meaning is conveyed by the weights,
and consequently the means by which they can be
computed are not well understood.

\section{Conclusion}
\label{conc}

We have demonstrated that compound noun interpretation requires the
integration of the lexicon, probabilistic information and pragmatics.
A similar case can be made for the interpretation of 
morphologically-derived forms and words in 
extended usages.  We believe that the proposed architecture is theoretically
well-motivated, but also practical, since
large-scale semi-automatic acquisition of
the required 
frequencies from corpora is feasible, though admittedly time-consuming.
However, further work is required before we can demonstrate this,
in particular
to validate or revise the formulae in
\S\ref{lexpref} and
to further develop the compound schemata.

\newcommand{\book}[4]{\item #1 (#4) {\it #2,} #3.}
\newcommand{\booknoauthor}[3]{\item #1 (#3) {\it #2,}}
\newcommand{\forthedbook}[4]{\item #1 (eds.) (#4, forthcoming) {\it #2,} #3.}
\newcommand{\unpub}[4]{\item #1 (#4) {\it #2,} #3.}
\newcommand{\bookart}[7]{\item #1 (#7) `#2' in #5 (eds.), {\it #4,} #6, pp.~#3.}
\newcommand{\bookartnopp}[6]{\item #1 (#6) `#2' in #4 (eds.), {\it #3,} #5.}
\newcommand{\forthart}[6]{\item #1 (#6, forthcoming) `#2' in #4 (eds.),
{\it #3,} #5.}
\newcommand{\journart}[6]{\item #1 (#6) `#2', {\it #3,} {\it vol.#4,}
#5.}
\newcommand{\journartnopp}[5]{\item #1 (#5) `#2', {\it #3,} {\it vol.#4,}}
\newcommand{\forthjournart}[4]{\item #1 (#4) `#2', {\it #3,}}
\newcommand{\procart}[6]{\item #1 (#6) `#2', {\it Proceedings of the
#3,} #4, pp.~#5.}
\newcommand{\forthprocart}[5]{\item #1 (#5) `#2', 
to appear in {\it Proceedings of the #3,} #4}
\newcommand{\procartnopp}[5]{\item#1 (#5) `#2', {\it Proceedings of the 
#3,} #4.}


\begin{thebibliography}{}

\bibitem[\protect\citename{Asher}1993]{Asher1993}
\book{Asher, N.}
{Reference to Abstract Objects in Discourse}
{Kluwer Academic Publishers}
{1993}

\bibitem[\protect\citename{Asher and Lascarides}1995]{AL1995}
\journart{Asher, N. and A. Lascarides}
{Lexical Disambiguation in a Discourse Context}
{Journal of Semantics}
{12.1}
{69--108}
{1995}

\bibitem[\protect\citename{Asher and Lascarides}1996]{AL1996}
\unpub{Asher, N. and A. Lascarides}
{Bridging}
{Proceedings of the International Workshop on Semantic
  Underspecification, Berlin, October 1996, available from the Max
  Planck Institute}
{1996}

\bibitem[\protect\citename{Bauer}1983]{bauer}
\book{Bauer, L.}
     {English word-formation}
     {Cambridge University Press, Cambridge, England}
     {1983}

\bibitem[\protect\citename{Briscoe and Copestake}1996]{B+C}
\procartnopp{Briscoe, E.J. and A.~Copestake}
{Controlling the application of lexical rules}
{ACL SIGLEX Workshop on Breadth and Depth of Semantic Lexicons}
{Santa Cruz, CA}
{1996}


\bibitem[\protect\citename{Downing}1977]{downing}
\journart{Downing, P.}
        {On the Creation and Use of English Compound Nouns}
        {Language}
        {53(4)}
        {810--842}
        {1977}


\bibitem[\protect\citename{Dowty}1979]{dowty79}
\book{Dowty, D.}
     {Word meaning in Montague Grammar}
     {Reidel, Dordrecht}
     {1979}

\bibitem[\protect\citename{Hobbs}1979]{Hobbs1979}
\journart{Hobbs, J.}
{Coherence and Coreference}
{Cognitive Science}
{3}
{67--90}
{1979}

\bibitem[\protect\citename{Hobbs}1985]{Hobbs1985}
\unpub{Hobbs, J.}
{On the Coherence and Structure of
Discourse}  
{Report No. CSLI-85-37, Center for the Study of
Language and Information}
{1985}


\bibitem[\protect\citename{Hobbs et al}1993]{Hobbs:90}
\journart{Hobbs, J.R., M.~Stickel, D.~Appelt and P.~Martin}
   {Interpretation as Abduction}
   {Artificial Intelligence}
        {63.1}
        {69--142}
   {1993}

\bibitem[\protect\citename{Johnston et al}1995]{Jetal:95}
\procartnopp{Johnston, M., B.~Boguraev and J.~Pustejovsky}
{The acquisition and interpretation of complex nominals}
{AAAI Spring Symposium on representation and acquisition
of lexical knowledge}
{Stanford, CA}
{1995}

\bibitem[\protect\citename{Johnston and Busa}1996]{Johnston+Busa}
\procartnopp{Johnston, M. and F.~Busa}
{Qualia structure and the compositional interpretation of compounds}
{ACL SIGLEX Workshop on Breadth and Depth of Semantic Lexicons}
{Santa Cruz, CA}
{1996}

\bibitem[\protect\citename{Jones}1995]{jones}
\procartnopp{Jones, B.}
{Predicting nominal compounds}
{17th Annual conference of the Cognitive Science Society}
{Pittsburgh, PA}
{1995}


\bibitem[\protect\citename{Kamp and Reyle}1993]{KR1993}
\book{Kamp, H. and U.~Reyle}
    {From Discourse to Logic: Introduction to
Modeltheoretic Semantics of Natural Language, Formal Logic and Discourse
Representation Theory}
    {Kluwer Academic Publishers, Dordrecht, The Netherlands}
    {1993}



%\bibitem[\protect\citename{Krifka}1987]{krifka}
%\procart{Krifka, M.}
%{Nominal reference and temporal constitution: towards a semantics of quantity}
%    {6th Amsterdam Colloquium}
%    {University of Amsterdam}
%    {153--173}
%    {1987}


\bibitem[\protect\citename{Lascarides and Asher}1993]{LA1993}
\journart{Lascarides, A. and N. Asher}
{Temporal Interpretation, Discourse Relations and Commonsense
  Entailment}
{Linguistics and Philosophy}
{16.5}
{437--493}
{1993}


\bibitem[\protect\citename{Lascarides et al}1996]{pdu}
\journart{Lascarides, A., E.J.~Briscoe, N.~Asher and A.~Copestake}
      {Persistent associative default unification}
      {Linguistics and Philosophy}
      {19.1}
      {1--89}
      {1996}

\bibitem[\protect\citename{Lascarides and Copestake}1995]{LC1995}
\procartnopp{Lascarides, A. and A. Copestake}
{The Pragmatics of Word Meaning}
{SALT V}
{Austin, Texas}
{1995}

\bibitem[\protect\citename{Lascarides, Copestake and Briscoe}1996]{LCB1996}
\journart{Lascarides, A., A. Copestake and E. J. Briscoe}
{Ambiguity and Coherence}
{Journal of Semantics}
{13.1}
{41--65}
{1996}


\bibitem[\protect\citename{Levi}1978]{levi}
\book{Levi, J.}
     {The syntax and semantics of complex nominals}
     {Academic Press, New York}
     {1978}


\bibitem[\protect\citename{Liberman and Sproat}1992]{lib+spr}
\bookart{Liberman, M. and R. Sproat}
{The stress and structure of modified noun phrases in English}
{131--182}
{Lexical matters}
{I.A.~Sag and A.~Szabolcsi}
{CSLI Publications}
{1992}


\bibitem[\protect\citename{Link}1983]{link}
\bookart{Link, G.}
    {The logical analysis of plurals and mass terms: a lattice-theoretical
approach}
    {302--323}
    {Meaning, use and interpretation of language}
    {B\"{a}uerle, Schwarze and von Stechow}
    {de Gruyter, Berlin}
    {1983}

\bibitem[\protect\citename{Polanyi}1985]{Polanyi1985}
\procart{Polanyi, L.}
{A Theory of Discourse Structure and Discourse
Coherence}
{Papers from the General Session at the Twenty-First
Regional Meeting of the Chicago Linguistic Society}
{Chicago}
{25--27}
{1985}

\bibitem[\protect\citename{Pustejovsky}1995]{Pust:95}
\book{Pustejovsky, J.}
    {The Generative Lexicon}
    {MIT Press, Cambridge, MA}
    {1995}


\bibitem[\protect\citename{van der Sandt}1992]{Sandt1992}
\journartnopp{van der Sandt, R.}
{Presupposition Projection as Anaphora Resolution}
{Journal of Semantics}
{9.4}
{1992}

\bibitem[\protect\citename{Sparck Jones}1983]{ksj}
\bookart{Sparck Jones, K.}
{So what about parsing compound nouns?}
{164--168}
{Automatic natural language parsing}
{K.~Sparck Jones and Y.~Wilks}
{Ellis Horwood, Chichester, England}
{1983}

\bibitem[\protect\citename{Webber}1991]{Webber1991}
\journart{Webber, B.}
{Structure and Ostension in the Interpretation of Discourse Deixis}
{Language and Cognitive Processes}
{6.2}
{107--135}
{1991}


\end{thebibliography}

\end{document}





%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End: 







