\documentclass[11pt]{article}
\RequirePackage{paper}
\draftfalse
% \drafttrue


%% put all macros in paper.sty


% \title{A Simple Semantics for Traversals in Object Graphs}
\title{Navigating through Object Graphs Using Local Meta-Information}
\source{papers/navigation-01/paper.tex}



\begin{document}
\maketitle

\begin{abstract}

  Traversal through object graphs is needed for many programming
  tasks.  We show how this task may be specified declaratively at a
  high level of abstraction, and we give a simple and intuitive
  semantics for such specifications.  The algorithm is implemented in
  a Java library called DJ.

%  Traversal through object graphs is needed for many programming
%  tasks.  We present the semantics of a graph-based language,
%  called traversal strategies, for expressing object traversal at a
%  high level of abstraction.  The contribution of the paper lies in
%  the simple and intuitive semantics of how to search through an
%  object graph given its meta-information (class graph) and
%  declarative traversal instructions (traversal strategies) expressed
%  in terms of the class graph.  We also derive an algorithm and show
%  that it implements the semantics correctly.


%\cmt{Latest version:}

%Traversal through object graphs is needed for many programming tasks.
%In most programs, the programmer needs to deal with many traversal
%concerns whose implementation is often scattered across several
%classes and tangled with other concerns. We present the semantics of a
%graph-based language, called traversal strategies, for encapsulating
%and localizing traversal concerns and expressing them at a high level
%of abstraction. This eliminates scattering of traversal concerns and
%controls the tangling with other concerns. The contribution of the
%paper lies in the simple and intuitive semantics of how to search
%through an object graph given its meta-information (class graph) and
%declarative traversal instructions (traversal strategies) expressed in
%terms of the class graph. We also derive an algorithm and show that it
%implements the semantics correctly.

\end{abstract}

\section{Introduction}

A common task in object-oriented programming is to take an object $o$
in an object graph and find all the objects $o'$ that are reachable
from $o$ according to some search criterion.  For example, given an
object $o$ of class {\tt A}, find all the objects of class \texttt{C}
that are reachable from $o$ along a path that goes through some object
of class \texttt{B}.  In order to make the search tractable, we
typically search based on \emph{local meta-information}.  In such a
search, we search only those edges from $o$ that \emph{might} lead to
an object that satisfies the search criterion, based on the class
structure of the object graph.  We may explore objects that are not on
the path to a desired target object, but that is an unavoidable
consequence of searching based only on meta-information.

In this paper we present a simple semantics for such a search, and
introduce an algorithm for performing the search, given a search
criterion like the one above.  The semantics and algorithm address two
technical problems: the meaning of the modal operator ``might'', and
the treatment of inheritance. We give an example of this algorithm as
embodied in the DJ library for Java \cite{DJ}.
  
Section~\ref{sec:notation} presents our notation, and
section~\ref{sec:definitions} presents our model of classes and
objects.  In section~\ref{sec:the-problem}, we formulate the basic
traversal problem in the terms of our model.  The key to the problem
is to find a set $\FIRST(c,c')$ of edges such that $e \in \FIRST(c,c')$ iff it
is possible for an object of class $c$ to reach an object of type $c'$
by a path beginning with an edge $e$.  Section~\ref{sec:algorithms}
gives semantics to the core traversal specifications of DJ and shows
how to interpret them as search algorithms.  Section~\ref{sec:static}
completes the story by showing how to compute the $\FIRST$ sets
statically.  Section~\ref{sec:simplexample} presents an example
showing how these concepts are employed in the DJ library.
Section~\ref{sec:related-work} discusses related work.  Finally,
section~\ref{sec:conclusions} presents some conclusions and ideas for
further development.


\section{Notation}
\label{sec:notation}

We will be using relations as our fundamental tool, so we will need
some notation for dealing with relations.

If $A$ and $B$ are sets, a relation from $A$ to $B$ is a subset $R$ of
$A \times B$.  If $a,b \in R$, we will write $R(a,b)$, $a
\mathrel{R}b$ , and $(a,b) \in R$ interchangebly.  A relation from $A$
to $A$ is often called a relation \emph{on} $A$.

We denote composition of relations by concatenation \eg $x \mathrel{(R S)} z$ iff there exists a $y$ such that $x \R y$ and $y \S z$.  We
also write $x \R y \S z$.   $R^*$ denotes the reflexive transitive closure of $R$.

%If
%$B$ is a set, we will occasionally write $R \comp B$ for \setof{a \alt
%  \exists b \in B\ \mathit{s.t.\ } a \R b}.

We often think of directed graphs as relations (and vice versa), so we
write $C(c_1,c_2)$ or $c_1 \mathrel{C} c_2$ when there is an edge from
$c_1$ to $c_2$ in $C$.  We take as given the definition of a path in a
directed graph.


\section{The Model}
\label{sec:definitions}

\begin{definition}
  A \emph{class graph} (sometimes called a \emph{class diagram})
  consists of a set $C$ (of ``classes''), a set $E$ (of field names),
  for each $e \in E$ a relation (also named $e$) on classes (``has
  part named $e$''), and a reflexive, transitive relation $\le$ on
  classes (``is a subclass of''). We write $C(c_1, c_2)$ iff there
  exists $e \in E$ such that $e(c_1, c_2)$.
\end{definition}

Each relation $e$ codes the effect of finding the $e$ part of an
object.  Usually the relation $e$ is a partial function (that is, for
any $c_1$, there is at most one $c_2$ such that $e(c_1,c_2)$), but we
will not need this property.  When $e(c_1,c_2)$, we sometimes say that
$c_1$ has an $e$-part of type $c_2$.  (The significance of the word
``type'' will be explained momentarily).   We use $\le$ to denote the
reflexive, transitive closure of the inheritance relation, so $c \le
c'$ means that $c$ is either the same as $c'$ or is one of $c'$'s
descendants. 

We use $C$ to denote the entire class graph $\langle C, E, \le
\rangle$.  We write $\ge$ for the inverse of $\le$.

An object graph is a model of the objects, represented in the heap or
elsewhere, and their references to each other:

\begin{definition}
  If $C$ is a class graph, then an \emph{object graph} for $C$ consists
  of:
  \begin{enumerate}
  \item a set $O$ (of ``objects''),
  \item a map $\class : O \to C$, and
  \item for each $e \in E$, a relation (also denoted $e$) on $O$
  \end{enumerate}
such that if $e(o_1,o_2)$, then $$\class(o_1) \mathrel{(\le e \ge)}
 class(o_2)$$

  We say that $o$ is of \emph{type} $c$ when $\class(o) \le c$.
\end{definition}

An object is of type $c$ when its class is some class that is a
descendant of $c$.  This corresponds to the usual expectation in a
typed object-oriented language: if a variable is of type $c$, its
value is either null or is an object whose class is either $c$ or a
descendent of $c$.

The traversal of an edge labeled $e$ corresponds to retrieving the
value of the $e$ field.  Condition 3 captures the notion that every
edge in the object graph is an image of a has-as-part edge in the
class graph: There is an edge $e(o_1,o_2)$ in $O$ only when there
exist classes $c_1$ and $c_2$ such that $o_1$ is of type $c_1$, $c_1$
has an $e$-part of type $c_2$, and $o_2$ is of type $c_2$, that is, 
$$\class(o_1){} \le c_1 \mathrel{e} c_2 \ge {}\class(o_2)$$  See
figure~\ref{fig:single-link}. 

\begin{figure}[htbp]
  \begin{center}
   \diagram{single-link.eps}
    \caption{Typical link in an object graph.  If there is an $e$-edge
      from $o1$ to $o2$, then there is a path\ $\class(o1){} \le
      \mathrel{e} \ge {}\class(o2)$ in the class graph.}
    \label{fig:single-link}
  \end{center}
\end{figure}


As we did for class graphs, we use $O$ to denote the entire object
graph whose set of objects is $O$.

All parts are optional (allowing for null values) or multi-valued
(for a given object $o_1$, there may be many objects $o_2$ such that
$e(o_1, o_2)$).  The latter case allows us to handle collections:
if class $c_1$ contains a field $e$ that is a collection of objects of
type $c_2$, we may represent this as $e(c_1,c_2)$ and use multi-valued
edges in the object graph, rather than introduce the notion of
collections into our model.

We might propose the additional condition that if $\class(o_1) \le c_1$
and $e(c_1,c_2)$, then there exists an $o_2$ such that $e(o_1,o_2)$ and
$\class(o_2) \le c_2$.  This means that every edge predicted by the
class graph exists in the object graph.  All the theorems in the paper
are true under this stronger definition, but the additional condition
is undesirable because it rules out null fields.


\section{The Problem}
\label{sec:the-problem}

The computational problem we wish to study is the following:

We are at an object $o$ of class $c$ in object graph $O$  and we wish
to find all reachable objects of type $c'$.  However, we have no
information about the object graph other than that it is a legal
object graph for $C$. Which edges must we explore in order to find all
these objects?

We can formalize the problem as follows.  For each pair of classes $c$
and $c'$, we need to find a set $\FIRST(c,c')$ such that $e \in
\FIRST(c,c')$ iff it is possible for an object of class $c$ to reach
an object of type $c'$ by a path beginning with an edge $e$.  More
precisely,
$$
\FIRST(c,c') = \{ e \in E \alt 
\begin{tabular}[t]{@{}l}
there exists an object graph $O$ of $C$ and \\
objects $o$ and $o'$ such that: \\
\quad 1. $\class(o) = c$, \\
\quad 2. $\class(o') \le c'$ \\
\quad 3. $o \mathrel{e}  \mathrel{O^*} o'$
\}
\end{tabular}
$$
The last condition,  $o \mathrel{e}  \mathrel{O^*} o'$ says that there
is a path from $o$ to $o'$ in the object graph, consisting of an edge
labelled $e$, followed by any sequence of edges in the graph.

Our lack of information about the actual object graph is represented
by the existential operator.  Since we cannot search explicitly over
all object graphs, our goal is to find a static algorithm to compute
these sets.



\section{Traversal Algorithms}
\label{sec:algorithms}

Before considering an algorithm for finding the \FIRST sets, we
consider some applications of these sets. We can use these sets to
find not just reachable objects of a given type, but paths that pass
through objects of a given type.

\begin{definition}
  Let $R = (c_1, \dots, c_K)$ be a non-empty sequence of classes.  Let
  $p = (o_1, \dots ,o_N)$ be a path in $O$.  We say that $p$ is an
  \emph{$R$-path} iff there is a subsequence $o_{j_1} , \dots,
  o_{j_K}$ ($K \ge 1$) of $p$ such that for each $i$, $o_{j_i}$ has type $c_i$,
  and $j_K = N$ (that is, the last element of the path must be part of
  the subsequence).  We say an $R$-path is \emph{minimal} iff it has
  no initial segment that is also an $R$-path.
\end{definition}

Thus an $R$-path is a path through the object graph that passes
through objects of the types specified by $R$ in the order specified
by $R$.  It may pass through objects of other classes along the way,
but it must end at the endpoint of $R$. 


Given an object $o$, we can visit all the endpoints of minimal R-paths
starting at $o$ as follows:

\begin{algorithm}
\textbf{Algorithm} $\search(o,R)$: 
  
  Let $c_1$, \dots, $c_n$ be the classes such that $R = (c_1,\dots, c_n)$.
  \begin{enumerate}
  \item If $o$ does not have type $c_1$, then for each $e$ in
    $\FIRST(class(o),c_1)$, and each $o'$ such that $e(o,o')$, do
    $\search(o', R)$.   
  
  \item If $o$ has type $c_1$, and $R = (c_1)$, then do $\visit(o)$.

  \item If o has type $c_1$, and $R = (c_1, c_2,\dots ,c_n)$, then do
    $\search(o,(c_2,\dots ,c_n))$.
  \end{enumerate}
\end{algorithm}
  
In case 1 we are not yet on a path, so we follow the \FIRST edges to
guide us to the first goal type $c_1$.  We can find the required
objects $o'$ by retrieving the value of $o$'s $e$-field.  In case 2,
we have reached the last goal type, so we visit the object.  In case 3
we have reached the first goal type, so we recur on the rest of the
goal path.



We could find all $R$-paths, instead of just the minimal ones, by
modifying step 2 to continue searching:

\begin{algorithm}
  \begin{enumerate}
  \item[$2'$]  If $o$ has type $c_1$, and $R = (c_1)$, then do  $\visit(o)$.
    Then for each $e$ in
    $\FIRST(class(o),c_1)$, and each $o'$ such that $e(o,o')$, do
    $\search(o', R)$. 
\end{enumerate}
\end{algorithm}

If the object graph is acyclic, these algorithms will always
terminate, since every step either decreases the longest chain of
links in the object graph or decreases the length of $R$.  If the
graph may be cyclic, then we need to mark each searched object with
the state $R$ in which it was reached, or else carry around the set of
pairs $(o, R)$ that we have already searched.  This will allow us to
avoid repeated visits to the same object in the same traversal state.



DJ allows the use of a graph, called a \emph{strategy graph}, to
specify a more complex traversal \cite{strategies-tr:LP97}.  A
strategy graph can be modeled as a non-deterministic finite-state
automaton: 

\begin{definition}
  A \emph{strategy graph} is given by a set of states $Q$, a relation $S$
  on states, a map $\class : Q \to C$, a set $QI \subseteq Q$ of initial
  states, and a set $QF \subseteq Q$ of final states.  We denote such a
  strategy graph by $S$.
\end{definition}


% \begin{verbatim}

\begin{definition}
  A path $p = (o_1,\dots ,o_N)$ in $O$ is an \emph{$S$-path} iff there
  is a subsequence $o_{j_1} ,\dots ,o_{j_K}$ ($K \ge 1$) of $p$ and a path
  $(q_1,\dots ,q_K)$ in $S$ such that for each $i$, $o_{j_i}$ has type
  $\class(q_i)$, $j_K = N$, and $q_K \in QF$.  As before, we say that
  an $S$-path is \emph{minimal} iff it has no initial segment that is
  also an $S$-path.
    \end{definition}

Given an object $o$, we can visit all the endpoints of minimal $S$-paths
starting at $o$ as follows, where $Q'$ ranges over subsets of $Q$:



\begin{algorithm}
  \textbf{Algorithm} $\search(o,S): \search'(o,QI)$, where for any $Q'
  \subseteq Q$, $\search'(o,Q')$ is defined by:

\begin{algorithm}
  \textbf{Procedure $\search'(o,Q')$}:

\begin{enumerate}
\item If $class(o) \not\le \class(q)$ for any $q \in Q'$, then for
  each $q \in Q'$, for each $e$ in $\FIRST(class(o),class(q))$, and
  each $o'$ such that $e(o,o')$, $\search'(o', Q')$.
  
\item If $class(o) \le \class(q)$ for some $q \in Q' \cap QF$, then
  $\visit(o)$.
  
\item Otherwise let $Q'' = \setof{q \in Q' \alt o$ does not have type
    $class(q)} \cup \setof {q'| \exists\ q \in Q' $ such that $o$ has
    type $\class(q)$ and $S(q,q')}$.  Then $\search'(o,Q'')$.
  \end{enumerate}
\end{algorithm}
\end{algorithm}

This algorithm is much like the preceding one, except that the set
$Q'$ maintains the state of a run through the non-deterministic
automaton $S$.  In step 1, we are not at a point on the subsequence,
so we use the \FIRST sets to search any edge that may lead us to any
of the first goal classes.  In step 2, we have reached a final state,
so we visit the object we have reached.  In step 3, we are at a set of
states $Q'$.  Some of those states represent goal classes that we have
not yet reached.  Other states are goal classes that we have now
reached.  We create the next set of goal classes by carrying along
those that we have not yet reached, and taking a step in the automaton
$S$ for those that we have reached.

We typically start the algorithm with a unique start state and an
object $o$ that matches that state.

As before, when the object graph is acyclic, this always terminates.
If the object graph can be cyclic, then we need to carry around a
``seen'' set, as above.

\section{Calculating \FIRST}
\label{sec:static}

We develop a static description of \FIRST using a sequence of lemmas.
This allows us to compute \FIRST from the class graph.
We start with a fixed class graph $C$.

\begin{lemma}\label{lemma1}
  There exists an object graph \O of \C and objects $o_1$, $o_2$
  such that $O(o_1,o_2)$ iff $\class(o_1) \mathrel{\le C \ge} \class(o_2)$.
\end{lemma}

\proof The forward direction is immediate from the definition of an
object graph of \C.  For the reverse direction, construct an object
graph with two objects $o_1$ and $o_2$ and the specified link.  The
result is an object graph of \C.

\begin{lemma}\label{lemma2}
  There exists an object graph \O of \C and objects $o_1$, $o_2$
  such that $O^*(o_1,o_2)$ iff $class(o_1) \mathrel{(\le C \ge)^*}
  class(o_2)$ . 
\end{lemma}

\proof Again, the forward direction is immediate.  For the reverse,
apply induction on the standard definition of $(-)^*$ (reflexive transitive
closure), using the preceding lemma.

A picture of this situation is shown in figure~\ref{fig:paths}.

\begin{figure}[htbp]
  \begin{center}
    \diagram{paths.eps}
    \caption{A path in an object graph. If there is a path in the graph
      from $o_1$ to $o_2$, then $class(o_1) \mathrel{(\le C \ge)^*}
      class(o_2)$. }
    \label{fig:paths}
  \end{center}
\end{figure}


\begin{lemma}\label{lemma3}
  Let $c_1$ and $c_2$ be classes.  Then there exists an object
  graph \O of \C and objects $o_1$, $o_2$ such that $\class(o_1) \le c_1$ and
  $\class(o_2) \le c_2$ and $O^*(o_1,o_2)$ iff $c_1 \ge \class(o_1)
  \mathrel{(\le C \ge)^*} \class(o_2) \le c_2$.
\end{lemma}

\proof Immediate.

\begin{theorem}
  $$\FIRST(c,c') = \setof{e \alt c \mathrel{(\le e \ge) (\le C \ge)^* \le} c'} $$
\end{theorem}

\proof   We must show that there exists an object graph \O of \C and
objects $o$ and $o'$ such that 
\begin{enumerate}
\item $\class(o) = c$ 
\item $\class(o') \le c'$
\item $o \mathrel{e} \mathrel{O^*} o'$
\end{enumerate}
iff $c \mathrel{(\le e \ge) (\le C \ge)^* \le} c'$.

The forward direction is immediate from the definition of an object
graph of \C.  For the reverse definition, consider a sequence of
classes such that

$$c (\le e \ge) c_1 (\le e_1 \ge) c_2 (\le e_2 \ge) \dots (\le e_{n+1} \ge) c_n \le c'$$

Then construct an object graph with $n+2$ objects, of classes $c$,
$c_1$, \dots ,$c_n$, $c'$, with a link labelled $e$ from the
$c$-object to the $c_1$ object, a link labelled $e_1$ from the $c_1$
object to the $c_2$ object, etc. Let $o$ be the first object in this
sequence and $o'$ be the last. This object graph
satisfies the requirements.  \qed

Similarly, to find all the edges from an object of class $c$ that
might lead to an edge $e$, we compute

$$\FIRST(c,e) = \setof{e' \alt (\exists c')(c \mathrel{(\le e' \ge)
    (\le C \ge)^* \le e} c')}
$$


\section{A Simple Example}
\label{sec:simplexample}

We illustrate automated traversal generation with a simple example,
written in Java using the DJ library.  

Consider an equation system that contains equations with a 
left-hand side and a right-hand side.
A typical set of equations might be

\begin{verbatim}
X1 = (X2 + X3)
X2 = (X5 + (X3 * X1))
X3 = (X5 + (7 * 5))
\end{verbatim}

A possible class diagram for these structures is shown in Figure
\ref{Eq1}, using notation similar to that of our previous figures. The
asterisks indicate one-many relations; they are included for UML
compatibility but are not part of our model; in our model has-as-part
relations are always possibly one-many.

\begin{figure}
%\diagram{figs/equations.eps}
\centerline{\epsfig{file=figs/equations.eps,height=3in}}
%\centerline{\epsfig{file=figs/equations.eps}}
%\centerline{\epsfig{file=figs/Eq1.eps,height=4in,width=\textwidth,angle=270}}
\caption{Class graph for {\sf EquationSystem}.}
\label{Eq1}
\end{figure}



In DJ, we search for all minimal $(c_1, \dots, c_K)$-paths using the
search criterion \def\c#1{\ma{c_{#1}}}
\begin{alltt}
through \c1 through \c2 \dots to \c{K}
\end{alltt}
DJ traversal specifications allow several extensions of this notation.
The criterion
\begin{alltt}
from \c0 through \c1 through \c2 \dots to \c{K}
\end{alltt}
has the same behavior as the search above, but fails if its starting
point is not of type \c0.  DJ also allows edge specifications of the
form \texttt{through $e$}.  This can be implemented by extending
$\FIRST(c_1, c_2)$ to $\FIRST(c, e)$ as defined in
section~\ref{sec:static} and modifying the algorithm in
section~\ref{sec:algorithms} correspondingly.

A a DJ traversal specification may include a clause \texttt{bypassing
  \c1} specifying that the portion of the path in which this clause
occurs may not pass through an object of class \c1.  The DJ library
also includes ways to specify strategy graphs
\cite{strategies-tr:LP97}.

In our example, let us say that a variable is \emph{defined} if it
appears on the left-hand side of an equation and that it is
\emph{used} if it appears on the right-hand.  In the example,
variables \texttt{X1}, \texttt{X2} and \texttt{X3} are defined, and
variables \texttt{X2}, \texttt{X3} and \texttt{X5} are used.  The
purpose of our Java program is to collect the defined and used
variables.

To solve this problem, we identify two traversals.  The first
traversal may be written as \texttt{from EquationSystem through rhs to
  Variable}.  The second may be written as \texttt{from Equation
  System through lhs to Variable}.

There are of course many ways of specifying the same traversals.  For
example the first could have been written as \texttt{from
  EquationSystem through rhs through Expression to Variable}, and the
second could have been written as \texttt{from EquationSystem
  bypassing Expression to Variable}.

The DJ library uses reflection to compute the relevant \FIRST sets.
This has two advantages: first, it allows the same code to be reused
even if the class structure changes.  Second, it allows the system to
be implemented as a pure Java library rather than as a preprocessor.
This is an example of \emph{adaptive} behavior \cite{karl:demeter}.

\begin{figure}\small
\begin{verbatim}
class EquationSystem{
  static ClassGraph cg = new ClassGraph(); 
  // constructors omitted ...
  HashSet collectVars(String travspec){
    Visitor v = new Visitor(){
      HashSet return_val = new HashSet();
      void before(Variable v1){return_val.add(v1);}
      public Object getReturnValue(){return return_val;}
    };
    cg.traverse(this, travspec, v);
    return (HashSet)v.getReturnValue();             
  }
  HashSet getUsedVars() { 
    return this.collectVars
     ("from EquationSystem through Expression"
       + "through rhs to Variable"); 
  }
  HashSet getDefinedVars() {
    return this.collectVars
     ("from EquationSystem bypassing Expression"
      + "to Variable"); 
    }
}

\end{verbatim} 
\caption{Finding the variables in an equation system}
\label{1x}
\end{figure}

The code for this task is shown in figure~\ref{1x}.  The static member
\texttt{cg} is bound to a \texttt{ClassGraph} object that contains a
representation of the current class diagram\footnote{In a real
  application, this would probably done globally rather than on a
  per-class basis.}.  The method \textsf{collectVars} takes a string
\texttt{travspec} that specifies the traversal to be performed.  It
first creates a visitor \texttt{v} to implement the behavior that is
to be performed during the traversal. Once the visitor is created,
\texttt{collectVars} then initiates the traversal by calling the
\texttt{traverse} method of the current class graph \texttt{cg},
passing as arguments the object at which to start the traversal
(\texttt{this}), the traversal to be performed (\texttt{travspec}),
and the visitor to be executed along the way (\texttt{v}).

The execution of \texttt{cg.traverse(this, travspec, v)} follows the
visitor pattern \cite{gang-of-4}, augmented by reflection.  Every time
the traversal reaches a node $n$ for which a \texttt{before} method
has been defined, it calls \texttt{v.before($n$)}.  The traversal
algorithm uses reflection to call the \texttt{before} method only on
objects for which a \texttt{before} method has been defined.  Thus,
when the node is a variable, the method \texttt{before(Variable v1)}
(the ``visitor method'') is called, adding the node to the set.
%on nodes of
%other classes, the call to  \texttt{v.before($n$)} will be handled by
%the \texttt{Visitor} class, with the empty behavior. 
Similarly, on
completion of the search, the traversal calls
\texttt{v.getReturnValue()}, which returns the collected hash set.

The DJ library includes a powerful language of traversal
specifications and methods, including the possibility of precomputing
the \FIRST sets, and of attaching methods to edge traversals; see
\cite{DJ}.   An introduction to DJ, with emphasis on its reflective
properties, may be found in \cite{OrleansLieberherrReflection01}.

\section{Related Work}
\label{sec:related-work}

% \input related-work.tex

The {\em Visitor} design pattern is discussed in
many software-engineering works (e.g.,
\cite{gang-of-4}).  In contrast with the ordinary
visitor pattern, no preparation in the underlying
objects is necessary in order to use the DJ
library.

%While this
%approach identifies and isolates the task of traversal, no mechanism
%to automate the task and make it adaptive was previously proposed,
%except in \cite{karl:demeter,strategies-tr:LP97}. 
                                                  


The concept of automated traversal generation
using succinct representation was introduced in
\cite{karl:comp-enh}.  It was formalized in
\cite{lieber-palsberg-xiao94} and is extensively
treated in \cite{karl:demeter}, where it is the
key idea of Adaptive Programming (AP), and is used
to implement the Adaptive Visitor pattern
\cite[pages 426--427]{karl:demeter}.

Our most complex language of traversal
specifications, called \emph{strategy graphs}, is introduced in
\cite{strategies-tr:LP97} together with an efficient implementation.
This paper provides a self-contained description of the semantics and
algorithms for Adaptive Programming in a few pages.  Instead of
refering to paths in the class diagram as \cite{strategies-tr:LP97} does, the
basic meaning is defined directly in terms of object graphs.  By
dealing directly with the search algorithm in the object graph, it
avoids the complications of the traversal histories of
\cite{lieber-palsberg-xiao94}.


Our use of the visitor pattern may be regarded as an example of
Aspect-Oriented Programming \cite{CACM01}.  Aspect-oriented
programming systems typically consist of a framework that identifies a
set of events during the computation at which procedures may be
invoked (the join points), a declarative language for specifying a set
of such events (the "point cut descriptors"), and a means of
associating procedures with such declaratively-specified sets.  In DJ,
computation is the traversal, and a join point is the passage of the
traversal through a node of the object graph.  Each visitor method has
a signature that plays the role of a point cut descriptor, a body that
plays the role of the associated procedure.

In the context of {\em object-oriented databases}, traversals are
heavily used. Some automation of traversals was suggested in
\cite{ns:vldb88,bv:navigation,MarSho:abbrev-query-int93,querying-oodb:kim92,ioannidis:path-expr}.
Roughly speaking, the idea in these papers is to traverse to a target
without specifying the full path leading to it.  Most of this work
concerns what we call minimal $R$-paths; however the primary concern
is how to complete the abbreviation when it is ambiguous, sometimes
using heuristics.  DJ takes advantage of reflection to complete its
queries.

Mendelzon and Wood \cite{mendelzon89finding} show
the computational intractability of finding actual
paths in an object graph, even without the
presence of inheritance.  We avoid this difficulty
because we are searching only for potential paths
based on local meta-information, and we therefore
accept the possibility of traversing nodes that
are not actually on a path to the target class.

The XPath \cite{XPath} language is used to specify sets of elements in
XML documents.  XPath uses a succinct notation somewhat like ours; for
example, the XPath expression {\sf A//B} refers to the set of all {\sf
  B}-objects reachable from the {\sf A}-object.  The semantics of an
XPath expression is an unordered set of objects.  This closely matches
the algorithms as presented in section~\ref{sec:algorithms}, although
our implementation in DJ also captures the paths by allowing visitors
to be called on all objects in those paths.  

%More important, our algorithms work in the
%presence of inheritance, which is absent in XML.

%While XPath is a powerful language to address parts of an XML
%document, it does not have a formally defined semantics
%for document navigation and this paper makes a contribution
%to the semantics of succinct XPath specifications.

\section{Conclusions and Future Work}
\label{sec:conclusions}

We have presented a semantics for a declarative specification of
object-graph traversals.  Our organization separates the concerns of

\begin{itemize}
\item The search or traversal to be performed, 
\item The visits to objects of different classes to be executed along
  the way, and
\item The details of the organization of the object graph, which is
  reconstructed by reflection.
\end{itemize}

Our derivation of an effective computation for \FIRST illustrates that
a relational formalism is the right approach to express the different
path concepts that are needed; we believe that this approach will be
helpful in other contexts as well.


This paper was motivated by the need to have a simpler semantics
for traversal strategies than the one presented in \cite{strategies-tr:LP97}.
Now that we have a simpler technical core, we can hope to explore
questions like: When the class graph changes, does the adaptive
program have to change?  For what kind of contexts is an adaptive (or
aspect-oriented) program correct?  In an aspect-oriented program, what
kinds of changes in the base program require changes in the aspect
code, or conversely, when do changes in aspect code require changes in
the base code?  We hope to explore questions of this kind first in the
context of DJ and eventually in the more general setting of AOP.

\bibliographystyle{plain}
\bibliography{karl,paper}
% \bibliography{}                 % use the manually-generated .bbl file




\end{document}

