\documentstyle[11pt,psfig]{article}
%\documentclass[11pt]{article}
%\usepackage{psfig}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PREAMBLE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%  Page length parameters %%%%%%%%%%%%%
% Dense format
\def\denseformat{
\setlength{\textheight}{9in}
\setlength{\textwidth}{6.9in}
\setlength{\evensidemargin}{-0.2in}
\setlength{\oddsidemargin}{-0.2in}
\setlength{\headsep}{10pt}
\setlength{\topmargin}{-0.3in}
\setlength{\columnsep}{0.375in}
\setlength{\itemsep}{0pt}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% mid format
\def\midformat{
\setlength{\textheight}{8.9in}
\setlength{\textwidth}{6.7in}
\setlength{\evensidemargin}{-0.2in}
\setlength{\oddsidemargin}{-0.2in}
\setlength{\headheight}{0in}
\setlength{\headsep}{10pt}
\setlength{\topsep}{0in}
\setlength{\topmargin}{0.0in}
\setlength{\itemsep}{0in}       % 10pt is too big with the 1.2 stretch
\renewcommand{\baselinestretch}{1.1}
\parskip=0.070in
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Spacy format
\def\spacyformat{
\setlength{\textheight}{8.8in}
\setlength{\textwidth}{6.5in}
\setlength{\evensidemargin}{-0.18in}
\setlength{\oddsidemargin}{-0.18in}
\setlength{\headheight}{0in}
\setlength{\headsep}{10pt}
\setlength{\topsep}{0in}
\setlength{\topmargin}{0.0in}
\setlength{\itemsep}{0in}      % 10pt is too big with the 1.2 stretch
\renewcommand{\baselinestretch}{1.2}
\parskip=0.080in
}

%%%%%%%%%%%%  Defining theorem-like environments %%%%%%%%%%
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}{Definition}[section]
\newtheorem{algorithm}{Algorithm}[section]
\newtheorem{claim}[theorem]{Claim}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{conjecture}{Conjecture}
\newtheorem{notation}[definition]{Notation}
\newtheorem{corollary}[theorem]{Corollary}
%\newtheorem{fact}[claim]{Fact}
\newtheorem{observation}{Observation}
%\newtheorem{comment}{Comment}
%\newtheorem{assumption}{Assumption}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\def\weizprinter{\setlength{\topmargin}{0.3in}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\def\boldhead#1:{\par\vskip 7pt\noindent{\bf #1:}\hskip 10pt}
\def\ithead#1:{\par\vskip 7pt\noindent{\it #1:}\hskip 10pt}
\def\inline#1:{\par\vskip 7pt\noindent{\bf #1:}\hskip 10pt}
%
%The next command is essentially equivalent to \section*,
%except smaller font
\def\heading#1{\par\medskip\noindent{\bf #1}\par\smallskip\noindent}
\long\def\comment #1\commentend{}
\long\def\commfull #1\commend{#1}
\long\def\commabs #1\commenda{}
\long\def\commtim #1\commendt{#1}
\long\def\commb #1\commbend{}
\def\blackslug{\hbox{\hskip 1pt \vrule width 4pt height 8pt
    depth 1.5pt \hskip 1pt}}
\def\QED{\quad\blackslug\lower 8.5pt\null\par}
\def\Qed{\QED}
\def\Proof{\noindent{\bf Proof:~}}
\def\proof{\Proof}
\def\Sketch{\noindent{\bf Proof Sketch}: }
\def\Method{\noindent{\bf Proof Method}: }
\long\def\PPP#1{\noindent{\bf Proof:}{ #1}{\quad\blackslug\lower 8.5pt\null}}
\long\def\PP#1{\noindent {\bf Proof}:{ #1} $\Box$ \\}
\long\def\P#1{{\Proof}{#1}\Qed\\}
\def\Remark{\noindent{\bf Remark:~}}
%\def\Comment{\noindent{\bf Comment:~}}
\long\def\denspar #1\densend
{#1}
%{{\renewcommand{\baselinestretch}{0.8}\small #1\par\medskip}}
\def\DEF{\stackrel{\rm def}{=}}
\newcommand{\Eqr}[1]{Eq.~(\ref{#1})}
\newcommand{\diff}{\ne}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\OO}[1]{O\left( #1\right)}
\newcommand{\OM}[1]{\Omega\left( #1 \right)}
\newcommand{\Prob}[1]{\Pr\left\{ #1 \right\}}
\newcommand{\Set}[1]{\left\{ #1 \right\}}
\newcommand{\Seq}[1]{\left\langle #1 \right\rangle}
\newcommand{\Range}[1]{\left\{1,\ldots, #1 \right\}}
\newcommand{\ceil}[1]{\left\lceil #1 \right\rceil}
\newcommand{\floor}[1]{\left\lfloor #1 \right\rfloor}
\newcommand{\ignore}[1]{}



\newcommand{\inclaim}[2]{\trivlist \item[\hskip \labelsep{\bf Claim #1.}{\em #2}]}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\def\cA{{\cal A}}
\def\cB{{\cal B}}
\def\cC{{\cal C}}
\def\cD{{\cal D}}
\def\cE{{\cal E}}
\def\cF{{\cal F}}
\def\cG{{\cal G}}
\def\cH{{\cal H}}
\def\cI{{\cal I}}
\def\cJ{{\cal J}}
\def\cK{{\cal K}}
\def\cL{{\cal L}}
\def\cM{{\cal M}}
\def\cN{{\cal N}}
\def\cO{{\cal O}}
\def\cP{{\cal P}}
\def\cQ{{\cal Q}}
\def\cR{{\cal R}}
\def\cS{{\cal S}}
\def\cT{{\cal T}}
\def\cU{{\cal U}}
\def\cV{{\cal V}}
\def\cW{{\cal W}}
\def\cX{{\cal X}}
\def\cY{{\cal Y}}
\def\cZ{{\cal Z}}
%%%%%%%%%%%%%%%%%%
\def\hA{{\hat A}}
\def\hB{{\hat B}}
\def\hC{{\hat C}}
\def\hD{{\hat D}}
\def\hE{{\hat E}}
\def\hF{{\hat F}}
\def\hG{{\hat G}}
\def\hH{{\hat H}}
\def\hI{{\hat I}}
\def\hJ{{\hat J}}
\def\hK{{\hat K}}
\def\hL{{\hat L}}
\def\hP{{\hat P}}
\def\hQ{{\hat Q}}
\def\hR{{\hat R}}
\def\hS{{\hat S}}
\def\hT{{\hat T}}
\def\hX{{\hat X}}
\def\hY{{\hat Y}}
\def\hZ{{\hat Z}}
%%%%%%%%%%%%%%%%%
\def\eps{\epsilon}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Marginal notes for communicating with coauthors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\setlength{\marginparwidth}{1in}
\setlength{\marginparpush}{-5ex}
\newif\ifnotesw\noteswtrue% T to show box & marginal notes; F supresses.
\newenvironment{change}%
   {\ifnotesw\marginpar[\hfill\(\top\)]{\(\top\)}\fi}%
   {\ifnotesw\marginpar[\hfill\(\bot\)]{\(\bot\)}\fi}
\newcommand{\deletion}{\ifnotesw\marginpar[\hfill\(\vartheta\)]{\
(\vartheta\)}\fi}
\newcommand{\mnote}[1]%
    {\ifnotesw\marginpar%
        [{\scriptsize\begin{minipage}[t]{\marginparwidth}
        \raggedleft#1%
                        \end{minipage}}]%
        {\scriptsize\begin{minipage}[t]{\marginparwidth}
        \raggedright#1%
                        \end{minipage}}%
    \fi}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\newenvironment{denselist}{
\begin{list}{(\arabic{enumi})}{
\usecounter{enumi}
\setlength{\topsep}{0pt}
\setlength{\partopsep}{0pt}
\setlength{\itemsep}{0pt}
}}{\end{list}}

\newenvironment{denseitemize}{
\begin{list}{$\bullet$}{
\setlength{\topsep}{0pt}
\setlength{\partopsep}{0pt}
\setlength{\itemsep}{0pt}
}}{\end{list}}

\newenvironment{subdenselist}{
\begin{list}{(\arabic{enumi}.\arabic{enumii})}{
\usecounter{enumii}
\setlength{\topsep}{0pt}
\setlength{\partopsep}{0pt}
\setlength{\itemsep}{0pt}
}}{\end{list}}

\newcommand{\handoutformat}{
        \addtolength{\topmargin}{-1in}
        \setlength{\oddsidemargin}{-.1 in}
        \setlength{\textwidth}{6.5in}
        \setlength{\textheight}{9in}
}
\newcommand{\talkformat}{
        \addtolength{\topmargin}{-1in}
        \setlength{\oddsidemargin}{-.1 in}
        \setlength{\textwidth}{6.5in}
        \setlength{\textheight}{9in}
        \parindent=0.0in
}
\def\lectureformat{
\setlength{\textheight}{8.8in}
\setlength{\textwidth}{6.5in}
\setlength{\evensidemargin}{-0.18in}
\setlength{\oddsidemargin}{-0.18in}
\setlength{\headheight}{0in}
\setlength{\headsep}{10pt}
\setlength{\topsep}{0in}
\setlength{\topmargin}{0.0in}
\setlength{\itemsep}{0in}      % 10pt is too big with the 1.2 stretch
\renewcommand{\baselinestretch}{1.2}
%\parskip=0.0in
%\parindent=0.0in
}

\newcommand{\headersmall}[3]{
   \pagestyle{plain}
   \noindent
      \vbox{
    \hbox to 6.28in { \Course \hfill #2} 
\hrule

\vspace{1mm}

Prof. Patt-Shamir

       \vspace{7mm}

       \hbox to 6.28in {\Large\bf \hfill #3 \hfill} 
      }
   \vspace*{8mm}
}
\newcommand{\lectureheader}[3]{
   \pagestyle{plain}
   \noindent
      \vbox{
    \hbox to 6.28in { \Course
                        \hfill #2 } 
\hrule
\vspace*{1mm}
Lecturer: #3
       \vspace{7mm}\\
       \hbox to 6.28in {\LARGE\bf \hfill Lecture #1 \hfill} 
        }
   \vspace*{4mm}
}

\newcommand{\newlectureheader}[3]{
   \pagestyle{plain}
   \noindent
      \vbox{
    \hbox to 6.28in { 6.04s Design and Analysis of Distributed Protocols
                       \hfill #2} 
\hrule
\vspace*{1mm}
Lecturer: #3
       \vspace{7mm}\\
       \hbox to 6.28in {\LARGE\bf \hfill Lecture #1 \hfill} 
        }
   \vspace*{4mm}
}


\newcommand{\talkheader}[3]{
   \pagestyle{plain}
   \vbox{
    \hbox to 6.28in {Lecturer: #3 \hfill #2 } 
    \hrule
    \vspace*{7mm}
    \hbox to 6.28in {\LARGE\bf \hfill #1 \hfill} 
   }
   \vspace*{4mm}
}

\def\hd{\hat{\delta}}
\def\Lr{\Leftrightarrow}

% \def\If{{\bf if }}
% \def\Then{{\bf then }}
% \def\Else{{\bf else }}
% \def\Do{{\bf do }}
% \def\While{{\bf while }}
% \def\Continue{{\bf continue }}
% \def\Repeat{{\bf repeat }}
% \def\Until{{\bf until }}

\def\DRAFT{
\markright{DRAFT (\today)}
\pagestyle{myheadings}
\setlength\headsep{30pt}
\setlength\topmargin{-\headsep}
}

%% slew of macros used in karl's stuff.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\def\C++{\leavevmode\rm{\hbox{C\hskip -0.1ex\raise 0.5ex\hbox{\tiny ++}}}}

\def\({\begin{eqnarray*}}
\def\){\end{eqnarray*}}

\hyphenation{data-base}

%\oddsidemargin 0cm
%\textwidth 17cm
%\addtolength{\topmargin}{-2.5cm}
%\raggedbottom
%\textheight 22cm

\newcommand\sink{{Rome-node}}

\newcommand\sel{\; \; | \; \;}
\newcommand\WF{\mbox{\sf WF}}
\newcommand\Source{\mbox{\sf Source}}
\newcommand\Target{\mbox{\sf Target}}
\newcommand\Graph{\mbox{\sf Graph}}
\newcommand\PG{{PG}}
\newcommand\Prog{\mbox{\sf P}}
\renewcommand\Method{\mbox{\sf M}}
\newcommand\Paths{\mbox{\sf Paths}}
\newcommand\PathSet{\mbox{\sf PathSet}}
\newcommand\NoShortcut{\mbox{\sf NoShortcut}}
\newcommand\NoZigzag{\mbox{\sf NoZigzag}}
\newcommand\MinReq{\mbox{\sf MinReq}}
\newcommand\Linear{\mbox{\sf Linear}}
\newcommand\Abstract{\mbox{\sf Abstract}}
\newcommand\Edges{\mbox{\sf Edges}}
\newcommand\Root{\mbox{\sf Root}}
\newcommand\Comp{\mbox{\sf Comp}}
\newcommand\Leaf{\mbox{\sf Leaf}}
\newcommand\Subclass{\mbox{\sf Subclass}}
\newcommand\Reduce{\mbox{\sf Reduce}}
\newcommand\Head{\mbox{\sf Head}}
\newcommand\Select{\mbox{\sf Select}}
\newcommand\Car{\mbox{\sf Car}}
\newcommand\Cdr{\mbox{\sf Cdr}}
\newcommand\First{\mbox{\sf head}}
\newcommand\Chop{\mbox{\sf tail}}
\newcommand\Simple{\mbox{\sf Simplify}}
\newcommand\Class{\mbox{\sf Class}}
\newcommand\ClassName{\mbox{\sf ClassName}}
\newcommand\Main{\mbox{\sf Main}}
\newcommand\Run{\mbox{\sf Run}}
\newcommand\Traverse{\mbox{\sf Traverse}}
\newcommand\TraverseIntersect{\mbox{\sf Traverse2}}
\newcommand\SimpleTraverse{\mbox{\sf SimpleTraverse}}
\newcommand\Execute{\mbox{\sf Execute}}
\newcommand\Language{\mbox{\sf Language}}
\newcommand\Simplify{\mbox{\sf Simplify}}
\newcommand\SimplifyPath{\mbox{\sf SimplifyPath}}
\newcommand\Auto{\mbox{\sf Auto}}
\newcommand\Det{\mbox{\sf Det}}
\newcommand\Outgoing{\mbox{\sf Outgoing}}
\newcommand\interval[1]{\langle{#1}\rangle}
\def\edge#1#2#3{#1\stackrel{#2}{\imp}#3}
\def\imp{\rightarrow}
\def\eps{\epsilon}

\def\Pre{{\it pre\/}}
\def\Post{{\it post\/}}
\def\GPaths{{\sf PG\_Paths\/}}
\def\CC{compositionally consistent}
\def\CS{compositionally satisfiable}
\def\GS{{\it ground\_set\/}}
\def\PR{{\it primitives\/}}
\def\SOP{{\rm SOP}}

\def\Visit{\mbox{\sf visit}}
\def\Program{\mbox{\sf Prog}}
\def\Final{\mbox{\it final}}
\def\Switch{\mbox{\it Switch}}
\def\Words{\mbox{\sf Words}}


\def\True{{\sc true}}
\def\False{{\sc false}}
%%%%%%%%%% NEW MACROS
%%%%%%%%%%%%%%%%%%%%%

\def\Sep{\wr}
\def\Tostop{{\it tostop\/}}
\def\Rect{{\it rect\/}}
\def\In{{\it in\/}}
\def\Out{{\it out\/}}
\def\Prim{{\it prim\/}}

\def\This{{\tt this}}
\def\star{^{\textstyle *}}

\def\DD{{\cal D}}
\def\LL{{\cal L}}


\def \oo{object-oriented}

%construction vertex
\newcommand{\cv}{\ifmmode \Box \else  $\Box$ \fi}

%alternation vertex
\newcommand{\av}{\psfigurepath{/home/cunxiao/figures/} 
\psfig{figure=hexagon.ps}}

%construction edge
\newcommand{\ce}[1]{\ifmmode \stackrel{{}_{#1}}{\longrightarrow}
                          \else $\stackrel{{}_{#1}}{\longrightarrow}$ 
           \fi}

%alternation edge
\def \ae{\ifmmode \Longrightarrow \else $\Longrightarrow$ \fi}

\newenvironment{remark}{\begin{trivlist}\item[]{\bf Remark\ \ }}{\end{trivlist}}
%\newenvironment{proof}{\begin{trivlist}\item[]\hspace{\parindent}%
%                      {\em Proof.}}{\qed\end{trivlist}}
%\def\qed{\mbox{}~\hfill~$\Box$}

\newcommand{\irule}[2]%
    {\mkern-2mu\displaystyle\frac{#1}{\vphantom{,}#2}\mkern-2mu}
\newcommand{\air}{\hspace{0.8cm}}


\def\Bypass{\hbox{\sl\ bypassing\ }}
\def\Thru{\hbox{\sl\ thru\ }}





\def\Car{\hbox{\tt Car}}
\def\Dept{\hbox{\tt Dept}}
\def\Thing{\hbox{\tt Thing}}
\def\Comp{\hbox{\tt Computer}}
\def\owns{\hbox{\tt owns}}
\def\leases{\hbox{\tt leases}}
\def\refers{\hbox{\tt refers}}
\def\contains{\hbox{\tt contains}}
\def\with{\hbox{\tt within}}


\def\From{\hbox{\sl from }}
\def\To{\hbox{\sl to }}
\def\Stop{\hbox{\sl stop }}

\def\TG{{\sl TG}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PAPER STARTS HERE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\midformat
%\DRAFT

\title{
Traversals of Object Structures: Specification and  Efficient
Implementation%
\thanks{
Research supported by  Defense
Advanced Projects Agency (DARPA) and Rome Laboratory under agreements
F30602-96-0239 and F33615-00-C-1694.
}
}

\author{
Karl Lieberherr\\
{\tt lieber@ccs.neu.edu}\\
{College of Computer Science}\\
{Northeastern University}\\
{Boston, MA~~02115}\\
USA
\and
Boaz Patt-Shamir\\
{\tt boaz@eng.tau.ac.il}\\
Dept.\ of Electrical Engineering\\
Tel-Aviv University\\
Tel-Aviv 69978\\
ISRAEL
}




\begin{document}

\def\thepage{}

\maketitle

\begin{abstract}
Separation of concerns and loose coupling of 
concerns are important issues in software enginnering.
In this paper we show how to separate traversal concerns from other concerns,
how to loosely couple traversal concerns to the structural concern
and
how to efficiently implement traversal concerns.

Traversal of object structures is one of the ubiquitous routines in
most types of information processing. Ad-hoc implementations
of traversals lead to scattered and tangled code and in this paper
we present a new approach, called {\em traversal strategies,} to neatly
modularize traversals.
In our approach traversals are defined using a high-level
directed graph description, which is compiled into a dynamic road map to
assist run-time traversals.
The complexity of the compilation algorithm is polynomial in the
size of the strategy graph and the class graph of the given
application.
The implementation is practical and 
allows for dynamically creating and modifying the 
traversal strategy.
A prototype of the system has been developed and is being successfully
used in two application areas: implementing traversals and generating
adapters for software components.
Previous approaches to traversal specifications were less general
(corresponding to either a series-parallel or a tree graph), 
and their compilation algorithms were of exponential complexity
in some cases.
In an additional result we show that this bad behavior is inherent to
the static traversal code generated by previous implementations,
where traversals are carried out by invoking methods without parameters.
\end{abstract}

\newpage
\pagenumbering{arabic}

\section{Introduction}
\label{sec-intro}


\subsection{The Idea of Adaptive Traversals}

% [[[argue that traversals are practically
% useful because they give a further abstraction
% of the object structure. (help with existing software, portability).]]]

The run-time state of application programs, particularly of
object-oriented programs, can be represented as
 a directed graph, where objects are represented as nodes and field
names are represented as edges. To a large extent, program execution
can be viewed as traversing that graph.
%: in most cases,
%a basic traversal step consists of going over a single edge to use a field
%directly (as in ``$a.b$'').
%\footnote{
%We believe that it is good practice that
%only private methods access data directly \cite{karl-ian:soft1}. 
%Therefore, when
%we say ``use a field {\it a\/}'' we mean ``invoke the {\it get\_a}
%or {\it put\_a}  method.''
%}
% But in many other cases, a basic traversal step is deeper than
%that (as in ``$a.a1.a11.b$''): this is a typical outcome of {\em
%program evolution}, wherein a simple
%single-edge traversal  evolves
%into a more complex traversal, 
%as the class structure becomes more  refined.
Examples of traversals are that
sub-objects with certain properties
are sought; or it may be desired to compute
 a function of certain sub-objects of a given object.
In standard programming techniques,
expressing traversals involves a strong commitment to the whole class
structure traversed (since each hop in the traversal is explicitly coded as in
``$a.b$''), even if the task to be performed by the traversal  depends only on
the start and the target objects.

We call a concern that deals with traversing objects for implementing
some behavior of those objects a traversal concern.
A typical program operating on large objects contains many traversal concerns.
Those traversal concerns already exist at the design level and become more
refined as we move from the design object structure to the implementation
object structure.
The ad-hoc way for an experienced programmer to implement a traversal concern is
to write methods for each of the classes whose objects are traversed.
Unfortunately, this leads to a scattered and tangled implementation because the
code that implements the concern is spread across multiple classes 
and tangled with code from other concerns.

In this paper we propose a new paradigm, called {\em strategies,}
which helps us to not only cleanly modularize 
traversal concerns but also to minimally bind them to the structural concern, i.e.,
strategies allow the programmer to specify
 traversals in  a localized manner with minimal binding to the class structure.
Informally, the idea is to specify the high-level topology of the traversal,
in which only the key ``milestones'' are explicitly mentioned;
given a concrete class structure,
an executable traversal code is compiled, with all details filled in.
For example, using the current implementation of our system, 
using the DJ Library (see section \ref{ssec-TAO} and \cite{OrleansLieberherrReflection01,adaptive-methods-cacm-2001,DJ}),
a Java programmer may write 

\begin{verbatim}
ClassGraph cg = new ClassGraph();
ClassA o = new ClassA(...);
String whereToGo = "from ClassA to ClassB";
SomeClass whatToDo = new SomeClass(...);
cg.traverse(o,whereToGo,whatToDo)
\end{verbatim}
 
which means that when the {\sf traverse} method of class graph  {\sf cg} is invoked with
an object $o$ of class {\sf ClassA} as first argument, 
the constituents of $o$ will be recursively  
scanned according to the instructions in
{\sf whereToGo} to find all sub-objects whose class is {\sf ClassB}, executing 
what needs to be done in addition to the scanning as specified by {\sf whatToDo}.
For example, {\sf whatToDo} 
may produce an array which contains all {\sf ClassB} sub-objects
of $o$.
Note that an ad-hoc implementation of the above code may cut across
many classes and would be tangled with other code.

A traversal concern (expressing: where to go) deals with how to traverse through
objects. A traversal is a function that maps each object graph
rooted at object o to a subgraph rooted at o.  This subgraph may
then be traversed using one of several techniques, e.g.,
depth-first traversal that we use with DJ. We use a
language
tailored to the traversal concern and sentences in this language
we
express as strings in Java programs.
The simplest expression in our language is of the form 
{\sf "from ClassA to ClassB"}. 
This means: given an object of class {\sf ClassA} (an {\sf ClassA}-object),
select
a subgraph of the {\sf ClassA}-object that contains all {\sf ClassB}-objects. The
subgraph
of the {\sf ClassA}-object is not a
minimum subgraph, but it is minimum relative to the information
in the
class graph without look-ahead in the object graph.

The traversal relies on the following primitive: Given
an object graph of some class graph, and a root object o of class {\sf ClassA}
from
which the traversal {\sf from ClassA to ClassB} starts, we need to decide which objects to
visit from o, i.e., we need to compute first(o), the set of
edges that we need
to traverse from o. Our goal is to make the traversal efficient
in the sense that we don't want to look ahead in the object
graph whether going to an object in first(o) will eventually
lead us to a {\sf ClassB}-object. We only look ahead in the class graph
because we have complete information
about the shape of objects. So first(o) will contain all those
edges after which, according to the class graph information,
there is still a possibility
of reaching a {\sf ClassB}-object. We accept that for this
particular object some of the traversals might result in a dead end.
A source of confusion is that a traversal may traverse any number of paths from 
a {\sf ClassA}-object to {\sf ClassB}-objects especially if in the class
graph there are several paths from {\sf ClassA} to {\sf ClassB}.
But in all those cases, the semantics is precisely defined 
and this has already been the case in our earlier paper \cite{PXL-95} 
on a subset of the traversal strategy language discussed here.
The semantics is also clearly defined in the case of multiple
paths in the traversal strategy.

The previous paragraph gives the simple, yet  complete meaning of the traversal 
{\sf "from ClassA to ClassB"}. Adaptive programmers need to know
only this meaning of traversals which is further detailed
in \cite{adaptive-methods-cacm-2001} but this paper is also for 
implementors and we are studying efficient implementations of this meaning
in the case that the traversal strategy is a general directed graph.


Back to our example:
{\sf whatToDo} is called a visitor object.
A visitor object is an ordinary object that has methods that are executed
as objects of classes specified in the visitor class are visited by the traversal. 
Visitor objects are named after the Visitor design pattern 
\cite{gang-of-4}
but are much simpler
than visitor objects described by the Visitor design pattern because
none of the scaffolding is needed.

%\footnote{
%The generated traversal  will
%visit the same objects that manually-written code would visit.
%This means that all sub-objects which
%{\em may} contain an object of class {\sf ClassB} will be scanned:
%In some cases, there
%is no way other than traversing to find out 
% whether the desired target exists in a 
%particular location, since
%the actual object structure may be determined only at run-time.
%}
One obvious advantage of strategies is that the traversal 
is created automatically; another advantage is that the traversal
specification is loosely coupled to the class structure: it works for
 the case where all {\sf ClassB} objects are directly contained in 
the {\sf ClassA} object, as well as for the case where the {\sf ClassB} objects
are deeply buried in sub-objects of {\sf ClassA} objects.

More specifically, the basic concept of strategies can be
defined as follows. 
Given a class graph $G=(V,E)$, a strategy is a subgraph $S$ of the
transitive closure of $G$, that is, there may be an edge $(v,u)$ in
$S$ only if $u$ is reachable from $v$ in $G$. 
Thus, each edge $(u,v)$ in the strategy graph defines a set of paths
in the class 
graph: the paths from node $u$ to node $v$. The next step is observing
that by the natural notion of concatenation, we get that each
{\em path} in the strategy graph defines a set of paths in the class graph.
Finally, using specially marked nodes
in the strategy graph, called {\em source} and {\em
  target}, one can define a set of paths in the strategy graph:
the paths from the source node to the target node. Since each path in
the strategy graph defines a set of paths in the original class graph,
we have that a strategy defines a set of paths in the class
graph.
The paths in the class graph are expansions of the paths in the strategy graph.
A path $p$ is an expansion of a path $p1$ if $p1$
can be obtained by deleting some elements from $p$.

We can apply the concept of expansion also to define the semantics
of a traversal strategy directly at the object graph level.
When a traversal reaches a target node in the object graph,
the path traversed from the source, with suitable substitution 
of subclasses by superclasses, must be an expansion of an $s-t$ path
in the strategy graph. $s$ is the source and $t$ the target of the strategy.
When a traversal reaches a final node in the object graph without
being at a target node, the path traversed, with suitable substitution
of subclasses by superclasses, must be 
a prefix of an expansion of an s-t path in 
the strategy graph.
The prefix is the longest prefix such that there is still a
possibility of reaching a target node as determined by the class graph.
Thus, a strategy effectively selects a set of paths in an object
graph, by specifying relatively little detail.
The union of all those object graph 
paths selects a subgraph of the object graph.

This basic idea is extended in many respects, as described in Section
\ref{sec-strategy}. 
To give a more concrete flavor for the usefulness of strategies, let
us demonstrate with the following simple example.
\ignore{
*NEW* 

To give a more precise intuition behind strategies, we introduce 
a simplified form of strategies. Understanding those simplified strategies
will help to understand the more general strategies 
explained later in the paper.
For the purpose of this simplified explanation only, a strategy is a directed
graph S. We call a graph G to be an instance of S, if S is a 
connected subgraph of the transitive closure of G. 
The transitive closure of G=(V.E) is the graph G*=(V,E*), where 
E*={(v,w): there is a path from vertex v to vertex w in G}.
It is useful to think of S as an "abstract" graph
that defines a path set in the "concrete" graph G that is an instance of S.
The concrete graph typically represents a UML class diagram \cite{xy} or 
an XML schema \cite{xy}. The purpose of the path set defined by S is to control
the navigation through objects of the class diagram or through documents of the
schema. The paper explains in detail, how two graphs S and G are compiled
into a traversal graph that encodes a potentially infinite path set in G
succinctly. The paper also explains how the traversal graph is used to
efficiently control the traversal through objects and documents. 
The challenge is that the traversal graph is inherently a non-deterministic
finite state machine and conversion to a deterministic state machine
would be too expensive.
Consider the graph in Fig. 1 as a graph G. As a graph S we choose the graph
with nodes BusRoute, BusStop and Person that are connected by an
edge from BusRoute to BusStop and an edge from BusStop to Person.
S is an example of a strategy graph for G because it is a subgraph
of the transitive closure of G.

*END NEW* 

}


\begin{figure}
\centerline{\psfig{file=busstop1.ps,width=\textwidth}}
\caption{\em Bus simulation class graph. 
Squares and hexagons denote  classes (concrete and abstract, 
respectively), regular arrows denote fields and are 
labeled by the field name, and heavy arrows (labeled with $\diamond$) 
denote the subclass relation (for the shading, see text). 
}
\label{fig-bus1}
\end{figure}

% In this example, we use a notation
% similar to the UML notation \cite{grady-jim:95}
% for class graphs. The changes are:
% Abstract classes are inside hexagons, instead of squares and
% inheritance edges are drawn in the opposite direction and
% are called subclass edges. They are also labeled with a diamond.

\subsection{Example}

Consider a program simulating  bus route management. 
For expressing class graphs,
we use the graphical
notation of our current implementation (called {\em DemeterJ}, 
\cite{cse:preventive}). Alternative notations would be 
the UML class diagram
notation \cite{grady-jim:95} or an XML schema notation \cite{URL:XMLschema}.
For expressing behavior, we use standard Java and the DJ library \cite{DJ}.
Consider the class graph depicted in Fig.~\ref{fig-bus1},
which defines a data structure describing a bus route. 
A bus route object consists of two lists: a list of
bus objects, each containing a list of passengers; and a list of bus stop 
objects, each containing a list of people waiting.
Suppose that as a part of a simulation, we would like to determine
the set of person objects corresponding to people 
waiting at any bus stop on a given bus route.
The group of collaborating classes which is needed
for this task is shaded in Fig.~\ref{fig-bus1}. 
To carry out the simulation, an object-oriented program
should contain a method for each of these shaded classes.
These methods that are scattered across several classes would traverse 
bus route objects.
However, using the technique of strategies, one can solve the  
problem in a much more elegant way, by modularizing the code
and keeping it in one place, rather than scattered through several
classes and tangled with other code. 
We define a strategy graph 
with nodes {\sf BusRoute}, {\sf BusStop} and {\sf Person} that are
connected by an edge from {\sf BusRoute} to {\sf BusStop}, and an edge
from {\sf BusStop} to {\sf Person}. (Note that this is a  strategy
graph because it is a subgraph 
of the transitive closure of the class graph.)
In our formalism, the strategy can be expressed as follows.

\begin{denselist}
\item
\label{sol-1}
{\tt
from BusRoute through BusStop to Person
}
% \item
% \label{sol-2}
% {\tt
% from BusRoute bypassing Bus to Person;
% }
\end{denselist}

\begin{figure}
\centerline{\psfig{file=busstop2.ps,width=\textwidth}}
\caption{\em
Evolved bus simulation class graph.
}
\label{fig-bus2}
\end{figure}
\noindent
% Both strategies choose the desired set of classes.

The benefit of strategies is apparent when considering the following
scenario:
Suppose that the bus route class has been modified so that the 
bus stops are grouped by  villages.
The revised class graph is depicted in Fig.~\ref{fig-bus2}. To 
implement the same requirement of finding all people waiting for a bus,
an object-oriented program
must now contain one method for each of the classes shaded in
Fig.~\ref{fig-bus2}, and thus the previous object-oriented implementation  
becomes invalid. The traversal strategy (\ref{sol-1}), however, is
up-to-date and does not require any rewriting. 
In fairness, the revision to the class graph must preserve the
class names referred to in the traversal strategy and the 
meaning of the traversal strategy must be correct for the new class graph.
When a class graph is changed, it is important to check the
correctness of all traversal strategies that depend on that class graph.
Sometimes it is necessary to refine the strategies to make them
correct in the new class graph. But this is easier than updating
all traversal methods manually \cite{karl:demeter}.

The actual work on the objects is done by the so-called {\em visitor
  methods\/}: these are methods that can be associated with stategy
points, specifying what to do when the traversal arrives at the
object. The invocation order of the methods can be controlled for each
object. Some standard visitor methods (e.g., printing, copying,
testing for equality etc.) are provided as
part of our implementation.

Strategies effectively filter out the noise in the class graph
which is irrelevant to the implementation of the current task.
For the class graph in Fig.~\ref{fig-bus2},
the simple strategy (\ref{sol-1}), which mentions only three classes,
replaces methods for ten classes: {\sf BusRoute, VillageList,
NonEmptyVillageList,
Village, BusStopList, NonEmptyBusStopList, BusStop, 
PersonList, NonEmptyPersonList, Person}.

To show how to program with strategies, we complete
the Java program (using the DJ library)
of finding all people waiting at any bus stop
on a particular bus route. 

%\begin{verbatim}
%BusRoute { 
%  //...
%  traversal WP(PrintVisitor) {
%    through BusStop to Person        //  "From BusRoute" is implicit
%  };
%
%  void printWaitingPersons () =      // definition of an adaptive method
%     WP(PrintVisitor); 
%}
%\end{verbatim} 

\begin{verbatim}
class BusRoute {
// ...
  ClassGraph cg = new ClassGraph();
  String whereToGo2 = "from BusRoute via BusStop to Person";
  void printWaitingPersons() { cg.traverse(
    this, whereToGo2, new PrintVisitor()); }
}
\end{verbatim} 

The program above defines a method called {\sf printWaitingPersons} for
the class {\sf BusRoute}. This method will execute the traversal specified 
by traversal strategy (\ref{sol-1}),
and it will print the object of class {\sf BusRoute} using
the visitor class {\sf PrintVisitor}. % (the 
% Demeter/Java system generates
% automatically a few generic visitors for printing, copying, testing
% for equality, etc.). 
Note that the definition of {\sf printWaitingPersons}
works without any change for both class graphs, which is the reason
for calling it an {\em adaptive method} 
\cite{adaptive-methods-cacm-2001}.
%The constructor {\sf ClassGraph()} uses Java reflection to construct 
%the class graph object underlying a given Java program.

Notice that the adaptive method is expressed in plain Java using the 
DJ library of which we use the classes {\sf ClassGraph} and {\sf Visitor}, the super
class of all visitor classes, such as {\sf PrintVisitor}.
A {\sf ClassGraph}-object is a graph whose nodes are classes and whose
edges are is-a and has-a relationships
between classes. Class {\sf ClassGraph} provides methods to create and
maintain a class graph.
%and it defines four methods whose ad-hoc
%implementation would cut across many classes.
The simplest way
to create a {\sf ClassGraph}-object is to call the constructor
{\sf ClassGraph()} without arguments which will create the class graph
using Java reflection by taking 
all classes in the default package \cite{OrleansLieberherrReflection01}.
A traversal specification
may be applied to both a {\sf ClassGraph}-object and a Java
object.
From the point of view of a {\sf ClassGraph}-object, a
traversal specification
is a subgraph of the transitive closure of the {\sf ClassGraph}-object
if it is applied to a class graph
it selects a subset of the paths in the class graph.
If applied to
a Java object, a traversal specification
defines a
subgraph of the object graph representing the Java object.



In this implementation of adaptive programming with DJ the class graph 
and the traversals are computed dynamically. 
In previous implementations of adaptive programming for 
C++ \cite{karl:demeter} and Java \cite{cse:preventive,DemeterJava:98},
the traversals are computed statically.

To show the details of visitors, we write a Java method that counts
(instead of prints) all people waiting at any bus stop
on a particular bus route. 
Because the traversals for {\sf printWaitingPersons} and {\sf countWaitingPersons}
are identical, we reuse the same traversal strategy {\sf whereToGo2}.
We also reuse the class graph {\sf cg}.

\begin{verbatim}
// plain Java code using DJ
// inside class BusRoute
  int countWaitingPersons() {
    Integer result = (Integer) cg.traverse(this, whereToGo2, new CountVisitor());
    int r = result.intValue();
    return r;
  }
class CountVisitor extends Visitor {
  int c;
  public void start() { c=0; }
  public void before(Person o) { c++; }
  public Object getReturnValue() { return new Integer(c); } 
}
\end{verbatim}

Class {\sf Visitor} has a simple interface: with the {\sf start} method
we say what needs to be done before the traversal starts.
With the {\sf getReturnValue} method we express what needs to be returned when
the traversal completes. With a {\sf before} method we express what
needs to be done before we visit an object of a specific class.
There are also {\sf after} and {\sf around} methods;
the complete API is documented in \cite{DJ}.

%Please note that this example only shows one way of using traversal strategies
%in programs. An alternative way would be to generate traversal methods
%for each class being traversed. This alternative would be more efficient
%and is implemented in DemeterJ 
%\cite{cse:preventive}.

\subsection{Context}

\paragraph{Related work.}
It is surprising to see that despite the universality of traversals in
programming, only very little work has been done in this direction.
Until recently, the automation of traversal of object structures
using succinct representations is unique to Demeter 
(\cite{lieber-nacho-cun:pp-cacm}, see
below); the rising popularity of markup languages in general, and XML
in particular, created a new interest in traversals. In this section
we list some work relevant to traversals.

%*NEW*

XML is a new standard for defining and processing markup languages
for the web \cite{XML}. %(http://www.w3.org/TR/REC-xml).
XML uses grammars (also called {\em document type definitions} or {\em
  schemas\/})
to define a markup language for a class of documents.
To select subsets of XML document elements, the W3 Consortium
  recently  introduced a language called 
XPath \cite{XPath}. % (http://www.w3.org/TR/xpath) in November 1999.
The way elements are selected in XPath is by navigation, somewhat resembling
the way one selects files from an interactive shell, but with a much
richer language.
Recently \cite{XPath-traversals}, XPath has been
proposed as input to a universal object model walker
for arbitrary Java objects.
XPath expressions are used to describe {\em sets of objects}, in
the sense that the value of an expression is an {\em unordered}
collection of objects {\em   without duplicates}. 
This is in contrast to traversals, whose value
is a set of {\em paths}, so that the objects of each path are
explictly ordered and may appear more than once, even on the same
path. It is quite easy to implement XPath using strategies, using
specialized ``visitors.'' The converse, however, does not hold, due to
the lack of structure in XPath expression values.
While XPath is a powerful language to address parts of an XML
document, 
%and even though its expressive power is sufficient to
%describe any graph, 
there are cases in which strategies can be used to
select the same 
sets with {\em exponentially} shorter representation than the
representation of XPath.

A bad example for XPath is (currently) as follows (XPath is 
in the process of being extended moving closer to the
traversal strategy model to make this also easily expressible). 
Take a strategy graph with start node
$S$ and target node $T$ and nodes $A_i$ and $B_i$ for $i$ from 1 to $n$ and
nodes $C_j$ for $j$ from 1 to $n-1$.
There are edges from $S$ to $A_1$ and $B_1$; from $C_i$ to $A_{i+1}$
and $B_{i+1}$ and from $A_i$ and $B_i$ to $C_i$
for  $i$ from 1 to $n-1$; from $A_n$ and $B_n$ to T.
Note that there exponentially many paths from $S$ to $T$ and if we want to
express the $T$ nodes that we want to select in XPath, we have to enumerate 
all those paths using the XPath notation. The size of the strategy graph
solution is linear, while the size of the XPath solution is exponential.

This example may lead to exponential running times for some input objects
both for the XPath and the traversal strategy case.
It is the responsibility of the programmer to recognize this
possibility and deal with it using appropriate visitor objects.


% Bad example: standard Bellman-Ford

% it contains as a special case a specialized 
% form of the traversal strategies described in this paper.
% Therefore, the algorithms presented in this paper provide for a formal
% foundation for compiling an important aspect of XPath expressions.
% Implementors of XPath need to find solutions for the algorithmic
% problem solved in this paper.

% It is important to remark that XPath and traversal strategies are not directly
% comparable. XPath is a much larger language than traversal strategies and therefore more general,
% yet traversal strategies can express navigation that XPath cannot.
% The reason is that XPath only deals with
% series-parallel traversal strategies introduced in  \cite{PXL-95}.

% A brief example of an XPath expression as follows. We assume that the
% current note is a Library object.
% etc.


% *END NEW*


In the context of {\em object-oriented databases},
 traversals are heavily used. Some automation of traversal was
suggested in
\cite{ns:vldb88,bv:navigation,MarSho:abbrev-query-int93,querying-oodb:kim92,ioannidis:path-expr}. 
Roughly speaking, the idea in these papers is to
traverse to a target without specifying the full path leading to it.
Cast in our terms, one can view these techniques as a variant of
line-graph strategies (i.e., strategy graphs with a single path) ;
 however, their goal is to allow the 
user to abbreviate the laborious specification of a full query, and
their main concern is how to complete the abbreviation
 when it is ambiguous, sometimes using heuristics.
Another complication these approaches confront is that queries are
specified on-line and can therefore refer to run-time structures.
By contrast, our approach ignores the ambiguity problem by traversing
all qualified objects, and requires traversal specifications to refer only
to compile-time structures. On the other hand,  strategies allow
for general graph specification, and entail (when combined with visitors)
the power of a full-fledged
programming language.

In the context of programming languages,
traversals are frequently used as a part of {\em attribute grammars},
for traversing abstract syntax trees \cite{WaiteGoos84}.
Using conventional programming techniques, the details of traversals must be
hard-coded in the attribute grammar;
this fact makes attribute grammars hard to maintain, say in the case
of some modifications in the grammar \cite{kastens:waite}.
In the Eli system \cite{ELI}, this problem is addressed by
separating the details of the grammar from the underlying algorithm,
using traversal specifications which basically correspond  to
single edge strategy graphs.
Meta-programming techniques have also been developed for
traversals. For example,
in \cite{cameron-ito:gramps}, a simple kind of traversal
(corresponding to a one layer tree graph) is used in a
meta-program; this traversal scans all objects and executes the
specified code at the desired targets.

The {\em Visitor} design pattern is discussed in many
   software-engineering works (e.g., \cite{gang-of-4}). While this
   approach identifies and isolates the task of traversal, no mechanism
   to automate the task and make it adaptive was previously
proposed. Moreover, no 
   formal treatment of traversal was offered.
As a side remark from the software engineering perspective, we note that
our approach of separating the traversal task from the class-structure
of an object oriented program can be viewed as a special case of 
{\em aspect oriented programming} \cite{sdcr:aop}, 
where the idea is to try to align
different conceptual aspects of programming with actual code modules.
%
% *NEW*
Another tool for aspect-oriented programming is 
AspectJ from Xerox PARC \cite{AspectJ}. Generally speaking, AspectJ
allows the programmer to manipulate {\em pointcuts}, which are a collection
of points in the execution. On one hand, pointcuts are more general than
traversals, since they allow more entities to be included (e.g., an
arbitrary  set
of method invocations). On the other hand, pointcuts are less
structured than traversals, as the members of pointcuts are not
ordered, while traversals impose a specific navigation sequence on its
members.

%  to define execution points
% that cut across the program.
% While traversal strategies are currently not used in AspectJ, they
% are an excellent tool for defining class-graph-connectivity-related pointcuts.
% *END* *NEW*



The concept of automated traversal generation using succinct
representation was introduced
in \cite{lieber-nacho-cun:pp-cacm,PXL-95} and is extensively treated in \cite{Lieber:book}. 
The traversals using the syntax of \cite{PXL-95}
essentially describe series-parallel graphs, which seem to be an
important special case in practice.
However, the algorithm provided
by \cite{PXL-95} cannot be applied for a significant
subset of combinations of class structures and traversal specifications.
A subsequent paper \cite{PPL-96} removed this restriction
for the same specification language using  a finite-automata based approach
 to compilation. Unfortunately,
the algorithm of \cite{PPL-96} has exponential running time in the worst
case, since it may generate exponentially many traversal methods.
In \cite{LPP-97}, the idea of tree-strategy is introduced, and
a polynomial-time compilation algorithm is presented.

\paragraph{New contributions.}
The contributions of this paper are three-fold:
 an extension to the traversal specification language,
 a polynomial-time compilation algorithm for the extended language,
 and a lower bound result which explains
 the shortcomings of the previous algorithms.
More specifically, we allow the underlying specification of a traversal to
have any topology, generalizing the series-parallel and tree
topologies considered previously.
The compilation algorithm generates code whose running time may be
slightly worse than the running time of the code generated by 
previous compilation algorithms (when they apply), since the previous
algorithm generated traversal methods which did not pass arguments at all. 
However, this minor penalty in running time is unavoidable if we
want the size of the traversal code to be reasonably bounded: we prove
that if no arguments are passed by the traversal methods, then there are
cases where the number of distinct traversal methods must be exponential
in the size of the strategy specification.

\paragraph{Empirical experience.}
A prototype of the system, called Demeter/\C++,
and its subsequent incarnations
have been used at Northeastern University since 1992, and in other places, 
including at Citibank, IBM, Bell Northern Research, Credit Suisse
and at several universities. See \cite{URL:demeter} for an extensive
description of the system and relevant references.
The first version of Demeter allowed only very simple traversals (corresponding
to a single-edge strategy graphs), and generated code in \C++.
The current version of  Demeter/\C++ compiles traversals
which can be described by a series-parallel graph,
 but only for a restricted set of
class graphs.
Since 1996 the main development effort has focused on a system which
generates Java code, called DemeterJ \cite{cse:preventive}.
The current version of DemeterJ (formerly called Demeter/Java) and DJ
support strategies as described in this
paper. 
%Aside from all the development of the Demeter systems (which, 
%of course, is being carried out on a Demeter system) the Demeter system
%and its underlying ideas
%have been used in several commercial development
%environments. 
%For example, StructureBuilder from Tendril Software
%\cite{URL:tendril} is a tool which 
%supports automatic generation of traversal methods
%based on pointing and clicking in a class graph.

Hewlett-Packard has reported a positive experience 
in using the traversal/visitor style of programming for writing 
installation software for the HP printers family \cite{URL:hp-praise}.

\paragraph{Serialization/Marshalling}

Object serialization/marshalling is a very common example of object traversal.
A serializer is a tool transforming partial graphs of objects into a stream of bytes.
In \cite{Lopes96a}, simple traversal directives (which are
single edge strategy graphs) are used to specify
which parts of a compound object should be copied and which should be
passed by reference when using remote
method invocations.
Our work on traversals is useful to current work in 
object serialization \cite{phillipsen:CPE,bartoli:SPE,hoschka:marshalling} in the following way:
A common concern in serialization is how to generate serialization code
with minimal impact on the code size of the application,
or alternatively, how to arrange dynamic traversal with minimal
run-time impact.
We address this concern by showing in \ref{sec-lb} that 
if the traversal methods are not generated carefully from a partial object specification
(as described
in this paper), one might end up with exponential code size.
Our implementation of DJ shows how to arrange dynamic traversal
and the DemeterJ implementation shows how to generate efficient code efficiently.
We also show a general way to specify partial objects.
This paper solves the problem where we use a marshalling language (our traversal strategy language)
to specify partial object graphs.


\subsection{Paper Organization}
The remainder of this paper is organized as follows.
In Section \ref{sec-prel} we introduce 
the basic concepts, terminology and notation
 we use throughout the paper.
In Section \ref{sec-traversals} we give a definition for
 the concept of traversals, based on \cite{PPL-96}.
In Section \ref{sec-strategy} we define the new concept of strategies.
In section \ref{sec-alg} we specify and analyze the algorithm which
translates strategies into traversal code.
In Section \ref{sec-lb} we prove a lower bound for traversal methods
that do not pass arguments.
In Section \ref{sec-notes} we comment about some practical aspects of
the implementation of the strategies approach.
We give a few concluding thoughts in Section \ref{sec-conc}.

\section{Preliminaries}
\label{sec-prel}
%%%%%%%%%%%%%%%%%%%%%%%%%
In this section we formally define the basic concepts, terminology and
 notation  we use throughout this paper. All notions in this section
 are standard, 
 with the exception of Subsection \ref{ssec-assumptions}.

\subsection{Graphs and paths}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A directed graph is a pair $(V,E)$ where $V$ is a set of
{\em nodes}, and $E\subseteq V\times V$ is a set of {\em edges}.
A  directed labeled graph is a triple $G=(V,E,L)$ where $V$ is a set of 
{\em nodes}, $L$ is a set of {\em labels},
and $E\subseteq V \times L \times V$ is a set of {\em edges}.
If $e=(u,l,v) \in E$, then $u$ is the {\em source} of $e$, $l$ is the 
{\em label} of $e$, 
and $v$ is the {\em target} of $e$.
We denote an edge $(u,l,v)$ by $\edge u l v$.

Given a directed labeled graph $G=(V,E,L)$,
a {\em node-path} is a sequence $p=\Seq{v_0v_1\ldots v_n}$,
where $v_i\in V$ for $1\le i\le n$, and $\edge{v_{i-1}}{l_i}{v_i}\in E$ 
for some $l_i\in L$ for all $0<i\le n$.
Similarly, a {\em path\/} is a sequence 
$v_0l_1v_1l_2\ldots l_nv_n$ where $v_0\ldots v_n$ is a node-path, 
and $\edge {v_{i-1}}{l_{i}}{v_{i}}\in E$
for all $0< i\le n$.
Unlabeled graphs have only node-paths.
Paths of the form $\Seq{v_0}$ are called {\em trivial.}
The first node of a path (or a node-path) $p$
is called the {\em source} of $p$, and the last node in $p$
is called the {\em target} of $p$,  
denoted $\Source(p)$ and $\Target(p)$, respectively.
The elements other than the source and the target of a path 
(nodes for a node-path, nodes and edges for a path) are 
the {\em interior} of the path.
For a graph $G$ and nodes $u,v$
we define $P_G(u,v)$ to be the set of all paths in $G$ with source
$u$ and target $v$.

If $p_1 = v_0 \ldots l_i v_i$ and $p_2 = v_{i} l_{i+1}\ldots v_n$
are paths with the target of $p_1$ identical to
the source of $p_2$, we define the concatenation 
$p_1 \cdot p_2 = v_0 \ldots v_{i-1}l_i v_{i}l_{i+1}v_{i+1} \ldots v_n$.
Notice that $p_1 \cdot p_2$ contains only one copy of the meeting point
$v_i$. Concatenation of node paths is defined similarly.
Let $P_1$ and $P_2$ be sets of paths such that for some node $v$,
$\Target(p_1)=v$ for all $p_1\in P_1$, 
 and $\Source(p_2)=v$ for all $p_2\in P_2$.
Then we define
$$
P_1 \cdot P_2 = 
\Set{ p_1 \cdot p_2\mid p_1 \in P_1\mbox{ and }p_2 \in P_2}~.
$$

\subsection{Class graphs and object graphs}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this paper we will be interested in special kinds of graphs, called class
graphs and object graphs, defined as follows.

Fix a finite set $\cC$ of {\em class names}.
Each class name is either {\em abstract} or {\em concrete}.
% with a predicate $\Abstract$ defined over it. 
Fix a finite set $\cL$ of {\em field names}.
We sometimes call field names {\em labels}.
%with a total order $\preceq$ defined over it.
We assume  the existence of two distinguished symbols:
$\This\in\cL$ and $\diamond\notin\cL$.
Class graphs model the class structure of object-oriented programs.
Formally,
{\em class graphs} are graphs $G=(V,E,L)$ such that
\begin{itemize}
\item
$V\subseteq\cC$, i.e., the nodes are class names.
\item
$L\subseteq\cL\cup\Set{\diamond}$, i.e., edges are labeled by field names
or ``$\diamond$''. Edges labeled by a field name are called %{\em construction}
{\em reference} edges, and edges labeled by $\diamond$
are called {\em subclass} edges.
\item
For each $v\in V$, the field names of all edges outgoing from $v$ are 
distinct (but there may be many edges labeled by $\diamond$ outgoing from $v$).
\item
For each $v\in V$ such that $v$ is concrete, $\edge v\This v\in E$.
\item
The set of subclass edges is acyclic.
\end{itemize}
%
We shall use the (reflexive) notion of a {\em superclass\/}: 
given a class graph $G=(V,E,L)$,
we say that $v\in V$ is a  superclass of $u\in V$ if there is
a (possibly empty)
path of subclass edges from $v$ to $u$. The collection of all super-classes
of a class $v$ is called the {\em ancestry} of $v$.
Multiple inheritance conflicts are disallowed:
we require that the following condition holds true.
\begin{quote}
{\bf Single Inheritance Condition:}
For all nodes $v$, if $v$ has two super-classes $u$ and $w$
with outgoing edges labeled by the same label, then either $u$ 
is in the ancestry of $w$ or $w$ is in the ancestry of $u$.
\end{quote}
The {\em induced references} of a given class $v$ is the set of all
reference edges outgoing from its ancestry, with the usual overriding rule:
for each label $l$ used in edges outgoing from the
ancestry of $v$, only the edge labeled $l$ closest to $v$ is
 in the induced references of $v$. The notion of ``closest'' is well defined
by the Single Inheritance Condition above.
%
%\footnote{
%We note that in accordance with recent trends (notably Java), our
%model does not allow multiple inheritance. This requirement is not
%necessary: the only thing we would need to change is to disallow
%inheriting the same label from unrelated super-classes.
%}
Note that since a class is a superclass of itself, the induced edges
include both the direct references and the inherited references.

Next, we 
define {\em object graphs}, which model
the instantiations of class graphs.
An  object graph is a labeled directed graph $\Omega=(V',E',L')$,
where nodes are called {\em objects}, and $L'\subseteq\cL$.
An object graph $\Omega=(V',E',L')$ is an {\em instance} of a
class graph $G=(V,E,L)$ under a given
 function $\Class$ mapping objects to classes, 
if the following conditions are satisfied.
\begin{itemize}
\item
For all objects $o\in V'$, $\Class(v)$ is concrete.
\item
For each object $o\in V'$, the labels of edges outgoing from $o$
is exactly the set of labels of the induced references of $\Class(v)$.
(In particular, this means that the edges outgoing from $o$
have distinct labels.)
\item
For each edge $\edge o l {o'}\in E'$, $\Class(o)$ has an induced reference edge
$\edge v l u$ such that $v$ is a superclass of $\Class(o)$ and $u$ 
is a superclass of $\Class(o')$.
\end{itemize}
For the greater part of this paper, we shall assume that object graphs
are acyclic. We discuss an extension to cyclic object graphs in Section
\ref{sec-ext}.

\subsection{Non-standard notions}
\label{ssec-assumptions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In this paper, we assume that class graphs are
{\em simple}, formally defined as follows.

\begin{definition}
\label{def-simple}
A class graph $G=(V,E,L)$ is {\sf simple} if 
\begin{enumerate}
\item
for all edges $\edge u l v \in E$, we have that $l=\diamond$ if and
only if $u$ is abstract, and
%IE: all edges outgoing from  abstract classes are subclass edges
%    and all edges outgoing from concrete classes are reference edges
\item
for all edges $\edge u\diamond v \in E$, we have that $v$ is concrete.
%IE: all subclass edges are incoming into concrete classes.
\end{enumerate}
\end{definition}
%
The first requirement says that 
 all edges outgoing from  abstract classes are subclass edges
    and all edges outgoing from concrete classes are reference edges.
This property is called {\em flatness}.
Flatness helps us map paths in a class graph $G$ to paths
in an object graph which is an instance of $G$. The second requirement
says that all subclass edges are incoming into concrete classes; this
helps us find all subclasses of a given class quickly.
Note that no generality is lost by the assumption that class graphs are simple,
as the following proposition asserts.
%
\begin{proposition}
\label{prop-simple}
Let $G=(V,E,L)$ be an arbitrary class graph. Then there exists a class graph
$\Simple(G)=(V',E',L)$ such that
an object graph $\Omega$ is an instance of $G$ if and only if $\Omega$
is an instance of $\Simple(G)$. Moreover, $|V'|=O(|V|)$ and $|E'|=O(|E|^2)$.
\end{proposition}
The $\Simple$ transformation is outlined in Appendix \ref{sec-simpleten}.

Define a {\em concrete path} to be an alternating sequence of concrete class
names and labels (excluding $\diamond$).
We shall map paths in class graphs to concrete paths
by omitting abstract classes and subclass edges.
We refer to this mapping as the {\em natural correspondence},
and denote it by $X(p)$, where $p$ is a path in a class graph $G$ and
$X(p)$ is the corresponding concrete path.
Similarly, we denote the concrete path resulting from taking the
sequence of class names and edge labels in an object-graph path $p'$
by $Y(p')$, and (overloading the term) we call this mapping also
a {\em natural correspondence}.
The motivation for these definitions is that if $p$ is a path in a class
graph $G$, then there is some object graph $\Omega$ which is an instance
of $G$, and a path $p'$ is $\Omega$, such that $X(p)=Y(p')$.

 For a class-graph path set $P$, define
$X(P)\DEF\Set{X(p)\mid p\in P}$.

\section{Definition of traversals}
\label{sec-traversals}
%%%%%%%%%%%%%%%%%
%This paper revolves around the notion of traversals of object graphs.
We now arrive at the central topic of this paper: traversals of object graphs.
Informally, a traversal is a (possibly infinite) set of concrete
paths; when used in conjunction with an object graph,
it results in a sequence of objects, called the {\em traversal history}. 
The traversal history is a depth-first
traversal of the object graph along object paths agreeing with the
given concrete path set.
To make the traversal useful, each object has a special 
{\em visit} method attached to it;
when an object is added to the traversal history, this
method is invoked. 
% A few examples for traversals are given in Section
% \ref{ssec-examples} below.
 (A more comprehensive discussion
of the visitor programming pattern
and  visitor methods can be 
found in \cite{gang-of-4,spl:context-conf,seiter:ieee-se-98}.)

But first, we define traversals formally.
The definition here is adapted from
 the ``simplified semantics'' from \cite{PPL-96}.
We use a few technical notions.
For a set of sequences $R\subseteq\Sigma^*$ for an alphabet $\Sigma$, define
\(
\First(R) &=& \{x\in\Sigma \mid \exists\alpha.(x\alpha\in R)\} \\
\Chop(R,x)&=& \{\alpha \mid x\alpha\in R\mbox{ for some }x\in\Sigma\}\ .
\)
Intuitively, $\First(R)$ is the set of all first elements of $R$, and
$\Chop(R,x)$ is the set of all ``tails'' of sequences of $R$ that
start with $x$ (where a tail of a sequence is the whole sequence
except its first element).
%
%For a given a class graph $G$, define an
%{\em object path} for $G$ to be a path in some object graph
%which is an instance of $G$.

In the definition below, we assume that there exists a total order
$\prec$ on the set of field names $\cL$
(this assumption may be weakened somewhat). We first give the formal
definition, then explain it in words.

\begin{definition}[from \cite{PPL-96}]
\label{def-traversal}
Fix a class graph $G$.
If $\Omega$ is an acyclic object graph which is an instance of $G$, 
$o$ an object in $\Omega$,
$R$ a set of concrete paths corresponding to paths of $G$,
and $H$ a sequence of objects,
then the judgment
$$\Omega \vdash_s o:R \rhd H$$
means that when traversing the object graph $\Omega$ starting with $o$,
and guided by the concrete path set $R$, 
then $H$ is the {\sf traversal history}.%
\footnote{
The label $s$ of the turnstile indicates ``semantics.''
}
This judgment holds when it is derivable using
the following rules:
\begin{equation}
\label{eq-traversal-empty}
\irule{}
      {\Omega \vdash_s o: R \rhd \epsilon}
   ~~~~~~
      \parbox{3in}{
       if $\Chop(R,\Class(o))=\emptyset$,}
\end{equation}
where $\epsilon$ denotes the empty history, and
\begin{equation}
\label{eq-traversal-recursive}
\irule{\Omega \vdash_s o_i: \Chop(\Chop(R,\Class(o)),l_i) \rhd H_i
       \air \forall i \in 1..n}
      {\Omega \vdash_s o: R \rhd o \cdot H_1  \cdot ... \cdot H_n}
   ~~~~~~
      \parbox{3in}{
       if $\First(\Chop(R,\Class(o))) = \{l_i \mid i \in 1..n\}$, \\
                   $\edge o {l_i} {o_i} \mbox{ is in } \Omega,
                   i \in 1..n$, and \\
                   $l_j \prec l_k \mbox{ for } 1 \leq j < k \leq n$.}
\end{equation}
\end{definition}
%
In other words, a traversal of an object graph $\Omega$ starting with an
object $o$ guided by a path set $R$, is done as follows.
First, the first elements of the sequences of $R$ are compared to
$\Class(o)$: sequences beginning with another element are immediately
thrown out 
of consideration. If the remaining path set is not empty, then $o$
becomes  the first
element of the history; it is followed by the histories resulting from
starting a traversal from each descendent of $o$, guided by the
remainder of the path set after ``peeling off'' the first two
elements (corresponding to $o$ and the edge outgoing to the
descendent). Intuitively, this procedure is depth-first search on
$\Omega$ with $R$ used to determine how to prune the search.

\paragraph{Remarks.}
% First, note that if the object graph is cyclic, the judgment
% \ref{eq-traversal-recursive}
% is not well defined.
% (MITCH OBJECTS) since the histories $H_i$ might be infinite.
Note that the guarantee made by a traversal guided
by a path set $R$ is the following: A path $p$ in the object graph
is followed so long as there is
a path in $q\in R$ such that $q$ has a prefix which is equal to the current
prefix of $p$ (taking the $\Class(o)$ instead of $o$ in $p$). 
In other words, the decision whether the traversal
  takes a certain
branch in the object graph depends only on the portion of the graph
visited so far and on the current branch, and not on the links
further ahead. This means, for example, that even if all paths in $R$ end 
with the same class $A$, some of the traversal paths may end with a
node $o$ with $\Class(o)\ne A$
just because the path to $o$ is a prefix of a path in $R$.
This relaxation is necessary to enable efficient implementation of traversals
by looking only ahead in the class graph and not in the 
object graph as discussed earlier.


\ignore{
\subsection{Examples}
\label{ssec-examples}
%%%%%%%%%%%%%%%%%%%%%%
[[[ Java awt/different OS's system call example for finding an object; 
tax form example for  collecting items with a certain property.]]]
}

\section{Strategies: Specification of traversals}
\label{sec-strategy}
%%%%%%%%%%%%%%%%%%%%
In this section we define strategies,
which are a graph-based language for expressing traversals. 
In Section \ref{ssec-simple-strategy} we give a basic definition of
 strategies  and explain how strategies express traversals.
Then, in Section \ref{ssec-full-strategy}, we
give the full definition of strategies using the additional concept of
a constraint map. This extended notion is the one we shall be using in
the remainder of the paper. In Section \ref{ssec-strategies-ext},
we discuss a few possible additional refinements of the concept of
strategies.

\subsection{Strategies}
\label{ssec-simple-strategy}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 Traversals are defined in terms
of sets of concrete paths. 
Strategies select
class-graph paths and then derive concrete paths by
applying the natural correspondence.
Intuitively, a strategy selects class-graph paths
by specifying a high-level topology which spans all
paths in the selected set.
Formally,  strategies are defined as follows.


\begin{definition}
A {\sf  strategy} $\cS$ is a triple $\cS=(S,s,t)$, where
$S=(C,D)$ is a directed unlabeled graph called the {\sf strategy graph},
where $C$ is the set of {\sf strategy-graph nodes} and $D$ is the set
of {\sf strategy-graph edges},
and $s,t\in C$ are  the {\sf source} and {\sf target} of 
$\cS$, respectively. 
\end{definition}
%
The connection between strategies and class graphs is done by a name map,
defined as follows.

\begin{definition}
Let $S=(C,D)$  be a  strategy graph and let $G=(V,E,L)$ be a class
graph. A {\sf name map} for $S$ and $G$ is a function
$\cN:C\rightarrow V$. If $p$ is a sequence of strategy-graph nodes,
then $\cN(p)$ is the sequence of class nodes obtained by applying $\cN$
to each element of $p$.
\end{definition}

The basic idea of strategies is that
under a name map, a path in the strategy graph
is an abstraction of a set of paths in the class graph. 
This
is done by viewing each strategy-graph edge $\edge a{}b$
as representing
 the set of paths in the class graph starting with node $\cN(a)$ and
ending at node $\cN(b)$.
This representation naturally extends to paths in the strategy graph:
A path in the strategy graph represents a set of paths in the
class graph obtained by concatenating the sets of class-graph paths
obtained from each strategy-graph edge.


We now make this intuition formal using the 
concept of path expansion, defined as follows.

\begin{definition}
Given a sequence $p$, a sequence $p'$ is called an {\sf expansion} of $p$
if it can be obtained by inserting elements between the elements of $p$.
\end{definition}
%
Note that if $p'$ is a path which is an expansion of another path $p$
(possibly in another graph), then
$\Source(p)=\Source(p')$ and $\Target(p)=\Target(p')$.

We now formally
define the basic way strategies express paths in object graphs.
Recall that $P_G(s,t)$ denotes that set of all paths in $G$ starting at $s$
and ending at $t$.

\begin{definition}
Let $\cS=(S,s,t)$ be a  strategy, let
 $G=(V,E,L)$ be a class graph, and let $\cN$ be a name map for $\cS$
and $G$. Then 
$$
\cS[G,\cN]=\Bigl\{
X(p')\mid p'\in P_G(\cN(s),\cN(t))\mbox{ and }
 \exists p\in P_S(s,t)\mbox{ such that }p'\mbox{ is an expansion of }\cN(p)
\Bigr\}~.
$$
\end{definition}
%
Note that $\cS[G,\cN]$ is a set of concrete paths: 
intuitively, first a set of class-graph paths is selected,
and then the natural correspondence is applied to obtain concrete
paths.
These concrete paths can be used (playing the role of ``$R$'') in Definition
\ref{def-traversal}.


\subsection{Using a constraint map}
\label{ssec-full-strategy}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Strategies
impose positive constraints on paths, in the sense that they specify
which nodes must be traversed in which order. 
It turns out that it is quite useful to also have negative constraints:
what nodes and edges cannot be used between the specified milestones.
We formalize this idea with the concepts of element predicates
and constraint maps.

\begin{definition}
Given a class graph $G=(V,E,L)$, an {\sf element predicate} $EP$ for $G$
is a predicate over $V\cup E$. 
% such that if $EP(\edge u l v)=\True$ then $EP(u)=\True$ and $EP(v)=\True$.
Given  a strategy graph $S$,
a function $\cB$ mapping each edge of $S$ to an element predicate for $G$ is
called a {\sf constraint map} for $S$ and $G$.
\end{definition}
%
(Of course, some predicate specification languages may be very hard to
compute. For computational complexity purposes, 
we assume that there exists a parameter, denoted  $\tau$,  such that
given an element of $G$, determining whether it satisfies
an element predicate can be done in no more than
$\tau$ time units.)

%Element predicates select subgraphs
%(the condition $EP(\edge u l v)\Longrightarrow EP(u)\wedge EP(v)$
%ensures that if a node is not selected, then its incident edges cannot
%be selected).
The constraint map is used to specify, for each edge in the strategy graph,
 which elements of the class graph
may be used in the traversal corresponding to that edge.
Formally, we have the following definition.
% But to formalize this interpretation,
% we encounter some subtle problems  in the way positive
% constraints of the  strategy graphs and the negative constraints
% of the element predicates interact.
% Roughly speaking,
% the problems stem from the fact that there may be many paths
% in the strategy graph which can be expanded to the same
%  path in the class graph.
% We therefore force a specific way to decompose the paths using
%  the following definition.

\begin{definition}
\label{def-sat}
Let $S$ be a strategy graph, 
let $G$ be a class graph,
let $\cN$ be a name map for $S$ and $G$,
and let $\cB$ be a constraint map for $S$ and $G$.
Given a strategy-graph path $p=\Seq{a_0a_1\ldots a_n}$,
we say that a class-graph path $p'$  is
a {\sf satisfying expansion} of $p$ with respect to $\cB$ 
under  $\cN$ if there exist paths $p_1,\ldots,p_n$ such that
 $p'=p_1\cdot p_2\cdots p_n$ and:
\begin{enumerate}
\item
\label{cond-st}
For all $1\le i\le n$, $\Source(p_i)=\cN(a_{i-1})$ and 
$\Target(p_i)=\cN(a_i)$.
%\item
%\label{cond-1}
%For all $1\le i<n$, $\cN(a_i)$ does not occur in in the interior of $p_i$.
\item
\label{cond-sat}
For all $1\le i\le n$, the interior
elements of $p_i$ satisfy the element predicate 
$\cB(\edge{a_{i-1}}{}{a_i})$.
\end{enumerate}
\end{definition}
%Note that as a consequence of conditions \ref{cond-st} and \ref{cond-1},
%the decomposition in the definition above is unique (if it exists).
Note that there may be many ways to decompose a path in accordance
with Condition \ref{cond-st} in the definition above; 
a path $p'$ is a satisfying expansion of a path $p$ if for one of
these decompositions, Condition \ref{cond-sat} holds as well.%
\footnote{
Other definitions are possible, for example to require that a subpath
ends when its target node is reached. We have found the
non-deterministic definition above to be the most useful.
}
Note also that
the element constraints never apply to the ends of the sub-paths.
One consequence of our definition is that if $\cN(a_{i-1})=\cN(a_i)$
then the single node path $\Seq{\cN(a_i)}$ always satisfies
Condition \ref{cond-sat}, regardless of the element predicate
$\cB(\edge{a_{i-1}}{}{a_i})$.

Using the constraint map, we now define a more elaborate way 
in which a strategy expresses paths
in object graphs.

\begin{definition}
\label{def-strategy-paths}
Let $\cS=(S,s,t)$ be a strategy, let
 $G=(V,E,L)$ be a class graph,  let $\cN$ be a name map for $S$ and $G$,
and let $\cB$ be a constraint map for $S$ and $G$.
Then $\cS[G,\cN,\cB]$ is  the set of concrete paths defined by
$$
\begin{array}{ll}
\cS[G,\cN,\cB]=\Bigl\{
%&\mbox{\hspace*{-3mm}}
X(p')\mid&
 p'\in P_G(\cN(s),\cN(t))\mbox{ and }\\
&\exists p\in P_S(s,t)\mbox{ such that $p'$ is a satisfying expansion of $p$
w.r.t. $\cB$}\;
\Bigr\}~.
\end{array}
$$
\end{definition}
%
Note that $\cS[G,\cN]=\cS[G,\cN,\cB_\True]$ for the constraint map $B_\True$
which maps all strategy graph edges to the trivial element predicate
that is always $\True$.

% \paragraph{Examples.} [[[long get: non-recursive traversal; game
% example to show usefulness of cycles]]]

\subsection{Remarks}
\label{ssec-strategies-ext}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Encapsulated strategies.}
The way strategies are presented above, a constraint map can be specified
only when the class graph is given, as the element predicates are
expressed in terms of 
class graph nodes and edges. An important design consideration, however,
is to encapsulate the constraint map with the strategy and
use the name map as the only interface to the class graph; we call
this approach ``encapsulated strategies.'' The advantage of
encapsulated strategies is that they allow one
 to have a clean interface between the  strategy and
the class graph, captured completely by the name map.

We only outline the details
of the concept here, since it is not central to the algorithmic
issues we focus on in the remainder of this paper.
The idea is that instead of letting the element predicates range over
the (yet unspecified) class graph, they range over variables called
{\em symbolic names}.
Binding to actual class-graph elements is done only later, when the
name map is introduced.
Technically, we have an additional level of indirection
 in the encapsulated strategy: instead of explicit 
references to the class graph elements in the constraint map, 
the element predicates
 are predicates over {\em symbolic nodes} and {\em symbolic edges.}
These are denoted using a set  $\cM$ of
strings, which are used as place-holders for class names and labels
(symbolic edges are constructed from a pair
of symbolic node names and a symbolic label).
More formally, an {\em encapsulated strategy} is a tuple $\cE=(\cS,\cM,\cB')$, 
where $\cS$ is a 
strategy, $\cM$ is a set of symbolic names, and $\cB'$ is a function
mapping edges of the strategy graph to predicates over the symbolic elements.
To support encapsulated strategies,
the name map is 
 extended to map also symbolic names to actual class names and 
label names in the class graph.
%The resulting set of paths is denoted
%by $\cE_{\cN}(G)$.

\paragraph{Wildcard notation in predicate specification.}
We left the issue of how to specify the predicates open. One naive
way of doing it is to enumerate all elements to be used,
or alternatively to enumerate all elements to be excluded
(cf.\ ``use-only'' and ``bypassing'' clauses presented in Section 
\ref{sec-notes}).
More expressive power is given by allowing wildcard symbols to be used
in the predicate specification. For example, an element predicate may
be $\False$ for all elements of the form $\edge * l *$, which means
that no edges labeled $l$ can be traversed.
The unique feature of this notation is that it allows the programmer to refer
to elements whose identity is not necessarily known at predicate-specification
time. Even when using encapsulated strategies as above, the programmer
can only refer to symbolic names, which are later mapped to only
a subset of the elements of the actual class graph, while the wildcard
notation is implicitly mapped to all elements in the class graph as 
appropriate.



\section{Compilation algorithm}
\label{sec-alg}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this section we show how to implement traversal strategies 
efficiently by compiling them into  executable programs.
Formally, the compilation problem is defined as follows.

\begin{description}
\item[Input:]
A  strategy $\cS=(S,s,t)$, a simple 
class graph $G=(V,E,L)$, a name map $\cN$ for 
$S$ and $G$, and a constraint map $\cB$ for $S$ and $G$.
\item[Output:] 
A set of methods such that for any object graph $\Omega$, 
invoking the traversal method 
at an object $o$ in $\Omega$ yields a traversal history $H$
satisfying the judgment $\Omega\vdash_{s}o:\cS[G,\cN,\cB]\rhd H$.
\end{description}
%
%In the algorithm below,
%we assume that the class graph is simple (otherwise,
%the simplification algorithm of Appendix \ref{sec-simpleten} should be
%applied as pre-processing).
Recall that $\cS[G,\cN,\cB]$ is a path set which can guide traversals
of object graphs directly. Our  compilation consists of  two
algorithms. 

Algorithm 1 is based on the following idea of a reduction:
For traversal strategies of the form ``from A to B'', the paths 
defined in the class graph can be represented by a subgraph of the class graph:
Compute all edges reachable from A and from which B can be 
reached (from-to computation). Edges
in the intersection form the graph which represents the traversal.
Any strategy can be reduced to this case but for a graph that
is much larger than the original class graph. This larger graph,
called the traversal graph, will contain as many copies of the class graph
as the traversal strategy graph has edges.
The size of the traversal graph will be reduced by a from-to computation.

In other words, the from-to computation (which can be implemented, e.g.,
with a forward and a backward depth-first search) is fundamental
to computing the traversal graph. 

\begin{enumerate}

\item We first invoke an algorithm (called Algorithm 1 below) which uses
$\cS$, $G$, $\cN$, and $\cB$ to construct a graph
which expresses the traversal $\cS[G,\cN,\cB]$ in a more convenient way; we
call this graph the {\em traversal graph}, and denote it by
$\TG(\cS,G,\cN,\cB)$. 

\item The traversal methods employ another algorithm (called below
  Algorithm 2), which uses $\TG(\cS,G,\cN,\cB)$---the result of
  Algorithm 1.
\end{enumerate}
The remainder of this section is organized as follows.
In Section \ref{ssec-TG} we describe Algorithm 1.
In Section \ref{ssec-TM} we describe Algorithm 2.
In Section \ref{ssec-analysis} we analyze the computational complexity
of the algorithms.
We conclude this section with numerous extensions and variants for the
basic algorithm, listed in Section \ref{sec-ext}.

\subsection{The traversal graph}
\label{ssec-TG}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this section we explain how the {\em traversal graph} is computed,
based on a  strategy $\cS=(S,s,t)$, a simple 
class graph $G=(V,E,L)$, a name map $\cN$ for 
$S$ and $G$, and a constraint map $\cB$ for $S$ and $G$.
The traversal graph, denoted $\TG(\cS,G,\cN,\cB)$,
is created by a series of transformations based on the
class graph, the strategy, the name map, and the constraint map. 
The basic idea is to replace each strategy-graph edge by a copy of
appropriately pruned class graph.

The reader may follow a running example presented in Figure \ref{fig-comp}
(a DemeterJ code for the example is given in Appendix \ref{app-example}).

\begin{figure}
\centerline{\psfig{file=comp3.ps,height=7in}}
\caption{\em
An example of traversal graph computation.
{\sf 1}: the input class graph. 
Edge labels are omitted from subsequent graphs.
{\sf 2}: The input strategy (the name map is indicated).
In this example, the constraint map is as follows.
$\cB(e_1)(x)=\True$ and $\cB(e_2)(x)=\True$ for all $x$;
$\cB(e_3)(\edge A d D)=\False$ and $\cB(e_3)(x)=\True$ 
for all $x\ne\edge A d D$; and $\cB(e_4)(x)=\False$
if $x=A$ or if $x$ is an edge incident to $A$,
and $\cB(e_4)(x)=\True$ otherwise.
{\sf 3}: $G'$ after Steps 
\protect\ref{step-replicate} and \protect\ref{step-predicate}. 
{\sf 4}: $G'$ after Step \protect\ref{sstep-inter}.
Intercopy edges are dashed.
{\sf 5}: $G'$ after Step \ref{step-divert}.
{\sf 6}: $G'$ after Steps %\protect\ref{step-predicate},
\protect\ref{step-source} 
and \protect\ref{step-target}.
{\sf 7 }:
The final traversal graph, as returned in Step
%after steps \protect\ref{step-cleanup} and
\protect\ref{step-return}.
The shaded $A$ nodes are the start set $T_s$,
and the shaded $E$ nodes are the finish set $T_f$.
}
\label{fig-comp}
\end{figure}


\vbox{\vspace*{3mm}\hrule width \linewidth height 0.5pt\vspace*{-1.5mm}}
\noindent
{\bf Algorithm 1:} Traversal graph computation.
Let the strategy graph
be $S=(C,D)$, and let the strategy graph edges
be $D=\Set{e_1,e_2,\ldots,e_k}$.
\begin{enumerate}
\item
\label{step-replicate}
Create a graph $G'=(V',E')$ by taking $k$ copies of $G$, one for each
strategy graph edge.
Denote the $i$th copy as $G^i=(V^i,E^i)$. We will use the correspondence
between each strategy-graph edge $e_i$ and $G^i$.
The nodes in $V^i$ and edges in $E^i$ 
will be denoted with a superscript $i$, as in
 $v^i,e^i$ etc. 
Each class-graph node $v$ corresponds
 to $k$ nodes in $V'$, denoted $v^1,\ldots,v^k$.
We extend the $\Class$ mapping to apply to the nodes of $G'$ 
by setting $\Class(v^i)\DEF v$, where $v^i\in V'$ and $v\in V$.
\item
\label{step-predicate}
For each strategy-graph edge $e_{i}=\edge a{}b$:
Let $\cN(a)=u$ and $\cN(b)=v$. 
Leave in $G^i$ exactly the elements which satisfy
 $\cB(e_i)$. More precisely, set 
$$
\begin{array}{lll}
V^i&\gets&\Set{v^i,u^i}\cup\Set{w^i\mid\cB(e_i)(w)=\True}~\mbox{, and}\\
E^i&\gets&\Set{\edge{u^i}{l}{y^i}\mid\cB(e_i)(\edge{u}{l}{y})=
\cB(e_i)(u)=\cB(e_i)(y)=\True}\cup\\
&&\Set{\edge{w^i}{l}{v^i}\mid\cB(e_i)(\edge{w}{l}{v})=
\cB(e_i)(w)=\cB(e_i)(u)=\True}\cup\\
&&\Set{\edge{w^i}{l}{y^i}\mid\cB(e_i)(\edge{w}{l}{y})=
\cB(e_i)(w)=\cB(e_i)(y)=\True}~.
\end{array}
$$
\item
\label{step-divert}
\begin{enumerate}
\item
\label{sstep-inter}
For each strategy-graph node $a\in C$: Let 
$I=\Set{e_{i_1},\ldots,e_{i_n}}$ be the set of
strategy-graph edges coming into $a$, and let
$O=\Set{e_{o_1},\ldots,e_{o_m}}$ be the set of strategy-graph
edges  outgoing from $a$.
Let $\cN(a)=v\in V$.
Add $n\cdot m$ edges $\edge{v^{i_j}}{}{v^{o_l}}$ for $j=1,\ldots, n$ and 
$l=1,\ldots,m$. Call these edges {\em intercopy edges.}
\item
\label{sstep-add}
For each node $v^i$ in $G'$ with an outgoing intercopy edge:
Add edges $\edge{u^i}l{v^j}$ for all 
$u^i$ such that $\edge{u^i}{l}{v^i}\in E^i$, and for all
$v^j$ which are reachable from $v^i$ through intercopy edges only.
\item
Remove all the intercopy edges added in Step \ref{sstep-inter}.
\end{enumerate}
\item
\begin{enumerate}
\item
\label{step-source}
Add a node $s^*$ and
an edge $\edge{s^*}{}{\cN(s)^i}$ for each edge $e_i$ outgoing from $s$
in the strategy graph, where $s$ is the
source of the strategy.
\item
\label{step-target}
Add a node $t^*$ and
an edge $\edge{\cN(t)^i}{}{t^*}$ for each edge $e_i$ incoming into $t$ 
in the strategy graph, where $t$ is the
target of the strategy.
\item
\label{step-cleanup}
Mark all nodes and edges in
$G'$ which are both reachable from $s^*$ and from which 
$t^*$ is reachable, and remove unmarked nodes and edges from $G'$.
Call the resulting graph $G''=(V'',E'')$.
\end{enumerate}
\item
\label{step-return}
Return the following objects:
\begin{itemize}
\item
The set of all nodes $v$ such that $\edge{s^*}{}v$ is an edge in $G''$.
This is the {\em start set}, denoted $T_s$.
\item
The graph obtained from $G''$ after removing $s^*$ and $t^*$ and all
their incident edges. This is the 
{\em traversal graph}, denoted $\TG(\cS,G,\cN,\cB)$.
\end{itemize}
\end{enumerate}
\vbox{\vspace*{-1.5mm}\hrule width \linewidth height 0.5pt
\vspace*{3mm}}
%
For the purpose of analysis,
we also define the {\em finish set} of the traversal graph,
denoted $T_f$, to
be
the set of all nodes $v$ such that $\edge v{}{t^*}$ is an edge in $G''$.

\subsubsection*{Correctness}
We now prove that Algorithm 1 is correct, in the sense that the set of
paths in the traversal graph (from the start set of the finish set) is
exactly the set of paths defined by the strategy. This property is
formally stated in Lemma \ref{lem-TG}.

First, we show a basic property of paths in the traversal graph.

\begin{lemma}
\label{lem-TG-paths}
If $p$ is a path in the traversal graph, then 
under the extended $\Class$ mapping,
$p$ is  a path in the class graph.
\end{lemma}
\Proof
Note that for any edge $\edge{u^i}l{v^{j}}$
in the traversal graph,
we have that the corresponding edge $\edge ulv$
is in the class graph.
This can be verified by inspection: 
the only edges added to the graph which remain after Step \ref{step-return}
 are added in Step 
\ref{step-divert}. 
\QED

By Lemma \ref{lem-TG-paths}, we can apply the natural correspondence
$X$ to paths in the traversal graph to obtain concrete paths.
This allows us to state 
the main property of the traversal graph in the following
lemma.
\begin{lemma}
\label{lem-TG}
Let $\cS$ be a strategy, let $G$ be a class graph, let $\cN$ be a name map,
and let $\cB$ be a constraint map.
Let $TG=\TG(\cS,G,\cN,\cB)$,
let $T_s$ be the start set and let $T_f$ be the finish set
generated by Algorithm~1.
Then $X(P_{TG}(T_s,T_f))=\cS[G,\cN,\cB]$.
\end{lemma}
\Proof
Let  $p\in P_{TG}(T_s,T_f)$ be a path in the traversal graph.
To see that $X(p)\in \cS[G,\cN,\cB]$, we decompose $p$ according
to the different copies of $G$ it passes through.
Intuitively, we take the maximal segments of $p$ which are contained
in the same copy of $G$, and the next node (which is in
another copy).
Formally, we decompose $p=p_1\cdot p_2\cdots p_n$ inductively by 
the following algorithm: 

%CODE
{%\bf
\begin{tabbing}
88888888\=\+888\=888\=888\=8888\=\kill
$v\gets\First(p)$; $i\gets 0$\\
repeat\\
\> $i\gets i+1$; $p_i\gets\epsilon$\\
\>let $j(i)$ be such that $v\in G^{j(i)}$\\
\>repeat \` // {\em accumulate prefix of $p$ until exiting $G^{j(i)}$}\\
\>\> $v\gets\First(p)$\`//{\em always add current node}\\
\>\> $p_i\gets p_i \cdot\Seq{v}$\\
\>\> $p\gets \Chop(p)$\\
\>\> if $v\in G^{j(i)}$  then\\
\>\>\> $p_i\gets p_i\cdot\Seq{\First(p)}$\`//{\em add edge only if node still in $G^{j(i)}$}\\
\>\>\> $p\gets\Chop(p)$\\
\> until $v\notin G^{j(i)}$ or $p=\Seq v$\\
\> output $p_i$\\
until $p=\epsilon$ 
\end{tabbing}
}
%
Suppose that the algorithm above outputs $n$ sub-paths $p_1,\ldots,p_n$.
For $i=1,\ldots,n$, let $\edge{v_{i-1}}{}{v_i}=e_{j(i)}$ with $j(i)$
as defined by the algorithm,
i.e., $e_{j(i)}$ is the edge corresponding to the index of the
copy of $G$ through which $p_i$ is passing.
With this notation,
consider the sequence of strategy graph nodes
 $q=v_1v_2\ldots v_nv_{n+1}$.
By construction, $q$ is
 a path in the strategy graph: 
this is because the only edges in the traversal graph which go from
one copy of $G$ to another are created in Step \ref{step-divert} of Algorithm 1,
where an edge goes from $G^{i}$ to $G^{j}$ only if 
$\Target(e_{i})=\Source(e_{j})$.
Next, note that since $\Source(p)\in T_s$
we have by Step \ref{step-source} and the definition of $T_s$ that
$\Class(\Source(p))=\cN(s)$, where $s$ is the source of $\cS$,
and similarly, $\Class(\Target(p))=\cN(t)$ where $t$ is the target of
$\cS$.
Finally, note that $p$ is a satisfying expansion of $q$ with respect to $\cB$.
It therefore follows that $X(p)\in \cS[G,\cN,\cB]$.

Suppose now that $p\in \cS[G,\cN,\cB]$. By Definition \ref{def-strategy-paths},
there exists a path $p'$ in the strategy graph
and a path $p''$ in the class graph
such that $p=X(p'')$ and $p''$ is a satisfying  expansion of $p'$.
Hence  $p''$ can be decomposed into sub-paths  %$p'=p_1\cdot p_2\cdots p_n$
as in Definition \ref{def-sat}. 
It is straightforward to verify from Definition \ref{def-sat}
and the specification of the traversal graph, that $p''\in P_{TG}(T_s,T_f)$.
\QED


\subsection{Traversal methods algorithm}
\label{ssec-TM}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
To carry out traversals, we attach a traversal method definition to
each class. In this section we describe the algorithm of these
methods.

Intuitively, the idea is to traverse the object graph
while using the traversal graph as a road map that tells 
the traversal which of the possible branches to take.
To do that, the algorithm maintains a set of tokens
placed on the traversal graph.
When a traversal method is invoked at an object, it gets
the set of tokens  as a parameter; the interpretation of a token
placed on a node 
$v$ in the traversal graph is roughly
``the traversal made so far places $\This$ on $v$.''
The fact that there may be more than one token simultaneously is a reflection
of the fact that the path leading to an object in the object graph
may be (under the natural correspondence $Y$) a prefix  of several
distinct paths in the $\cS[G,\cN,\cB]$.
This matters, because if there are several tokens, we might 
have more possibilities for selecting the next traversal step.

The traversal method is denoted below by
$\Traverse(T)$, where $T$ is the set of tokens,
i.e.,  a set of nodes in the traversal graph.
When the traversal method invokes the $\Visit$ 
method  at an object, that object is added
to the traversal history.
%\footnote{
%Visiting an object is when the real work is
%done for the application, but our interest here is just the traversals
%which are the vehicle for that work.
%}
The description below is generic in the sense that
the same method is used for all objects; it can be used for different
traversals, using different traversal graphs.


We assume that each object can find its class name
and can iterate through all its constituent fields at run time.
This assumption can be fulfilled either by some minor preprocessing,
or by ``reflection'' methods if they are available (e.g., \cite{Java-1.1}).

\vbox{\vspace*{3mm}\hrule width \linewidth height 0.5pt\vspace*{-1.5mm}}
\noindent
{\bf Algorithm 2:}
$\Traverse(T)$, guided by a traversal graph $TG$.
\hfill\ignore{
This algorithm  is attached to each object in the the object graph
under the name   $\Traverse$.%
\footnote{
If reflection is not available, the same method code applies to all
objects of the same class for all possible traversal graphs; 
and if reflection is available, the same code applies universally to all
objects and all traversal graphs.
In any event inheritance should be used to avoid duplication.
}
 It carries out the traversal, guided by a given
 traversal graph.
} 
\begin{enumerate}
\item
\label{step-abs}
Define a set of traversal graph nodes $T'$ by
$$
T'\gets\Set{v\mid\Class(v)=\Class(\This)\mbox{ and $\exists u\in T$ such that
$u=v$ or $\edge u\diamond v$ is an edge in $TG$}}~.
$$
\item
\label{step-empty}
If $T'=\emptyset$, return.
\item
\label{step-visit}
Call $\This.\mbox{\tt visit()}$. 
%(In practice, it seems useful to pass
%arguments for the visiting methods, possibly $t$.)
\item
\label{step-recurse}
Let $Q$ be the set of labels which appear both on  edges outgoing from
a node in $T'$ in $TG$ and on edges outgoing from $\This$
in the object graph.
For each label $l\in Q$, let
$$T_l=\Set{v\mid\edge u l v \in TG \mbox{ for some }u\in T'}~.$$
\item
Call $\This.l.\Traverse(T_l)$ for all $l\in Q$, ordered by ``$\prec$'',
the ordering of the labels.
\end{enumerate}

\vbox{\vspace*{-1.5mm}\hrule width \linewidth height 0.5pt \vspace*{3mm}}

Step \ref{step-abs} of Algorithm 2 makes sure that the token set
corresponds to the class of the current object: the tokens in $T$ placed on
concrete classes appear in $T'$ only if they are placed on a node
corresponding to $\Class(\This)$. And the tokens in
$T$ placed on abstract classes are moved in $T'$ to their subclass node
whose class is $\Class(\This)$ (if there is one; otherwise, they are simply
discarded). In any event, all tokens in $T'$ are placed on nodes
corresponding to $\Class(\This)$.

An example run of the algorithm is given in Figure \ref{fig-run}, based
on the traversal graph of Figure \ref{fig-comp}.

\begin{figure}
%\centerline{\psfig{file=run.ps,width=\textwidth}}
%\centerline{\psfig{file=run.ps}}
\centerline{\psfig{file=run2.ps,height=8in}}
\caption{\em
An example of an execution of traversal using the traversal of
Fig.~\protect\ref{fig-comp}. 
At each step, the left hand side 
shows the object tree with the currently active object shaded, 
and the right hand side shows the traversal graph with the token set shaded.
Note that in Step 6, the token set is empty and hence
the middle $E$ object is not visited.
}
\label{fig-run}
\end{figure}

\subsubsection*{Correctness}

The following lemma states the main property of the traversal algorithm.
\begin{lemma}
\label{lem-main}
Let $\Omega$ be  an object tree, and let $o$ be an
object in $\Omega$.
Suppose that the $\Traverse$ methods are guided by a traversal graph
$TG$ with finish set $T_f$. Let $H(o,T)$ be the sequence of objects which
invoke {\tt visit} while $o.\Traverse(T)$ is active, where $T$ is a set
of nodes in $TG$. Then 
$$
\Omega\vdash_s o:X(P_{TG}(T,T_f))\rhd H(o,T)~.
$$
\end{lemma}
\Proof
By induction on $|H(o,T)|$.
For the base case, suppose that $H(o,T)=\epsilon$. By the algorithm,
this can occur
only if after Step \ref{step-abs},  $T'=\emptyset$,
which means that for all concrete nodes $v\in T$, $\Class(v)\ne\Class(o)$,
and that no abstract node in $T$ has a child whose class is $\Class(o)$.
It follows from Definition \ref{def-traversal} that
$\Chop(X(P_{TG}(T,T_f)),\Class(o))=\emptyset$ and hence
$\Omega\vdash_s o:X(P_{TG}(T,T_f))\rhd \epsilon$, as required.

For the induction step, assume that $|H(o,T)|>0$. 
Let $l_1,\ldots,l_n$ be the set of labels of traversal graph
edges which start with a node in $T'$, and let $o_i=o.l_i$ for $i=1,\ldots,n$.
In this case, by the algorithm
we have that $H(o,T)=o\cdot H(o_1,T_1)\cdots H(o_n,T_n)$,
where $T_i$ is the set of traversal graph nodes $v$ such that 
$\edge{u}{l_i}{v}$ for some $u\in T'$ and such that $\edge o {l_i}{o'}$
is an edge in the object graph.
It is follows directly from the definitions 
 that $X(P_{TG}(T_i,T_f))=\Chop(\Chop(X(P_{TG}(T,T_f)),\Class(o)),l_i)$,
and hence, by the induction hypothesis,
$\Omega\vdash_s o:X(P_{TG}(T_i,T_f))\rhd H(o_i,T_i)$ and we are done.
\QED

We summarize in the following theorem.
\begin{theorem}
Let $\cS$ be a strategy, let
 $G$ be a class graph, let $\cN$ be a name map, and let $\cB$ 
be a constraint map.
Let $TG$ be the traversal graph generated by Algorithm 1, and let $T_s$ and
$T_f$ be the start and finish sets, respectively.
Let $\Omega$ be an object tree  and let $o$ be an object in $\Omega$.
Let $H$ be the sequence of nodes visited when $o.\Traverse$ is called
with argument $T_s$, guided by $TG$.
Then
$$
\Omega\vdash_s o:X(\cS[G,\cN,\cB])\rhd H~.
$$
\end{theorem}
\Proof
By Lemma \ref{lem-main}, the judgment
$\Omega\vdash_s o:X(P_{TG}(T_s,T_f))\rhd H$ holds true.
The claim of the theorem follows from the fact that 
$\cS[G,\cN,\cB]=X(P_{\TG(\cS,G,\cN,\cB)}(T_s,T_f))$
by  Lemma \ref{lem-TG}, and from the definitions of the start set $T_s$
and the finish set $T_f$.
\QED
%
From the theorem above it is clear how to start a traversal at an object $o$:
Call $o.\Traverse$ with argument $T_s$, where $T_s$ is the start set of the 
traversal.




\subsection{Computational complexity of the algorithm}
\label{ssec-analysis}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It is easy to see that the time complexity of Algorithm 1
is polynomial in the size of its input.
All steps run in time linear in the size of their input and output.
Steps \ref{step-replicate} and \ref{step-predicate}
 take time linear in $|G'|=O(|\cS|\cdot|G|)$ and in $\tau$,
where $\tau$ is the time bound for evaluating an element predicate
for a given element.
To bound the size of the traversal graph,  let $d_o$ be the maximal 
number of edges outgoing from a node in the class graph.
Note that all edges added in Step \ref{step-divert} correspond to
class graph edges. It follows that  the number
of outgoing edges added to a traversal graph node in Step \ref{step-divert}
is $d_o$ times the number of copies of $G$ in $G'$.
Hence Step \ref{step-divert}
 may increase the size of of the graph to 
$O(|\cS|^2\cdot|G|\cdot d_o)$ in the worst case. 
Steps \ref{step-source}, \ref{step-target}
and \ref{step-cleanup} run in time linear in the size of 
$|G''|=O(|\cS|^2\cdot|G|\cdot d_o)$.

As for Algorithm 2, we note that the size of the argument $T$ 
is bounded by the size of the strategy graph. This follows
from the observation that in all recursive invocations of $\Traverse$ made
by the algorithm, for all $v,u\in T$ we have that $\Class(v)=\Class(u)$. Since
each copy of the class graph in the traversal graph contains at most one
node of each class, it follows that the number of nodes in $T$ is never
more than the number of edges in the strategy graph.%
\footnote{
Note that if we allow for users to call the
initial traversal with arbitrary values of $T$ (to allow multiple sources,
see Section \ref{ssec-rem}), then it may be the case,
at the first call only, that $|T|$ is greater 
than the number of strategy-graph
edges.
}


\subsection{Extensions}
\label{ssec-rem}
\label{sec-ext}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Multiple sources.}
As evident in the statement of Lemma \ref{lem-main}, the 
initial set of nodes in the traversal graph from which the traversal starts
can be arbitrary: the set of paths traversed would change, but in accordance
with the traversal strategy, using an appropriate definition.
In particular, one may have more than one start node in the strategy
graph, which is interpreted as several optional ``entry points'':
it may be the case that the same traversal is sometimes started with a node
of class $A$ and at another time with a node of class $B$ (or, more generally,
with different nodes in the strategy graph). This situation of multiple
sources can be easily handled by our algorithm: suppose
that we have a set $A$ of start nodes for the strategy.
All we need to do is to change Step \ref{step-source} of Algorithm 1 to be
\begin{enumerate}
\item[\ref{step-source}$'$.]
Add edges $\edge{s^*}{}{\cN(u)^i}$ for each edge $e_i=\edge{u}{}{v}\in D$, 
where $u\in A$.
\end{enumerate}
%
Of course, in this case
a traversal should start with invoking $\Traverse$ with argument
$A$.


\paragraph{Multiple targets.}
Similarly, it may be the case that we don't need all traversal paths to
end with the same target node. This can be useful,
for example, if we want to traverse a tree of objects, rather
than traverse all paths leading towards the same target.
Using the same idea we had for multiple sources, it is simple
to have multiple 
targets for a strategy: if we want to traverse all paths which may lead
to any of the strategy graph nodes
in a set $B$, all we need to do is to change Step \ref{step-target} of 
Algorithm 1:
\begin{enumerate}
\item[\ref{step-target}$'$.]
Add an edge $(t^i,t^*)$ for each edge $e_i=(v,u)\in D$, where $u\in B$.
\end{enumerate}
%
This extension is particularly useful when the target of the traversal
is an abstract class: suppose we want to traverse to a class $A$ 
which happens to be abstract. The natural interpretation is that
the traversal should end at whatever subclass of $A$ which happens to
be in the object graph. However,
with the semantics specified above, if the target of the strategy is $A$,
then the object (whose class is concrete)  substituting for $A$ is {\em not}
 visited.
To visit the object substituting for $A$ regardless of its actual class,
we can simply state that the target of the strategy is the set of all
subclasses of $A$.


\paragraph{Intersection of traversals.}
It is sometimes convenient to traverse only the intersection of two
path sets, each described by a traversal strategy. 
This can be done with a minor modification to the traversal method algorithm:
all it takes is doing both traversals in parallel, and proceeding
only along paths which  
both traversal graphs allow us to traverse.
Specifically, to traverse the intersection of traversal graphs
$TG_1=\TG(\cS_1,G,\cN_1,\cB_1)$ and $TG_2=\TG(\cS_2,G,\cN_1,\cB_2)$, 
the following algorithm can be used.

\vbox{\vspace*{3mm}\hrule width \linewidth height 0.5pt
\vspace*{-1.5mm}}
{\bf Algorithm 3:}  $\TraverseIntersect(T_1,T_2)$, guided by $TG_1$ and $TG_2$.
The  input arguments $T_1$ and $T_2$ are sets of nodes of $TG_1$ and
$TG_2$, respectively.
\begin{enumerate}
\item
%\label{step-abs}
Define a set $T_1'$ of  nodes of $TG_1$ and a set $T_2'$ of nodes $TG_2$ by
\begin{eqnarray*}
T'_1&\gets&
\Set{v\in TG_1\mid\Class(v)=\Class(\This)
 \mbox{ and $\exists u\in T_1$ such that $u=v$ or $\edge u\diamond v$
is an edge in $TG_1$}}\\
T'_2&\gets&\Set{v\in TG_2\mid\Class(v)=\Class(\This)
 \mbox{ and $\exists u\in T_2$ such that $u=v$ or $\edge u\diamond v$ 
is an edge in $TG_2$}}~.
\end{eqnarray*}
\item
%\label{step-empty}
If $T_1'=\emptyset$ or $T_2'=\emptyset$, return.
\item
%\label{step-visit}
Call $\This.\mbox{\tt visit()}$. 
\item
%\label{step-recurse}
Let $Q$ be the set of labels which appear on  edges outgoing from
a node in $T_1'$ in $TG_1$, and on edges outgoing from
a node in $T_2'$ in $TG_2$, and on edges outgoing from $\This$
in the object graph.
For each label $l\in Q$, let
\begin{eqnarray*}
T_1(l)&\gets&\Set{v\in T_1\mid\edge u l v\in TG_1 \mbox{ for some }u\in T_1'}\\
T_2(l)&\gets&\Set{v\in T_2\mid\edge u l v\in TG_2 \mbox{ for some }u\in T_2'}~.
\end{eqnarray*}
\item
Call $\This.l.\TraverseIntersect(T_1(l),T_2(l))$ for all $l\in Q$, 
ordered by ``$\prec$'',
the ordering of the labels.
\end{enumerate}

\vbox{\vspace*{-1.5mm}\hrule width \linewidth height 0.5pt
\vspace*{3mm}}


It is straightforward to verify that given an object graph $\Omega$,
the sequence of objects visited
by the above algorithm is the traversal guided by the path set
$\cS_1[G,\cN_1,\cB_1]\cap\cS_2[G,\cN_2,\cB_2]$.



\paragraph{Composition of strategies.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Composition of strategies is very simple, due to the fact that
strategy graphs have a source and a target:
we can replace an edge 
in a strategy graph by a complete strategy. This can be viewed as
 the process of refining the traversal specification. 
Formally, given strategies $\cS_1=(S_1,s_1,t_1)$ and $\cS_2=(S_2,s_2,t_2)$,
with disjoint node sets,
we define the {\em composed strategy} $\cS_1(\cS_2,\edge{a}{}b{})$ where
$\edge{a}{}{b}$ is an edge in $S_1$ 
by identifying $a=s_1$ and $b=s_2$.


\paragraph{``Before,'' ``after'' and ``around'' methods.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The semantics presented in Section \ref{sec-traversals} imposes a
pre-order of visiting the objects selected by the traversal, as
evident in Algorithm 2: first the object is verified to be on a
traversal path, then it is visited, and then the traversal proceeds
down the tree. We call such visitors {\em before visitors}.
It is sometimes useful to have the visitor methods
invoked in post-order, namely first descend down the tree and then
invoke the visitor method. These visitors are accordingly called {\em
after visitors}. It is a simple exercise to adapt the definition of
traversals to deal with after visitors.

Both before and after visitors are generalized by the notion
of {\em around visitors}, whose code
is interleaved with the traversal method code of Algorithm 2.
This allows for before and after methods (which can
communicate directly by shared data structures), and it also allows
the visitor to directly manipulate the traversal, e.g., by invoking it
multiple times, or by pruning it. This flexibility is  very powerful,
but its precise definition is involved, and is left for future work.
 
\paragraph{Cyclic object graphs.}
One of the apparent disadvantages of the approach presented in the current
paper is that it deals only with tree (or forest) object graphs.
This problem can be solved in many ways, depending on the intended 
semantics. 
In the current implementations, we use visitor objects to
make sure that a visited node is not revisited.
The main point is that we already have all the machinery to carry
out a depth-first traversal of a part of the object graph as selected by
the strategy, so it is quite easy to vary the implementation slightly to
accommodate for our needs. In a sense, what we need is a specialized
around method (see above).

For example,
one reasonable choice is that no object is visited twice. This can be 
easily implemented by associating a ``{\tt visited}'' bit with each object,
and using it as expected, namely to execute the following as the first
step in the traversal method (Algorithm 2):

\begin{enumerate}
\item[0.] 
If $\This.\mbox{\tt visited}=\True$, return. 
Else $\This.\mbox{\tt visited}\gets\True$.
\end{enumerate}
%
where initially $o.\mbox{\tt visited}=\False$ for all objects $o$.




\section{The limits of static traversal code}
\label{sec-lb}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
One appealing approach to compiling executable
code from traversal strategies
 is to use only static analysis: in this context, this means that
only method invocations are used to traverse the graph, with no further
computation while the program is running. 
The advantage of the static approach
is that the run time overhead due to traversals is minimal;
the possible disadvantages are
 larger compile time and higher space requirement for the executable
code, but how large can they be?
Early implementations of traversals were static, but they suffered
from either being limited in scope \cite{PXL-95,PPL-96}, or inefficient.
In particular, the automata-based algorithm presented in \cite{PPL-96}
may result in exponential compilation time and exponential number of
traversal methods in the executable code.

In this section we show that this phenomenon is not accidental: 
for some  strategies and class graphs,
 static compilation algorithms must output exponentially many methods,
thereby making the space requirement of the code, as well as the running
time of the compiler, infeasible in the worst case.
We remark that our proof technique is similar to the standard technique of 
simulating non-deterministic finite automata in polynomial space and time
\cite{dragon-nfa-sim}.

To state the result formally, we first define the notion of static traversal
compilation. We then give an example of a traversal strategy and a class graph
where static compilation must result in an exponential number of methods.
We remark that the strategy graph we use is not cyclic; in fact, a tree 
strategy is sufficient to prove the same result.


\subsection{The target language}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
An algorithm is said to compile a traversal strategy and a class graph to
{\em static traversal code} if it generates traversal code in a language
which supports only method invocation without parameter passing.
The target language of a static compilation algorithm is formally
defined in \cite{PXL-95,PPL-96}, and is given in Appendix \ref{sec-target}.
Informally, a program attaches method definitions to each class,
and a method body is a list of (qualified) method names. There are
no arguments passed to the methods and no return values.
Executing a method in a given object graph is done simply by unfolding
the method definition. To perform a traversal starting with a given object,
a special method 
attached to this object is invoked. 
When a method
is invoked, the corresponding object may be added to the traversal history.

\subsection{The lower bound}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We now prove the main result of this section.
\begin{theorem}
\label{thm-lb}
For any $n>0$ there exists
 a  traversal strategy $\cS_n$ with $|\cS_n|=O(n)$ 
and a class graph $G_n$ with $|G_n|=O(n)$ such that
the number of methods in a static traversal code corresponding
to $\cS_n$ and $G_n$ is at least $2^n$.
\end{theorem}
\begin{figure}
\newlength{\figwidth}
\setlength{\figwidth}{\textwidth}
\addtolength{\figwidth}{-5mm}
\centerline{\psfig{file=lb.ps,width=\figwidth}}
\caption{\em 
Example considered in lower-bound proof.
Left: the strategy graph. 
The source of the strategy is the node labeled {\sf A}
and the target is the node labeled {\sf D}.
Middle: the class graph. The name map is indicated
by the node labels on the strategy graph. 
Right: a typical object tree. The shaded regions represent a recursive
occurrence of the tree.
}
\label{fig-lb}
\end{figure}
\Proof
By contradiction.
Consider the strategy graph and the class graph
depicted in Figure \ref{fig-lb}. 
Intuitively, starting with an object of class $A$, an object of class
$C_i$ can be visited only if it has an ancestor of class $B_i$.
The strategy graph has $2n+2$ nodes and  $3n$ edges;
The class graph has $2n+3$  nodes and $4n+2$ edges.
Note that we can construct object trees where an $A$-object has any
desired set of $B$-ancestors.
We claim that in a static traversal code,
there are at least $2^n$ methods attached 
to objects of class $A$.
For suppose not.
Then there exists a set $S_0\subseteq\Set{C_1,C_2,\ldots,C_n}$
such that there is no method attached to $A$ which consists of calls
precisely to the methods in the objects pointed to by the elements of
$S_0$. 
Let $I_0$ be the set of indices in $S_0$.
Consider  an object tree containing an object $o$ with $\Class(o)=A$
 such that $o$ has an ancestor of class  $B_i$ if and only
if $i\in I_0$. Put differently, we think of an object tree
which satisfies the following condition:
$$
\Set{\Class(o')\mid o' \mbox{ is an ancestor of }o}\cap
\Set{B_1,B_2,\ldots,B_n}= \Set{B_i\mid C_i\in S_0}~.
$$
As noted before, such an object graph exists.
By definition, when $o$ is invoked, it should call all its children
whose class is in $S_0$. But by assumption, no such method is attached to $A$.
\QED

We note that the strategy graph in the proof of Theorem \ref{thm-lb}
is acyclic (in fact, it is a series-parallel graph expressible in the
syntax for traversal specifications of \cite{PPL-96}).
The proof extends directly to the case of tree strategies (see Section
\ref{sec-ext}) by omitting the node corresponding to $D$ in the strategy
graph.

It may be instructive to see how the algorithm described in Section
\ref{sec-alg} avoids the exponential lower bound. In that algorithm,
the traversal graph serves as a ``road map,'' and whenever a traversal
method is called in an object $o$, 
it gets as an input argument a set of ``tokens.''
The token set reflects the current location of the traversal, i.e., 
what prefixes of paths have already been covered when the traversal reached
$o$.
 This set controls the next traversal actions
while being updated as the traversal continues.
As the argument of Theorem \ref{thm-lb} implies, the number of possible
continuations of the traversal may be exponential; however, this
only means that the number of possible
 configurations of the token set must be exponential,
which can be achieved with an argument whose size
 is linear in the size of the strategy graph.

\section{Implementation notes}
\label{sec-notes}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In this section we describe some of the practical issues and design
decisions taken in the course of development of DemeterJ 
\cite{cse:preventive}, a
prototype system based on the idea of strategies as described in this
paper.

\subsection{User-level representation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
To aid specifying inputs, the system contains a graphical editor which
allows the user to enter class
and strategy graphs via an intuitive interface. The idea is that the
class graph is either generated automatically from a given Java code,
or that the user draws it manually. The representation we use is
similar to that of UML \cite{grady-jim:95}. In the next step, the user
specifies a strategy graph by pointing at the class graph nodes. This
method, aside for the obvious ease-of-use, also implicitly defines the
name map: a strategy node is mapped to the class node from which it
was derived.
The constraint map predicates are defined by clicking on the undesired
elements.

In addition to the graphical representation, an LL(1) grammar has been
developed to support textual representation of strategies (see
examples in Section \ref{sec-intro} and in Appendix
\ref{app-example}). 
In fact, the textual representation is a much more effective way to specify the
constraint map. Specifically, for a given strategy edge, a typical
element predicate might have either of the following forms.
\begin{verbatim}
 bypassing A
 bypassing -> * l *
 only-through -> A l B
\end{verbatim}
The first predicate is true for all elements except for node $A$.
The second predicate is true for all elements except for edges whose
label is $l$. The third predicate is false for all elements except
for the edge $\edge A l B$.


%*NEW*
\subsection{AP Library}
An experimental implementation of strategies, called the {\em AP
  library}, is used at Northeastern 
University for a few years \cite{APLibrary}. 
The AP Library is an implementation of the core concepts described in
  this paper: it includes an interface and
implementation for the concepts of class graphs, traversal strategy
graphs, traversal graphs and an algorithm to compute traversal graphs
from class graphs and traversal strategy graphs. 

% To make the algorithms presented in this paper available in Java, Doug Orleans
% has implemented them as an
% easy-to-use library, called the AP library
% http://www.ccs.neu.edu/research/demeter/AP-Library/
% that uses the following interfaces:
% ClassGraphI, ConstraintMapI, EdgeI, NameMapI, StrategyGraphI, StrategyI 
% and class TraversalGraph and exceptions
% NoSuchClassGraphEdgeException,
% NoSuchClassGraphEdgeLabelException,
% NoSuchClassGraphNodeException,
% TraversalGraphException. 

The AP Library is used by DJ 
\cite{DJ} (see Section \ref{ssec-TAO})
%(http://www.ccs.neu.edu/research/demeter/DJ/)
as well as by DemeterJ
\cite{cse:preventive}.


%*END NEW*

\subsection{Traversals as objects}
\label{ssec-TAO}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The implementation outlined above does the traversal compilation in a
pre-processing stage, and requires the usage of a modified Java language.
Traversal as objects (TAO) pursues a different avenue of
implementation. The idea in TAO is to add traversals 
to Java without extending the language; instead, traversal is done
using some predefined classes.
More specifically, 
traversal graph objects are created by calling the constructor of the 
TraversalGraph class
with a textual representation of the traversal strategy as a string argument. 
The TraversalGraph constructor implements the new algorithm described
in this paper 
which translates a strategy and a class graph into a traversal
graph which is interpreted when a traversal executes.
A traversal graph object $t$ is typically called by an expression of the form
{\tt    t.traverse(o,{v1,v2, ... ,vn})},
where $o$ is the object to be traversed
and $\Seq{v1,v2,\ldots,vn}$ is a vector of visitors.
The TAO approach also allows to compute traversals at run-time.

% We remark that a commercial product called StructureBuilder
% \cite{URL:tendril} is based on ideas similar  to TAO.

%*NEW*
We have implemented traversals as objects in a tool that integrates
generic programming with adaptive programming. The tool is called
DJ \cite{DJ}
%(http://www.ccs.neu.edu/research/demeter/DJ/)
and supports the adaptive definition of iterators that are used
by generic algorithms. The tool works with both the Java Generic Library
as well as with the Java collection classes.
%*END NEW*


\ignore{
\subsection{Traversal state in visitors}

It is sometimes the case 
that the type of work we would like the visitor methods to do depends
on the location of the visited object in the current traversal.
To this end, 
Visitors do the ``interesting'' work during traversals.
Before and after methods specify code to be executed on object entry
and exit, respectively (cf.\ Section \ref{sec-ext}).
Around methods allow us to do conditional traversals.
For all those traversal methods
we might want to execute different code 
depending  on the traversal state. The traversal state 
is described by a set of tokens in the traversal graph.
There are two ways to establish communication between
visitors and the traversal state:

\begin{enumerate}
\item
Pass set of tokens as argument

The visitors can query the state and take appropriate action.

\item
Qualify visitor methods with strategy graph information

For example, we could express (in verbose form):

\begin{verbatim}
before entering object of class C 
guided by strategy graph edge X -> Y
\end{verbatim} 
\end{enumerate}

The first solution is more general and requires less new syntax
but it increases the number of conditionals in visitor methods.


\subsection{Unified Modeling Language}

The technology proposed in this paper is general enough to deal
with traversals of objects defined by
class diagrams of various design and analysis methods,
specifically with UML class diagrams \cite{grady-jim:95}.
We briefly discuss how features of UML can be handled.
\begin{itemize}
\item
cardinalities

The corresponding traversal code needs to check for presence of
parts and needs to use iteration to visit all elements. For example,
cardinality [0..1] needs a check for existence of the part and 
[1..*] needs a loop to visit all elements.

\item
bidirectional edges

Associations in UML are bidirectional. They are represented
by two directed associations in both directions.

\item
non-abstract super classes

In UML, super classes may not be abstract. We might have

\begin{verbatim}
class B inherits A { ... }
class A { ... }
\end{verbatim}

with A being instantiable. We introduce a new class A\_abs which
has the behavior of the former A, but which is abstract.
Both A and B inherit from A\_abs.

\begin{verbatim}
class A inherits A_abs { ... }
class B inherits A_abs { ... }
abstract class A_abs { ... }
\end{verbatim}

We can apply this rule whenever a super class is not abstract
making all super classes abstract.
\end{itemize}


% [[[ 
% graphic and text representation of graphs (through, bypassing); 
% star in predicate spec's; 
%  default name map; traversals as objects; verifying class graph properties
% (reachability, path uniqueness); ...]]]
} %% END IGNORE


\section{Conclusion}
\label{sec-conc}
%%%%%%%%%%%%%%%%%%%%
Traversals are fundamental to object-oriented programming
and programming in general. In order to process an object
we need to traverse through a part of it and perform appropriate
actions during the traversal.
The importance of traversals of object structure 
is well recognized in the literature.
For example, the Visitor design pattern 
\cite{gang-of-4}
and its variants \cite{Vlissides:visitor,Vlissides:observer}
attest to this fact.
We believe that the notion of strategies is a significant
contribution  to software developers---both
by providing a more intuitive and conceptually simpler
 programming model, and by automating the frequent-and-tedious task of 
programming traversals.

In this paper we have extended the
state of knowledge regarding traversals by providing a general
definition as well as an efficient implementation and a working prototype.
We improve on all previous implementations and at the same time
we present a model that is more general than previous traversal models.
In addition, the lower bound result
improves the understanding of the inherent
properties of run-time traversals, whose implementation has
been notoriously  tricky.



 U.S. patent 5,946,490 %(filed in 1996 and granted in 1999)
covers the algorithms in this paper.



\vfill

\subsection*{Acknowledgments}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We wish to thank Dean Alemang,  Lars Hansen,
Jens Palsberg,  Kedar  Patankar, 
Salil Pradhan, Johan Ovlinger, Binoy
Samuel,
 and Mitch Wand
for numerous discussions and suggestions. Special thanks are due to Michal
Young, for bringing \cite{dragon-nfa-sim} to our attention, and to Doug
Orleans, for careful debugging of Algorithm 1.
Many thanks to Don Batory and the anonymous IEEE reviewers
for his insistence in making the paper more
accessible.

\newpage
\bibliographystyle{plain}
\bibliography{dist,biblio}

\newpage
\appendix
\section*{APPENDICES}

\section{Class-graph Simplification}
\label{sec-simpleten}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this appendix we prove Proposition \ref{prop-simple}.
For the convenience of the reader, we reproduce it below.

\noindent
{\bf Proposition \ref{prop-simple}.}
{\em
Let $G=(V,E)$ be an arbitrary class graph. Then there exists a class graph
$\Simple(G)=(V',E')$ such that
an object graph $\Omega$ is an instance of $G$ if and only if $\Omega$
is an instance of $\Simple(G)$. Moreover, $|V'|=O(|V|)$ and $|E'|=O(|E|^2)$.
}

\noindent
The proposition is proven by the following transformation algorithm
(see example in Figure \ref{fig-simplify}).
\begin{figure}
\centerline{\psfig{file=simplify.ps,width=6.5in}}
\caption{\em
An example of class-graph simplification. 
{\sf A}: the original class graph.
Concrete classes are depicted as squares, and abstract classes are hexagons.
Reference edges are regular, and subclass edges are heavy.
{\sf B}: after step \protect\ref{step-introduce}.
{\sf C}: after step \protect\ref{step-simpleten}.
{\sf D}: after  step \protect\ref{step-contract}.
}
\label{fig-simplify}
\end{figure}
\begin{enumerate}
\item
\label{step-introduce}
For each concrete class $v\in V$ with an outgoing subclass edge
$\edge v\diamond u\in E$, add a new abstract node $v'$ into $V$, along with
the following changes of the edge set:
\begin{itemize}
\item
Divert all edges incoming into $v$ to end at $v'$. That is, 
$$
E\gets E\cup\Set{\edge u l v'\mid \edge u l v\in E}
 \setminus\Set{\edge u l v\mid  \edge u l v\in E}
$$
\item
Divert all subclass edges outgoing from $v$ to originate at $v'$. That is,
$$
E\gets E\cup\Set{\edge {v'} \diamond u\mid \edge v \diamond u\in E}
   \setminus\Set{\edge v \diamond u\mid \edge v \diamond u}
$$
\item
Make $v$ a subclass of $v'$:
$$E\gets E\cup\Set{\edge {v'}\diamond v}$$
\end{itemize}
When this step is completed, no concrete class has subclass edges outgoing 
from it.

\item
\label{step-simpleten}
For each concrete class $v$: add edges so that the set of edges outgoing
from $v$ is exactly the induced edges of $v$.
Then, delete all reference edges outgoing from abstract classes.

\item
\label{step-contract}
Contract long inheritance chains.
For each abstract class $v$:
find all concrete classes $u$
which can be reached from $v$ using subclass edges
only, and add a subclass edge $\edge v\diamond u$ if one does not exist
already. Finally, delete all subclass edges leading to abstract classes.
\end{enumerate}

Informally, Step \ref{step-introduce} decouples the sub-classing
role from concrete classes by introducing an additional abstract class
for each class which has both subclass and reference edges outgoing
from it. 

Step \ref{step-simpleten} unfolds inherited reference edges by pushing them 
down the subclass hierarchy.
This can be done efficiently by traversing the subclass edges
in a top-down fashion, starting with nodes with no subclass edges
incoming into them, and ``collecting'' reference edges as we go down. Details
are omitted. 

Step \ref{step-contract} can be viewed as taking the transitive 
(non-reflexive) closure of the subclass relation. This step can be done
in parallel with Step  \ref{step-simpleten}.

For the bound on the size of the resulting graph,
note first that only Step \ref{step-introduce} may change the number
of nodes by at most doubling it.
Next, note that since Steps \ref{step-simpleten} and \ref{step-contract}
do not change the connectivity structure of the graph, we can deal with
each connected component separately. Consider such a component with
$n$ nodes. Since it is connected, there are at least $n-1$ nodes in the 
component before Steps \ref{step-simpleten} and \ref{step-contract}.
Since these steps do not introduce nodes or parallel edges, 
they may introduce at most $O(n^2)$ new edges.
We may therefore conclude that the number of nodes in $\Simple(G)$
is at most doubled and the number of edges is at most squared.

\section{Code for the example in Figure \ref{fig-comp}}
\label{app-example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this section we show how to program the example of
Fig.~\ref{fig-comp} in DemeterJ \cite{cse:preventive}.
We show how to write a program which traverses a
given {\tt A}-object according to the given strategy
and also counts the number of {\tt E}-objects found
during the traversal.

The the code for the strategy given in pane 2 of Figure
\ref{fig-comp}, 
including the name map and and the
constraint map given below. The strategy is named {\tt S}.

\begin{verbatim}

strategy 
S = { A -> D
           D -> E
      A -> Z          bypassing -> A,d,D
           Z -> E     bypassing A
}

\end{verbatim} 

The notation for strategy graphs is an edge list, where each
edge may have constraints associated with it. 
% SOURCES and TARGETS!
The default name map associates a strategy node with a class with the
same label (this cannot be used if multiple strategy nodes are mapped
to the same class).

The class graph code is annotated with some syntactic sugar to make it
an LL(1) grammar which is given to the Java Compiler Compiler to 
create an object for a given input sentence. The code for the class
graph of pane 1 of Figure \ref{fig-comp} is as follows \cite{Lieber:book}:

\begin{verbatim}
A = "a" <b> B <c> C  <d> D .
B = "b" <z> Z .
D = "d" <y> Y.
C = <e> E .
Y : A | B.
Z : D | E.
E = "e" .
\end{verbatim} 
%
Essentially, the graph is represented as a list of nodes, where each
node is represented by a list of its outgoing edges.
%We will not explain this EBNF-style grammar notation in detail. 

To write the program which counts the number of {\tt E}-objects contained
in an {\tt A}-object, we first define a visitor class:

\begin{verbatim}
CountingVisitor {
  int r;
  init (@ r = 0; @)
  before E (@ r++; @)
  return int (@ r; @)
}
\end{verbatim} 

This visitor class is encapsulating the counting behavior, including
how to initialize (the init method)
and what to return (the return method) after the traversal is complete.

Next we define a traversal/visitor combination {\tt t}, 
called a traversal method {\tt t}, 
using the strategy {\tt S} we defined earlier and the {\tt CountingVisitor}
defined above. 
In general, a traversal method is a strategy combined with a list
of parameter types (visitor classes).
Finally, we define an adaptive method {\tt CountCertainEs} for class {\tt A} 
which defines
with which subclass of {\tt CountingVisitor} we want to do the work.
In this case we use {\tt CountingVisitor} itself.

\begin{verbatim}
A {
  traversal t(CountingVisitor v) { do S; }
  int CountCertainEs() = t(CountingVisitor);
}
\end{verbatim}
%
We call CountCertainEs in class Main as follows.

\begin{verbatim}
      A a = A.parse(System.in);
      int result = a.CountCertainEs(); 
\end{verbatim} 

To summarize the adaptive programming approach used above, called
the strategy/visitor style, we note
that behavior is expressed in terms of strategies and
visitors. Strategies are abstract path specifications, i.e.,
not attached to a particular class structure or visitor
class. Visitors are specifications of behavior which needs
to be executed while the paths are traversed.
Strategies and visitors are combined in two steps:
1. First a strategy/visitor combination, 
called a traversal method,
is defined. A traversal method is attached to a class and used
to traverse an object graph. 2. A specific adaptive method is chosen
from the traversal method by specifying
which specific visitor subclasses to use.

The strategy/visitor style is a good approach 
to express behavior since strategies,
visitors, traversal methods and adaptive methods 
are reusable and the resulting programs can 
be more easily evolved by changing strategies or visitors or class graphs.
The coupling between strategies and visitors is loose, much looser
than in the tangled mess of the visitor design pattern described in 
\cite{gang-of-4} where the class graph information is spread all over.


%[[[code for class graph, strategy, name map, and constraint map]]]

\section{Target language for static compilation}
\label{sec-target}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Static traversal compilers  compile strategies and class graphs
 into an object-oriented program where the sequence of
methods invoked by an object depends only on the object structure
and the method name (no parameter passing is allowed).
Formally, the language is defined as follows.

A {\em program} in the target language is a partial function which maps
a class name and a method name to a method.
A method is a tuple of the form 
$\interval{l_1.m_1,\ldots, l_n.m_n}$, where $l_1\ldots l_n \in\cL$
and $m_1\ldots m_n$ are method names.
When invoked, such a method executes by invoking $l_i.m_i$ in order.
We distinguish two kinds of methods: {\em visiting} and {\em non-visiting},
prescribed by a predicate $\Visit$ defined on the set of method names.

An invocation of a program is defined as follows.
If $\Omega$ is an object graph, $o$ a node in $\Omega$,
$m$ a method name,
$\Prog$ a program in the target language,
and $H$ a sequence of objects, 
then the judgment 
\[
\Omega \vdash_c o:m:\Prog \rhd H
\]
means that when sending the message $m$ to $o$,
we get a traversal of the object graph $\Omega$ starting in $o$
so that $H$ is the traversal history.
Formally, this holds when the judgment is derivable using
the following rules:
\[
\hspace*{1.5cm}
\irule{\Omega \vdash_c o_i:m_i: \Prog \rhd H_i \air \forall i \in 1..n}
      {\Omega \vdash_c o:m: \Prog \rhd o \cdot H_1  \cdot ... \cdot H_n} 
   ~~~~~~
      \parbox{3in}{
       if $\Prog(\Class(o),m) = \interval{l_1.m_1\ldots l_n.m_n}$, and \\
          $\Visit(m)$,
         and $\edge o {l_i} {o_i} \mbox{ is in } \Omega
                \mbox{ for all }     i \in 1..n$.}
\]
and 
\[
\hspace*{1.5cm}
\irule{\Omega \vdash_c o_i:m_i: \Prog \rhd H_i \air \forall i \in 1..n}
      {\Omega \vdash_c o:m: \Prog \rhd  H_1  \cdot ... \cdot H_n} 
   ~~~~~~
      \parbox{3in}{
       if $\Prog(\Class(o),m) = \interval{l_1.m_1\ldots l_n.m_n}$, and \\
          $\neg\Visit(m)$, and $\edge o {l_i} {o_i} \mbox{ is in } \Omega 
                  \mbox{ for all } i \in 1..n$.}
\]
The label $c$ of the turnstile indicates ``code''.
Intuitively, the rule says that when sending the message $m$ to $o$,
we check if $o$ understands the message, and if so, 
 we invoke the method. The object $o$ is added to the traversal history only 
if $\Visit(m)$ is true.
Notice that for $n=0$, the rule is an axiom; in the case that 
 $\Visit(m)$ is true, it is simply
\[
\hspace*{3.5cm}
\irule{}
      {\Omega \vdash_c o:m: \Prog \rhd o }
   ~~~~~~
      \parbox{3in}{
       if $\Prog(\Class(o),m) = \interval{}$}
\]
and if $\Visit(m)$ is false, then it is
\[
\hspace*{3.5cm}
\irule{}
      {\Omega \vdash_c o:m: \Prog \rhd \epsilon }
   ~~~~~~
      \parbox{3in}{
       if $\Prog(\Class(o),m) = \interval{}$,}
\]
where $\epsilon$ denotes the empty history.

Given a program in the target language, 
it is straightforward to generate, for example, a \C++ or a Java program.

\end{document}

