@TECHREPORT{ahmed:wysiwyg-relaxed,
AUTHOR = "Ahmed Abdelmeged and Karl Lieberherr",
TITLE = "{Abbreviated Path Expressions With Iterated Wild Cards: WYSIWYG Semantics and Efficient Implementations}",
YEAR = "2010",
INSTITUTION = "CCIS/PRL, Northeastern University, Boston",
MONTH = "May",
NUMBER = "NU-CCIS-10-12"
}
The following work on cover languages is related to our paper. The idea is also to enlarge the regular language of interest with harmless words while reducing the state complexity.
A language L(k) over Sigma is called a cover language for the finite language L if L(k) intersect Sigma(less than k) = L. We use also cover languages. See: An Incremental Algorithm for Constructing Minimal Deterministic Finite Cover Automata, by Cezar Campeanu, Dndrei Paun and Jason Smith, in implementation and Application of Automata. CIAA 2005. Also in: Theoretical computer science Y. 2006, vol. 363, No. 2, pages 135-148 [14 pages] [bibl. : 22 ref.]
@article{504535,
title = {Minimal cover-automata for finite languages},
journal = {Theor. Comput. Sci.},
volume = {267},
number = {1-2},
year = {2001},
issn = {0304-3975},
pages = {3--16},
doi = {http://dx.doi.org/10.1016/S0304-3975(00)00292-9},
publisher = {Elsevier Science Publishers Ltd.},
address = {Essex, UK},
}
@article{1217611,
author = {C\^{a}mpeanu, Cezar and Paun, Andrei and Smith, Jason R.},
title = {Incremental construction of minimal deterministic finite cover automata},
journal = {Theor. Comput. Sci.},
volume = {363},
number = {2},
year = {2006},
issn = {0304-3975},
pages = {135--148},
doi = {http://dx.doi.org/10.1016/j.tcs.2006.07.020},
publisher = {Elsevier Science Publishers Ltd.},
address = {Essex, UK},
}
@BOOK{polya:solve-it,
AUTHOR = "George Polya",
TITLE = "How to solve it",
PUBLISHER = "Princeton University Press",
YEAR = "1949"
}
C. Campeanu et al., Finite languages and cover automata, Theoret. Comput. Sci. 267 (2001) (1-2), pp. 3-16.
Abstract: A cover-automaton A of a finite language Lsubset of or equal ta Sigma* is a finite deterministic automaton (DFA) that accepts all words in L and possibly other words that are longer than any word in L. A minimal deterministic finite cover automaton (DFCA) of a finite language L usually has a smaller size than a minimal DFA that accept L. Thus, cover automata can be used to reduce the size of the representations of finite languages in practice. In this paper, we describe an efficient algorithm that, for a given DFA accepting a finite language, constructs a minimal deterministic finite cover-automaton of the language. We also give algorithms for the boolean operations on deterministic cover automata, i.e., on the finite languages they represent.
The idea of cover languages may be viewed as an instance of Polya's Inventor's Paradox which has numerous other instances. See Poly'a Inventor's Paradox. The problem we want to solve is: find a small automaton for language L. The invention we have to make is to find a cover Cover(L,K) which generalizes language L with harmless words outside some set K called the universe or the effective set. The invention consists of finding a suitable K as well as defining Cover(L,K) abbreviated as [K, Cover(L,K)] so that Cover(L,K) intersect K = L. The metric used is the size of the automaton. We want StateComplexity(Cover(L,K)) << StateComplexity(L).
Should we worry about StateComplexity(Cover(L,K)) as well as StateComplexity(Cover(L,K) intersect K) and StateComplexity(K)? No, if we only check elements of K for membership in L. Then only StateComplexity(Cover(L,K)) is important.
The generalization then is: Problem: Given languages L, K over the same alphabet Sigma. Find a fast recognizer for L for words in K. Solution: Choose any language X satisfying X intersect K = L so that StateComplexity(X) << StateComplexity(L).
The solution is correct because for words in K, a recognizer for X is also a recognizer for L. Given L and K, we want the language X so that StateComplexity(X) = MinStateComplexity(L,K)
MinStateComplexity(L,K) = min [over all X satisfying X intersect K = L] StateComplexity(X)
By making a proper invention [K, Cover(L,K)] for L, the automaton for Cover(L,K) is smaller. In this sense, solving the more general problem is easier, because it needs fewer states. Defining a super language is clearly a form of generalization. Mapping back from the general problem to the concrete problem is easy: do the intersection with K or give the automaton only inputs in K.
Polya's inventor paradox is used at two levels: by the programmer (invent abbreviated path expressions) and by the implementation technology which uses a generic cover language.
All references to bibtex entries are given above.
Polya's Inventor Paradox (\cite{polya:solve-it}) finds applications at two levels in our paper: at the programmer level and at the implementation level. First, finding a WYSIWYG abbreviated path expression is a little invention in the sense of Polya. A proper invention leads to a simplification of the programming task by writing a Generic Program. This applies to all three programming models: AP, XML and AOP. The original problem becomes easier because now the program is written in terms of the paths defined by the abbreviated path expression which are free of all the noise in the original problem.
The second application of Polya's Inventor's Paradox is hidden from the programmer: it is used at the implementation level to construct efficient path recognizers. The problem we want to solve is: find a small automaton for language L. The invention we have to make is to find a cover Cover(L,K) which generalizes language L with harmless words outside some set K called the universe or the effective set. The invention consists of finding a suitable K as well as defining Cover(L,K) abbreviated as [K, Cover(L,K)]. The metric used is the state complexity of the automaton. By making a proper invention [K, Cover(L,K)] for L, the automaton for Cover(L,K) is smaller. In this sense, solving the more general problem is easier, because it needs fewer states. Defining a super language is clearly a form of generalization. Mapping back from the general problem to the concrete problem is easy: do the intersection with K or give the automaton only inputs in K. The idea of a cover is a generalization of the cover idea in \cite{1217611}.
XML related work
@article{958947,
author = {Diao, Yanlei and Altinel, Mehmet and Franklin, Michael J. and Zhang, Hao and Fischer, Peter},
title = {Path sharing and predicate evaluation for high-performance XML filtering},
journal = {ACM Trans. Database Syst.},
volume = {28},
number = {4},
year = {2003},
issn = {0362-5915},
pages = {467--516},
doi = {http://doi.acm.org/10.1145/958942.958947},
publisher = {ACM},
address = {New York, NY, USA},
}
@inproceedings{653613,
author = {Fernandez, Mary F. and Suciu, Dan},
title = {Optimizing Regular Path Expressions Using Graph Schemas},
booktitle = {ICDE '98: Proceedings of the Fourteenth International Conference on Data Engineering},
year = {1998},
isbn = {0-8186-8289-2},
pages = {14--23},
publisher = {IEEE Computer Society},
address = {Washington, DC, USA},
}
@article{1042051,
author = {Green, Todd J. and Gupta, Ashish and Miklau, Gerome and Onizuka, Makoto and Suciu, Dan},
title = {Processing XML streams with deterministic automata and stream indexes},
journal = {ACM Trans. Database Syst.},
volume = {29},
number = {4},
year = {2004},
issn = {0362-5915},
pages = {752--788},
doi = {http://doi.acm.org/10.1145/1042046.1042051},
publisher = {ACM},
address = {New York, NY, USA},
}
Automata are widely used to evaluate XPath Queries on XML documents
\cite{1042051,958947}. Some use schemas to speed up the processing
\cite{653613} while others don't \cite{958947}.
Simple, but very effective techniques, such as the Stream Index
\cite{1042051} are used to skip over irrelevant document parts.
None of the papers in the XML literature use our idea of cover
automata to significantly simplify the deterministic automata.