%%.tex template for scribes
%Use \equation, \[ \], \align at will, but try not to use too fancy environments.
%Known limitations:
%Don't define any new commands, especially those for math mode.
%Doing math inside \text inside math screws things up,
%e.g. $ \text{this = $x$}$ won't work.
%don't use \qedhere
%\ref inside math doesn't work (why?). It remains as \ref{claim} in wordpress
\documentclass[12pt]{article}
\usepackage[latin9]{inputenc}
\usepackage{amsthm}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{hyperref}
\makeatletter
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
\theoremstyle{plain}
\newtheorem{thm}{\protect\theoremname}
\theoremstyle{plain}
\newtheorem{lem}[thm]{\protect\lemmaname}
\theoremstyle{plain}
\newtheorem{prop}[thm]{\protect\propositionname}
\theoremstyle{remark}
\newtheorem{claim}[thm]{\protect\claimname}
\theoremstyle{plain}
\newtheorem{conjecture}[thm]{\protect\conjecturename}
\theoremstyle{definition}
\newtheorem{problem}[thm]{\protect\problemname}
\theoremstyle{remark}
\newtheorem{rem}[thm]{\protect\remarkname}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
%lyx2wpost preable August 3 2017.
%This is an evolving preamble which works in tandem with myConfig5.cfg
%and the lyx2wpost script
\usepackage[english]{babel}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
\newtheorem{theorem}{Theorem}[section]
%\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{proposition}[theorem]{Proposition}
%\newtheorem{claim}[theorem]{Claim}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{construction}[theorem]{Construction}
%Theorem 1 instead of Theorem 0.1
\renewcommand{\thetheorem}{%
\arabic{theorem}}
\renewenvironment{theorem}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Theorem \thetheorem.}}{\vspace{0.2cm} }
\ifcsname thm\endcsname
\renewenvironment{thm}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Theorem \thetheorem.}}{\vspace{0.2cm} }%
\fi
\ifcsname conjecture\endcsname
\renewenvironment{conjecture}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Conjecture \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname corollary\endcsname
\renewenvironment{corollary}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Corollary \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname proposition\endcsname
\renewenvironment{proposition}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Proposition \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname prop\endcsname
\renewenvironment{prop}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Proposition \thetheorem.}}{\vspace{0.2cm} }%
\fi
\ifcsname claim\endcsname
\renewenvironment{claim}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Claim \thetheorem.}}{\vspace{0.2cm} }%
\fi
\ifcsname definition\endcsname
\renewenvironment{definition}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Definition \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname lemma\endcsname
\renewenvironment{lemma}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Lemma \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname lem\endcsname
\renewenvironment{lem}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Lemma \thetheorem.}}{\vspace{0.2cm} }%
\fi
\ifcsname remark\endcsname
\renewenvironment{remark}
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Remark \thetheorem.}}{\vspace{0.2cm} }
\fi
\ifcsname rem\endcsname
\renewenvironment{rem}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Remark \thetheorem.}}{\vspace{0.2cm} }%
\fi
\ifcsname problem\endcsname
\renewenvironment{problem}%
{\vspace{0.2cm} \par \refstepcounter{theorem} \noindent \textbf{Problem \thetheorem.}}{\vspace{0.2cm} }%
\fi
%\renewenvironment{proof}{\par \noindent \textbf{Proof:}}{\hfill $\square$ \newline \newline} %QED \par\vspace{0.5cm}}
\renewcommand{\qedsymbol}{$\square$}
\renewcommand{\paragraph}[1]{\textbf{#1}. }
\makeatother
\providecommand{\claimname}{Claim}
\providecommand{\conjecturename}{Conjecture}
\providecommand{\lemmaname}{Lemma}
\providecommand{\problemname}{Problem}
\providecommand{\propositionname}{Proposition}
\providecommand{\remarkname}{Remark}
\providecommand{\theoremname}{Theorem}
\begin{document}
\global\long\def\E{\mathrm{\mathbb{E}}}
%\global\long\def\e{\mathrm{\mathbb{\epsilon}}}
\noindent
\href{http://www.ccs.neu.edu/home/viola/classes/spepf17.html}{Special
Topics in Complexity Theory}, Fall 2017. Instructor:
\href{http://www.ccs.neu.edu/home/viola/}{Emanuele Viola}
%\href{http://www.ccs.neu.edu/home/viola/classes/spepf17.html}{For the
%class webpage click here.}
\section{Lectures 2-3, Scribe: Tanay Mehta}
In these lectures we conclude the proof that bounded independence fools
the AND function, and look at the more recent result that bounded
independence fools the circuit class $\mathrm{AC}^0$.
\subsection{Bounded Independence Fools AND}
We state again the theorem from last time.
\begin{thm}
Let $(X_1, \dots, X_n)$ be a distribution over $\{0,1\}^n$ such that any $k$
$X_i$ are independent (but not necessarily uniform). Then, we have that
$$ \left|\Pr \left[\bigwedge_{i=1}^n X_i = 1\right] - \prod_{i=1}^n \Pr[X_i = 1]\right| \leq 2^{- \Omega(k)}$$
\end{thm}
\begin{proof}
Let $D$ be the distribution of $(X_1, \dots, X_n)$. Let $B$ be the $n$-wise
independent distribution $(Y_1, \dots, Y_n)$ such that $\Pr[Y_i = 1] =
\Pr[X_i = 1]$ for all $i \in [n]$ and the $Y_i$ are independent. The theorem
is equivalent to the following statement.
$$|\Pr_{X \leftarrow D}\left[\bigwedge_{i=1}^n X_i = 1 \right] - \Pr_{X \leftarrow B}\left[\bigwedge_{i=1}^n X_i = 1 \right]| \leq 2^{-\Omega(k)}$$
We will prove the above statement by the following version of the
Inclusion-Exclusion principle.
\subsubsection{Inclusion-Exclusion Principle}
Let $V$ be any distribution over $\{0,1\}^n$. Note that by De Morgan's
laws, we have $$\Pr\left[\bigwedge V_i = 1 \right] = 1 - \Pr\left[\bigvee V_i =
0 \right]$$ Let $E_i$ be the event that $V_i = 0$. We want to bound the
quantity $\Pr\left[\bigcup E_i\right]$. By looking at the Venn diagram of the
events $E_i$, we can see that
\begin{align}
\Pr\left[\bigcup E_i\right] & \leq \Pr[E_1] + \dots + \Pr[E_n] = \sum_i
\Pr[E_i] \nonumber \\
\Pr\left[\bigcup E_i\right] & \geq \sum_i \Pr[E_i] - \sum_{i, j} \Pr[E_i \cap E_j] \nonumber\\
\Pr\left[\bigcup E_i\right] & \leq \sum_i \Pr[E_i] ~~ - \sum_{S \subseteq [n], |S| = 2} \Pr\left[\bigcap_{i \in S} E_i \right]~~+ \sum_{S \subseteq [n], |S| = 3} \Pr\left[\bigcap_{i \in S} E_i\right], \nonumber
\end{align}
and so on. In general, we have the following. Define $$T_j := \sum_{S
\subseteq [n], |S| = j} \Pr\left[ \bigcap_{i \in S} E_i \right]$$ $$S_h :=
\sum_{i=1}^h (-1)^{i+1} T_i $$ Then, we have the bounds $\Pr\left[\bigcup
E_i\right] \leq S_j$ for odd $j$, and $\Pr\left[\bigcup E_i\right] \geq S_j$ for
even $j$. This fact holds for \emph{any} distribution.
Let us return to the proof. Note that the $S_h$ are the same for $D$ and
$B$ up to $h = k$ because they only involve sums of ANDs of at most $k$
events. Hence, we have that
$$\left|\Pr_D \left[\bigwedge X_i = 1\right] - \Pr_B\left[ \bigwedge X_i = 1\right]\right| \leq |S_k - S_{k-1}| = |T_k|$$
where the last equality comes from the definition of $S_k$. Therefore, we
are done if $|T_k| \leq 2^{-\Omega(k)}$. We have that $$T_k = \sum_{S
\subseteq [n], |S| = k} \Pr\left[\bigcap_{i \in S} E_i \right] ={ n \choose k} ~~
\mathbb{E}_{S \subseteq [n], |S| = k} \left[ \prod_{i \in S} P_i \right] $$
where $P_i := \Pr[E_i] = 1 - \Pr[X_i = 1]$. To bound the expectation we
recall a useful inequality.
\subsubsection{A Useful Inequality}
Let $Q_1, \dots, Q_n$ be non-negative real numbers. Then, by the
AM-GM inequality, we have that
$$\frac{\sum_i Q_i}{n} \geq \left(\prod_i Q_i \right)^{1/n}.$$
Consider the following more general statement,
\begin{align}
\mathbb{E}_{S \subseteq [n], |S| = 1} \left[ \prod_{i \in S} Q_i \right] \geq
\mathbb{E}_{S \subseteq [n], |S| = 2} \left[ \prod_{i \in S} Q_i \right]^{1/2}
\geq \dots \hspace{5cm} \nonumber \\ \dots \geq \mathbb{E}_{S \subseteq [n], |S| = k} \left[ \prod_{i \in S}Q_i \right]^{1/k} \geq \dots \geq \mathbb{E}_{S \subseteq [n], |S| = n} \left[ \prod_{i \in S} Q_i \right]^{1/n} \nonumber
\end{align}
and note that the left most term is equal to $\frac{\sum_i Q_i}{n}$, while
the right most term is equal to $\left(\prod_i Q_i \right)^{1/n}$
\newline \newline
Applying the above inequality to $T_k$ and a common approximation for
the binomial coefficient, we have that
$$T_k = { n \choose k} ~~ \mathbb{E}_{S \subseteq [n], |S| = k} \left[ \prod_{i \in S} P_i \right] \leq {n \choose k} \left(\frac{\sum_{i=1}^n P_i}{n}\right)^k \leq \left(\frac{e n}{k}\right)^k \left(\frac{\sum P_i}{n}\right)^k = \left(\frac{e \sum P_i}{k}\right)^k.$$
Therefore, we are done if $\sum P_i \leq \frac{k}{2 e}$. Recall that $P_i =
\Pr[E_i] = 1 - \Pr[X_i = 1]$. So if $P_i$ is small then $\Pr[X_i = 1]$ is close
to $1$.
It remains to handle the case that $\sum P_i \geq \frac{k}{2 e}$.
%Suppose that $\sum P_i \geq \frac{k}{2e} \Leftrightarrow \sum_{i =1}^n 1 -
%\Pr[X_i = 1] \geq \frac{k}{2e} \Leftrightarrow \sum \Pr[X_i = 1] \leq n -
%\frac{k}{2e}$.
Pick $n'$ such that $$\sum_{i=1}^{n'} P_i = \frac{k}{2e} \pm 1.$$ By the
previous argument, the AND of the first $n'$ is the same up to
$2^{-\Omega(k)}$ for $D$ and $B$. Also, for every distribution the
probability that the AND of $n$ bits is $1$ is at most the probability that the
AND of $n'$ bits is $1$. Moreover, for the $n$-wise independent
distribution $B$ we have
% \leq \prod_{i=1}^{n'} e^{-P_i} \leq e^{-\sum_{i=1}^{n'} P_i} = e^{-\Omega(k)}
\begin{align*}
\Pr_B\left[\bigwedge_{i=1}^{n'} X_i = 1\right] & = \prod_{i=1}^{n'} \Pr[X_i = 1] \\
& = \prod_{i=1}^{n'} (1 - P_i)\\
& \leq \left(\frac{\sum_{i=1}^{n'}(1-P_i)}{n'}\right)^{n'} \textrm{ by the AM-GM inequality} \\
& \leq \left(\frac{n' - k/2e}{n'}\right)^{n'} \leq (1 - k/2en')^{n'} \leq e^{-\Omega(k)}.
\end{align*}
The combination of these facts concludes this case. To summarize, in this
case we showed that
$$\Pr_D[\bigwedge_{i=1}^n X_i = 1] \le \Pr_D[\bigwedge_{i=1}^{n'} X_i =
1]$$ as well as $$\Pr_B[\bigwedge_{i=1}^n X_i = 1] \le
\Pr_B[\bigwedge_{i=1}^{n'} X_i = 1] \leq 2^{-\Omega(k)}.$$ By the choice of
$n'$ and the previous argument, we also know that
$|\Pr_D[\bigwedge_{i=1}^{n'} X_i = 1]- \Pr_B[\bigwedge_{i=1}^{n'} X_i = 1]|
\le 2^{-\Omega(k)}$ and so we are done, as all quantities above are at
most $2^{-\Omega(k)}$ (and at least $0$).
\end{proof}
\begin{remark}
The bound is tight up to the constant in the $\Omega(\cdot)$.
\end{remark}
\begin{proof}
Let $D$ be the distribution over $\{0,1\}^{k+1}$ as follows: $D_{1, \dots, k}
= U_k$ and $D_{k+1} = D_1 + \dots + D_k \mod 2$. Then, $D$ is
$k$-wise independent. However, if $k$ is even, then
$$\Pr[\bigwedge_{i=1}^{k+1} D_i = 1] = 0.$$ Yet, we have that
$$\Pr[\bigwedge_{i=1}^{k+1} U_i = 1] = 2^{-(k+1)}.$$
\end{proof}
\subsection{Bounded Independence Fools $\textrm{AC}^0$}
\paragraph{Acknowledgement} This section is based on \href{http://www.cs.tau.ac.il/~amnon/Classes/2016-PRG/class.htm}{Amnon Ta-Shma's notes} for the class 0368.4159 Expanders, Pseudorandomness and Derandomization
CS dept, Tel-Aviv University, Fall 2016.
\medskip
Note that a DNF on $n$ bits can be modeled as a depth two circuit where
the top layer is an OR-gate whose inputs are AND-gates, which take
inputs $X_1, \dots, X_n$ and their negations. The circuit class
$\textrm{AC}^0$ can be viewed as a generalization of this to higher (but
constant) depth circuits. That is, $\textrm{AC}^0$ consists of circuits using
AND-gates, OR-gates, NOT-gates, and input registers. Each of the gates
have unbounded fan-in (i.e. the number of input wires). The size of the
circuit is defined to be the number of gates.
$\textrm{AC}^0$ is one of the most studied classes in complexity theory.
$\textrm{AC}^0$ circuits of polynomial size can do many things, including
adding and subtracting $n$-bit integers.
\begin{conjecture}[Linial-Nisan\cite{LinialN90}]
$\log^{O(d)} s$-wise independence fools $\textrm{AC}^0$ circuits of depth
$d$ and size $s$.
\end{conjecture}
The conjecture was open for a long time, even in the special case
$d=2$. In 2007 a breakthrough work by Bazzi \cite{Bazzi09} proved it for
$d=2$. Shortly afterwards, Razborov presented a simpler proof of Bazzi's
result \cite{Razborov09}, and Braverman proved the conjecture for any $d$
with $\log^{d^2} s$-wise independence \cite{Braverman10}. Tal improved
the result to $\log^{O(d)} s$ \cite{Tal17}.
Interestingly, the progress on the conjecture relies only on ideas that were
already around at the time of its formulation. Bottom line: if a problem is
open for a long time, you should immediately attack it with existing tools.
The high-level intuition why such a result should be true is the following:
\begin{enumerate}
\item $\textrm{AC}^0$ is approximated by polylog degree polynomials.
\item $k$-wise independence fools degree-$k$ polynomials.
\end{enumerate}
\begin{proof}[Proof of (2)]
Let $x = (x_1, \dots, x_n) \in \{0,1\}^n$. Let $p(x_1, \dots, x_n)$ be a
degree $k$ polynomial over $\mathbb{R}$. Write $p$ as $$p(x_1, \dots,
x_n) = \sum_{M \subseteq [n], |M| \leq k} c_M \cdot x_M,$$ where $x_M := \prod_{i \in M} x_i$. If $D$ is a
$k$-wise independent distribution on $\{0,1\}^n$, then by linearity of
expectation
\[
\mathbb{E}_D[p] = \sum_{M \subseteq [n], |M| \leq k} c_M \mathbb{E}_D[x_M] = \sum_{M \subseteq [n], |M| \leq k} c_M \mathbb{E}_U[x_M] = \mathbb{E}_U[p].
\]
\end{proof}
There are several notions of approximating AC$^0$ by low-degree
polynomials. We now review two of them, explaining why neither of them
is sufficient. Braverman showed how to cleverly combine the two methods
to prove a version of (1) that's strong enough.
\subsubsection{Approximation 1}
\begin{theorem}
For all $\textrm{AC}^0$ circuits $C(x_1, \dots, x_n)$ of size $s$ and depth
$d$, for all distributions $D$ over $\{0,1\}^n$, for all $\epsilon$, there
exists a polynomial $p(x_1, \dots, x_n)$ of degree $\log^{O(d)} (s/\epsilon)$
such that $$\Pr_{x \leftarrow D}[p(x) = C(x)] \geq 1 - \epsilon.$$
\end{theorem}
The important features of this approximation are that it works under any
distribution, and when the polynomial is correct it outputs a boolean value.
Similar approximations appear in many papers, going back to Razborov's
paper \cite{Raz87} (who considers polynomials modulo 2) which uses
ideas from earlier still work.
Note that the polynomial $p$ depends on the circuit $C$ chosen, and on
the distribution. This theorem is not a good enough approximation because
on the $\epsilon$ fraction of inputs where the polynomial and circuit are
unequal, the value of the polynomial can (and does) explode to be much
greater than $1/\epsilon$. This prevents us from bounding the average of
the polynomial.
Nevertheless, let us prove the above theorem.
\begin{proof}
Consider one OR-gate of fan-in $s$. We construct a distribution of
polynomials that compute any input with high probability. This implies that
there is a fixed polynomial that computes the circuit on a large fraction of
the inputs by an averaging argument.
For $i = 1, 2, \dots, \log s$, let $S_i$ be a random subset of $[s]$ where
every element is included with probability $1/2^i$, independently.
Suppose $x$ has Hamming weight $2^j$. Then, $\mathbb{E}[\sum_{h \in
S_j} x_h] = 1$. And the sum can be shown to equal $1$ with constant
probability.
Define the approximation polynomial $p$ to be $$p(x) := 1 -
\prod_{i=1}^{\log s} (1 - \sum_{h \in S_i} x_h)$$ Note that if $x$ has weight
$w > 0$, then $p(x) = 1$ with constant probability. If $w = 0$, then $p(x) =
0$ with probability $1$. We can adjust the error probability to $\epsilon$
by repeating each term in the product $\log(1/\epsilon)$ times.
Thus, we can approximate one gate with the above polynomial of degree
$O(\log(s) \cdot \log(1/\epsilon))$. Construct polynomials as $p$ above for
each gate, with error parameter $\epsilon/s$. The probability that any of
them is wrong is at most $\epsilon$ by a union bound. To obtain the
approximating polynomial for the whole circuit compose all the polynomials
together. Since the circuit is of depth $d$, the final degree of the
approximating polynomial is $(\log(s) \cdot \log(s/\epsilon))^{d}$, as
desired.
As mentioned at the beginning, this is a distribution on polynomials that
computes correctly any input with probability at least $1-\epsilon$. By
averaging, there exists a fixed polynomial that computes correctly a
$1-\epsilon$ fraction of inputs.
\end{proof}
It can be verified that the value of the polynomial can be larger than
$1/\epsilon$. The polynomial for the gates closest to the input can be as
large as $s$. Then at the next level it can be as large as $s^{\log
s/\epsilon}$, which is already much larger than $1/\epsilon$.
\subsubsection{Approximation 2}
\begin{theorem} \label{t-lmn}
For all circuits $C$ of size $s$ and depth $d$, for all error values
$\epsilon$, there exists a polynomial $p(x_1, \dots, x_n)$ of degree
$O(\log(s)^{d-1} \log (1/\epsilon))$ such that
\[
\mathbb{E}_{x \leftarrow U_n}[(C(x) - p(x))^2] \leq \epsilon.
\]
\end{theorem}
The important feature of this approximation is that it bounds the average,
but only under the uniform distribution. Because it does not provide any
guarantee on other distributions, including $k$-wise independent
distributions, it cannot be used directly for our aims.
\begin{remark}
Approximation 2 is proved via the \emph{switching lemma}, an influential
lemma first proved in the early 80's by Ajtai \cite{Ajt83} and by Furst, Saxe,
and Sipser \cite{FSS84}. The idea is to randomly set a subset of the
variables to simplify the circuit. You can do this repeatedly to simplify the
circuit even further, but it only works on the uniform distribution. Hastad
\cite{Has87} gave a much tighter analysis of the switching lemma, and the
paper \cite{LMN93} used it to prove a version of Approximation 2 with a
slightly worse dependence on the error. Recently, a refinement of the
switching lemma was proved in \cite{Hastad14a,ImpagliazzoMP-AC0-sat}.
Based on that, Tal \cite{Tal17} obtained the corresponding refinement of
Approximation 2 where the parameters are as stated above. (The
polynomial is simply obtained from the Fourier expansion of the function
computed by the circuit by removing all Fourier coefficients larger than a
certain threshold. The bound on the Fourier decay in \cite{Tal17} implies
the desired approximation.)
\end{remark}
\subsection{Bounded Independence Fools $\textrm{AC}^0$}
\begin{theorem}
\label{braverman} For all circuits $C$ with unbounded fan-in of size $s$
and depth $d$, for all error values $\epsilon$, for all $k$-wise independent
distributions $D$ on $\{0,1\}^n$, we have that
\[
|\mathbb{E}[C(D)] - \mathbb{E}[C(U_n)]| \leq \epsilon
\]
for $k = \log (s/\epsilon)^{O(d)}$.
\end{theorem}
\begin{corollary}
In particular, if $s = \textrm{poly}(n)$, $d = O(1)$, $\epsilon = 1/\textrm{poly}(n)$,
then $k = \log^{O(1)}(n)$ suffices.
\end{corollary}
The next claim is the ultimate polynomial approximation used to prove the
theorem.
\begin{claim}
\label{claim} For all circuits $C$ with unbounded fan-in of size $s$ and
depth $d$, for all error values $\epsilon$, for all $k$-wise independent
distributions $D$ on $\{0,1\}^n$, there is a set $E$ of inputs, and a
degree-$k$ polynomial $p$ such that:
\begin{enumerate}
\item $E$ is `rare' under both $D$ and $U_n$: \newline $\Pr_{x
\leftarrow U_n}[E(x) = 1] \leq \epsilon$, and $\Pr_{x \leftarrow D}[E(x)
=1] \leq \epsilon$. Here we write $E(x)$ for the indicator function of
the event $x \in E$.
\item For all $x$, $p(x) \leq C(x) \vee E(x)$. Here $\vee$ is the logical
Or.
\item $\mathbb{E}[p(U_n)] = \mathbb{E}[C(U_n)] \pm \epsilon$.
\end{enumerate}
\end{claim}
We only need (1) under $D$, but (1) under $U$ is used to prove (3).
\begin{proof}[Proof of Theorem \ref{braverman} from Claim \ref{claim}]
\begin{align}
\mathbb{E}[C(D)] & = \mathbb{E}[C(D) \vee E(D)] \pm \epsilon \text{, by Claim.(1)} \nonumber \\
& \geq \mathbb{E}[p(D)] \pm \epsilon \text{, by Claim.(2)} \nonumber \\
& = \mathbb{E}[p(U_n)] \pm \epsilon \text{, because } p \text{ has degree } k \text{ and } D \text{ is } k \text{-wise independent} \nonumber \\
& = \mathbb{E}[C(U_n)] \pm \epsilon \text{, by Claim.(3)} \nonumber
\end{align}
For the other direction, repeat the argument for `not $C$'.
\end{proof}
We can construct the polynomial approximation from Claim \ref{claim} by
using a combination of Approximation 1 and 2. First we need a little more
information about Approximation 1.
\begin{claim}
\label{prop} Two properties of approximation 1:
\begin{enumerate}
\item For all $x$, $p(x) \leq 2^{\log(s/\epsilon)^{O(d)}}$. \label{prop1}
\item The `bad' set $E$ is computable by a circuit of size
$\textrm{poly}(s)$, and depth $d + O(1)$. \label{prop2}
\end{enumerate}
\end{claim}
\begin{proof}[Proof of Claim \ref{prop} part \ref{prop2}]
Consider a single OR gate with input gates $g_1, \dots, g_s$. This is
represented in the approximating polynomial by the term $$1 -
\prod_{i=1}^{\textrm{polylog}(s/\epsilon)}(1 - \sum_{j \in S_i} g_j).$$ Note
that the term is incorrect exactly when the input $g_1, \dots, g_s$ has
weight $> 0$ but every set $S_i$ contains either $0$ or $\geq 2$ of the ones. This
can be checked in AC$^0$, in parallel for all gates in the circuit.
\end{proof}
\begin{proof}[Proof of Claim \ref{claim}]
Run approximation 1 for the distribution $\frac{D+U}{2}$, yielding the
polynomial $p_c$ and the set $E$. This already proves the first part of the
claim for both $D$ and $U$, because if $E$ has probability $\epsilon$
under $D$ it has probability $\ge \epsilon/2$ under $(D+U)/2$, and the
same for $U$. Use Claim \ref{prop} part \ref{prop2}, and run approximation
2 on $E$. Call the resulting polynomial $p_E$, which has degree
$\log(s/\delta)^{O(d)}$ with error bound $\delta$.
The idea in the ultimate approximating polynomial is to ``check if there is a
mistake, and if so, output $0$. Otherwise, output $C$''. Formally:
$$p(x) := 1 - (1 - p_c(1 - p_E))^2.$$
Claim \ref{claim} part 2 can be shown as follows. $p(x) \leq 1$ by
definition. So, if $C(x) \vee E(x) = 1$, then we are done. Otherwise, $C(x)
\vee E(x) = 0$. So there is no mistake, and $C=0$. Hence, by the
properties of Approximation 1, $p_c(x) = 0$. This implies $p(x)=0$.
It only remains to show Claim \ref{claim} part 3:
\[
\mathbb{E}_U[p(x)] = \mathbb{E}_U[C(x)] \pm \epsilon.
\]
By part 1 of Claim \ref{claim},
\[
\mathbb{E}_U[C(x) - p(x)] = \mathbb{E}_U[C(x) \vee E(x) - p(x)] \pm \epsilon.
\]
We can show that this equals
\[
\mathbb{E}_U\left[\left(C(x) \vee E(x) - p_c(x)(1-p_E(x))\right)^2\right] \pm \epsilon
\]
by the following argument: If $C(x) \vee E(x) =1$ then $1-p(x) =
(1-p_c(x)(1-p_E(x)) )^2$ by definition. If $C(x) \vee E(x) =0$, then there is
no mistake, and $C(x) = 0$. This implies that $p_c(x)(1 - p_E(x)) = p(x) =
0$.
Let us rewrite the above expression in terms of the expectation $\ell_2$
norm.
\[
|| C \vee E - p_c(1 - p_E)||_2^2.
\]
Recall the triangle inequality, which states: $||u - v||_2 \leq ||u - w||_2 + ||w -
v||_2$. Therefore, letting $w = p_c(1 - E)$ we have that the above quantity
is
\[
\leq (|| p_c(1 - E) - p_c(1 - p_E)||_2 ~~ + ~~ ||p_c(1 - E) - C \vee E ||_2)^2
\]
\[
\leq O(||p_c(1-E) - p_c(1 - p_E)||_2^2 + ||p_c (1 - E) - C \vee E ||_2^2).
\]
To conclude, we will show that each of the above terms are $\leq
\epsilon$. Note that
\[
||p_c(1-E) - p_c(1 - p_E)||_2^2 \leq \max_x |p_c(x)|^2 ||(1 - E) - (1 - p_E)||_2^2.
\]
By Claim \ref{prop} part \ref{prop1} and Approximation 2, this is at most
\[
2^{\log(s/\epsilon)^{O(d)}} \cdot ||E - p_E||_2^2 \leq 2^{\log(s/\epsilon)^{O(d)}} \cdot \delta.
\]
For this quantity to be at most $\epsilon$ we set $\delta = \epsilon \cdot
2^{-\log(s/\epsilon)^{O(d)}}$. Here we critically set the error in
Approximation 2 much lower, to cancel the large values arising from
Approximation 1. By Theorem \ref{t-lmn}, the polynomial arising from
approximation 2 has degree $O(\log (s)^{d-1} \log(1/\delta)) =
\log(s/\epsilon)^{O(d)}$.
Finally, let us bound the other term, $ ||p_c (1 - E) - C \vee E ||_2^2$. If
$E(x) = 0$, then the distance is $0$. If $E(x) =1$, then the distance is
$\leq 1$. Therefore, this term is at most $\Pr_U[E(x) =1]$, which we know
to be at most $\epsilon$.
\end{proof}
\bibliographystyle{alpha}
\bibliography{C:/home/krv/math/OmniBib}
\end{document}