From 61e0d7239b2a23297557c903f81066d9124258fb Mon Sep 17 00:00:00 2001
From: Martin Hafskjold Thoresen <martinhath@gmail.com>
Date: Tue, 4 Jul 2017 12:34:33 +0200
Subject: Begin on Static Tree Queries

---
 book.tex | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 116 insertions(+), 3 deletions(-)

(limited to 'book.tex')

diff --git a/book.tex b/book.tex
index deccabf..0375356 100644
--- a/book.tex
+++ b/book.tex
@@ -1,9 +1,12 @@
 \documentclass[a4paper]{book}
-\usepackage[a4paper, top=3cm,bottom=3cm, left=3cm, right=3cm, paperheight=23cm]{geometry}
+\usepackage[a4paper, top=3cm,bottom=3cm, left=3cm, right=3cm%
+%, paperheight=23cm
+]{geometry}
 \usepackage{amsmath}
 \usepackage{amsfonts}
 \usepackage{tikz}
 \usetikzlibrary{positioning,calc,shapes}
+\usepackage{todonotes}
 \usepackage{hyperref}
 \usepackage[backend=bibtex]{biblatex}
 \bibliography{bibtex}
@@ -16,9 +19,17 @@
 
 \newcommand{\topics}[1]{Topics: \textit{#1}}
 \newcommand{\code}[2]{\textsc{#1}$(#2)$}
+
+\newcommand{\RMQ}{$\textsc{RMQ}$}
+\newcommand{\LCA}{$\textsc{LCA}$}
+\newcommand{\LA}{$\textsc{LA}$}
+
+
 \newenvironment{example}[0]{{\parindent=0em \textbf{Example:}}}{\vspace{1em}}
 \newenvironment{definition}[1]{{\parindent=0em \textbf{Definition} --- \emph{#1}:\\}}{\vspace{1em}}
 
+\DeclareMathOperator*{\argmin}{arg\,min} % thin space, limits underneath in displays
+
 % Expectation symbol
 \DeclareMathOperator*{\E}{\mathbb{E}}
 
@@ -150,8 +161,110 @@ If the hashing functions are fully random or $\log n$-wise independent we get $O
 
 \chapter{Static Tree Queries}
 
-Lowest Common Ancestor (\texttt{LCA}), Level Ancestor (\texttt{LA}),
-Range Queries, Range Trees.
+\topics{Range Minimum Query (\code{RMQ}{i, j}), Lowest Common Ancestor (\code{LCA}{x, y}), Level Ancestor (\code{LA}{x, n}), Range Queries, Range Trees.}
+
+We would like $O(1)$ query time on selected operations, and still only $O(n)$ space;
+if we allow $O(n^2)$ space this is trivial, as we can simply precompute all queries and store them.
+
+\section{Range Minimum Query}
+We would like to retrieve the index of the minimum element in an array, between index $i$ and $j$.
+Our goal is to get $O(n)$ time and space preprocessing, and constant time query.
+Note that this is easy to do in $O(\log n)$ time using Range Trees ---
+store minimum in internal nodes, and traverse from $i$ to $j$ in $O(\log n)$ time.
+
+It turns out that \LCA{} and \RMQ{} are equivalent.
+
+\subsection{Reduction from \RMQ{} to \LCA{}}
+
+\todo{Add tree from lecture notes}
+Build a \emph{Cartesian Tree}:
+Walk through the array, while keeping track of the right spine of the tree.
+When inserting a new element, if the element is the largest element in the spine, insert it at the end.
+Else, we have an edge from $v$ to $w$, where the new element $a$ should be in between.
+Make $a$ the right child of $v$, and make $w$ the \emph{left} child of $a$.
+This step looks like it will be linear, but
+the more we have to search through the right spine, the more swaps we do,
+and the smaller the spine gets, so it amortizes to constant time,
+making the algorithm linear.
+
+Note that simpler divide and conquer algorithm runs in $O(n \log n)$.
+
+We see that \code{LCA}{i, j} in the cartesian tree is the same as \code{RMQ}{i, j} in $A$.
+
+\subsection{Reduction from \LCA{} to \RMQ{}}
+
+Traverse the tree in-order, and write out the \emph{depth} of each node to $A$.
+Now \code{RMQ}{i, j} = \code{LCA}{i, j}.
+Naturally, since this scheme should work for any tree, we cannot write out the node values, since
+the tree may not be a cartesian tree.
+
+Note that if we go from \RMQ{} to \LCA{} and back again, we end up with different numbers
+in the array. However, since we are not interested in the actual minimum, but only the index of the minimum,
+the two arrays act exactly the same.
+A consequence of this is that we get \emph{Universe reduction}, where we have mapped the universe $\mathcal{U}$ to $[n-1]$.
+
+\section{Constant time \LCA{} (\RMQ{})}
+
+\subsubsection{Step 1: Reduction}
+We start by reducting the problem to $\pm 1 \textsc{RMQ}$, in which adjacent elements of the array differs by at most 1.
+Walk an \emph{Euler Tour} of the tree:
+\todo{Add figure}
+visit every edge twice, and for each edge write down the node we left.
+Each node store a pointer to the first visit of that node in the array, and the array elements store a pointer to its element in the tree.
+\RMQ{} and \LCA{} are still equivalent.
+
+We need this later on (step 4).
+
+\subsubsection{Step 2: Precomputation}
+Get $O(1)$ time and $O(n \log n)$ space \RMQ{}.
+Precompute and store all queries from any starting point where the interval length is a power of 2.
+Since there are $n$ starting points, and $\log n$ powers of 2 to choose from, there are $n \log n$ such queries.
+The key observation is that any arbitrary interval is the union of two such intervals.
+For instance $A[4..11] = A[4..8] \cup A[7..11]$.
+The double counting does not matter, since $\min$ is an idempotent operation.
+The intervals are trivially computed.
+
+\subsubsection{Step 3: Indirection}
+Indirection.
+Make a two layer structure: divide the numbers in $A$ into groups of size $1/2 \log n$, which makes the bottom layer.
+The top layer consists of the minimum element in each block. Since there are $n/(2 \log n) = 2n/\log n$ such blocks,
+there are equally many items in the top layer.
+
+A query in this structure consists of (potentially) three parts:
+A query in the bottom block in which $i$ is,
+a query in the top block for all blocks which are completely covered by the interval $[i, j]$,
+and a query in the bottom block in which $j$ is.
+We need all three queries to be $O(1)$.
+
+The gain from this is that the top layer only stores $O(n/ \log n)$, so we can afford Step 2, since the $\log$ factors cancel.
+We get $O(1)$ query and $O(n)$ space for the top structure.
+
+\subsubsection{Step 4: Lookup Tables}
+We use lookup tables for the bottom groups.
+The groups are of size $n' = 1/2 \log n$.
+\RMQ{} queries in these groups are invariant over value ``shifts'' in the gruop:
+if we add $a$ to all elements in the group, the queries are still the same.
+Shift all groups by its first element, such that all groups start with 0.
+Now every group is completely defined by the difference of adjacent elements,
+so the blocks can be encoded as a bitstring of the same length as a block, where 0 is decreasing and 1 is increasing:
+$[3,4,5,4] \rightarrow [0,1,2,1] \rightarrow [-, 1, 1, 0]$.
+There are $2^{n'} = \sqrt{n}$ possible such blocks,
+${(1/2 \log n)}^2$ possible queries, and each answer requires $\log \log n$ bits,
+so storing a lookup tables for all possible blocks, over all possible queries with all possible
+answers take $\sqrt{n} {(1/2 \log n)}^2 \log \log n = o(n)$ bits.
+Now each bottom block can simply store a pointer into the table, and we get $O(1)$ query for the bottom groups.
+
+
+\section{Constant Time \LA{}}
+Level Anscestor queries take a node $x$ and a level $n$, and the goal is to find the $n$th parent of $x$ in the tree.
+The simplest way to do this is for each node to store its parent, making the query $O(n)$.
+We want $O(1)$.
+
+\subsubsection{Step 1: Jump Pointers}
+Instead of having each node storing only its parent, each node can store its $2^k$th parent.
+Each node has $O(\log n)$ such parents, making the space requirement $O(n \log n)$.
+
+
 
 \chapter{Strings}
 
-- 
cgit v1.2.3