From 882dda0271204d6eee7bb53904bdc303a26db3a3 Mon Sep 17 00:00:00 2001
From: Martin Hafskjold Thoresen <martinhath@gmail.com>
Date: Tue, 4 Jul 2017 20:41:47 +0200
Subject: Finish LCA and LA chapter

---
 book.tex | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

(limited to 'book.tex')

diff --git a/book.tex b/book.tex
index 0375356..dd9509e 100644
--- a/book.tex
+++ b/book.tex
@@ -4,6 +4,7 @@
 ]{geometry}
 \usepackage{amsmath}
 \usepackage{amsfonts}
+\usepackage{mathtools}
 \usepackage{tikz}
 \usetikzlibrary{positioning,calc,shapes}
 \usepackage{todonotes}
@@ -29,6 +30,8 @@
 \newenvironment{definition}[1]{{\parindent=0em \textbf{Definition} --- \emph{#1}:\\}}{\vspace{1em}}
 
 \DeclareMathOperator*{\argmin}{arg\,min} % thin space, limits underneath in displays
+\DeclarePairedDelimiter{\ceil}{\lceil}{\rceil}
+\DeclarePairedDelimiter{\floor}{\lfloor}{\rfloor}
 
 % Expectation symbol
 \DeclareMathOperator*{\E}{\mathbb{E}}
@@ -204,6 +207,7 @@ the two arrays act exactly the same.
 A consequence of this is that we get \emph{Universe reduction}, where we have mapped the universe $\mathcal{U}$ to $[n-1]$.
 
 \section{Constant time \LCA{} (\RMQ{})}
+\label{sec:constant-lca}
 
 \subsubsection{Step 1: Reduction}
 We start by reducting the problem to $\pm 1 \textsc{RMQ}$, in which adjacent elements of the array differs by at most 1.
@@ -251,7 +255,7 @@ $[3,4,5,4] \rightarrow [0,1,2,1] \rightarrow [-, 1, 1, 0]$.
 There are $2^{n'} = \sqrt{n}$ possible such blocks,
 ${(1/2 \log n)}^2$ possible queries, and each answer requires $\log \log n$ bits,
 so storing a lookup tables for all possible blocks, over all possible queries with all possible
-answers take $\sqrt{n} {(1/2 \log n)}^2 \log \log n = o(n)$ bits.
+answers take $\sqrt{n}\ {(1/2 \log n)}^2\ \log \log n = o(n)$ bits.
 Now each bottom block can simply store a pointer into the table, and we get $O(1)$ query for the bottom groups.
 
 
@@ -263,11 +267,65 @@ We want $O(1)$.
 \subsubsection{Step 1: Jump Pointers}
 Instead of having each node storing only its parent, each node can store its $2^k$th parent.
 Each node has $O(\log n)$ such parents, making the space requirement $O(n \log n)$.
+Query time is $O(\log n)$, since we at least halve $n$ each jump.
+
+\subsubsection{Step 2: Long-path Decomposition}
+Decompose the tree into a set of paths.
+Find the longest path in the tree from the root, and store the nodes in an array.
+The nodes themselves store the array and its index in the array.
+Recurse on the subtrees that are hanging from the path.
+
+With this scheme, a query is done as follows:
+if $n$ is less than the nodes index in its path, we jump directly to the node.
+Else, we jump to the first node in our path, subtract $n$ by $x$s index, and repeat.
+We risk at most to visit $O(\sqrt{n})$ such paths, since we know that the paths are the \emph{longest} paths.
+We end up using $O(n)$ space, and $O(\sqrt{n})$ query time.
+
+\subsubsection{Step 3: Ladder Decomposition}
+Extend each path upwards by twice its length.
+Now the arrays overlap, but the nodes still only store their original array and index.
+This doubles the space of Step 2, but we are still linear.
+The improvement of this step is that we at least double the length from the node to the end of its path,
+since the path in which we can jump freely goes at least twice that length up.
+
+\subsubsection{Step 4: Step 1 + Step 3}
+We combine the jump pointers and the ladder decomposition.
+Jump pointers are great for long jumps, and ladders are great for short jumps.
+We follow the jump pointer $k/2 \leq 2^{\floor{\log k}} \leq k$ steps up, for some $k$.
+Since we have gone up a path from $x$ to a node by the jump pointer, we know that the node we hit is
+of large height, and hence is part of a long ladder. Since its height from the end of the path
+can be doubled by Step 3, we can get from $k/2$ to $k$, in which we know our target is.
+Hence, we get $O(1)$ query, but still $O(n \log n)$ space (and preprocessing).
+
+\subsubsection{Step 5: Only Leaves Store Jump Pointers}
+Since all nodes have constant access to a leaf node (by its ladder, of which the last node is a leaf, by the maximal property),
+only leaves need to store the jump pointers.
+In other words, we make all queries start at leaves.
+
+\subsubsection{Step 6: Leaf Trimming}
+We define a \emph{maximally deep node} as a node with $\geq 1/4 \log n$ descendants.
+Split the tree in two layers by the maximally deep nodes.
+The number of leaves in the top part is now $O(n/\log n)$, since
+\todo{eh?}
+for each $1/4 \log n$ nodes in the original tree we have ``replaced'' it with a subtree (the bottom structure).
+If we now use Step 5 on the top, we get $O(n)$ space.
+
+\subsubsection{Step 7: Lookup Table}
+For the bottom trees, we use lookup tables.
+The trees are of size $n' \leq 1/4 \log n$. The number of rooted trees on $n'$ nodes is limited by
+$2^{2n'} \leq \sqrt{n}$ by encoding an euler tour as a string of $\pm1$, like in Section~\ref{sec:constant-lca}.
+There are ${(n')}^2 = O(\log^2 n)$ possible queries (if $n$ is large, we just go to the top structure),
+and an answer takes $O(\log \log n)$ bits,
+so a lookup table for all possible trees, with all possible queries takes only
+$\sqrt{n}\ O(\log^2 n)\ O(\log \log n) = o(n)$ bits.
+
+We end up with $O(1)$ time queries, using $O(n)$ space!
 
 
 
 \chapter{Strings}
 
+
 String search, suffix trees, suffix arrays.
 
 \chapter{Temporal Structures}
-- 
cgit v1.2.3