PhD

The LaTeX sources of my Ph.D. thesis
git clone https://esimon.eu/repos/PhD.git

notation.tex


\chapter{Notation}
Most of this thesis is formatted in one and a half columns, which means that a large right margin is filled with complementary material.
This includes figures, tables and algorithms when space allows, but also epigraphs and marginal notes with supplementary details and comments.
The titles of important bibliographical references are also given in the margin to the right of their first mention in the section.
Some marginal paragraphs are left unnumbered; they provide material relating to the roughly adjacent passage.
When a section seems unclear, we invite the reader to look for additional information in the margin.
For example, while relation algebra is introduced in Section~\ref{sec:context:relation algebra}, we do not expect most readers to be familiar with its notation.
As such, we will systematically provide an interpretation of relation algebra formulae in plain English in unnumbered marginal paragraphs.

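% The marginal apparatus described above is provided by the class and macros of this
% thesis; as a purely illustrative sketch (not the setup actually used here), an
% unnumbered marginal paragraph could be produced with the standard \marginpar command:
% \marginpar{\footnotesize In plain English: \(r\relationComposition s\) chains the
% relation \(r\) with the relation \(s\).}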
\bigskip

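% The first column below is sized to fit \(\jsd(P\mathrel{\|}Q)\), presumably the widest
% entry of that column; the second column receives the remaining text width, minus the
% first column's width and the inter-column padding (two \tabcolsep).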
\newlength{\notationsWidest}
\settowidth{\notationsWidest}{\(\jsd(P\mathrel{\|}Q)\)}
\begin{longtable}{@{}c p{\dimexpr\textwidth-\tabcolsep*2-\notationsWidest\relax}@{}}
	\multicolumn{2}{@{}c@{}}{\textbf{Domain of Variables}} \\
	\(x\) & A scalar \\
	\(\vctr{x}\) & A vector, its elements are indexed \(x_i\) \\
	\(\mtrx{X}\) & A matrix, its rows are indexed \(\vctr{x}_i\), its elements \(x_{ij}\) \\
	\(\tnsr{X}\) & A (three-way) tensor, indexed \(\mtrx{X}_i\), \(\vctr{x}_{ij}\), \(x_{ijk}\) \\
	\(\rndm{x}\) & A random variable (sometimes \(\rndm{X}\) to avoid confusion) \\
	\(\rndmvctr{x}\) & A random vector \\
	\(\symbb{R}\) & The set of real numbers \\
	\(\symbb{R}^n\) & The set of real-valued vectors of length \(n\) \\
	\(\symbb{R}^{n\times m}\) & The set of real-valued matrices with \(n\) rows and \(m\) columns \\
	\(B^A\) & The set of functions from \(A\) to \(B\), in particular \(2^A\) denotes the power set of \(A\) \\
	\multicolumn{2}{@{}b{\textwidth}@{}}{
		To describe the set of real-valued vectors with the same number of elements as a set \(A\), we abuse the isomorphism between the functions \(\symbb{R}^A\) and the vectors \(\symbb{R}^{|A|}\) and simply write \(\vctr{x}\in\symbb{R}^A\) to denote that \(\vctr{x}\) is a vector with \(|A|\) elements.
	} \\[5mm]
	\multicolumn{2}{@{}c@{}}{\textbf{Relation Algebra}} \\
	\multicolumn{2}{@{}l@{}}{Relation algebra is described in more detail in Section~\ref{sec:context:relation algebra}.} \\
	\(\relationZero\) & Empty relation \\
	\(\relationOne\) & Complete relation \\
	\(\relationIdentity\) & Identity relation \\
	\(\bar{r}\) & Complementary relation \\
	\(\breve{r}\) & Converse relation (reversed orientation), when applied to a surface form: \(\widebreve{\textsl{born in}}\) \\
	\(\relationComposition\) & Relation composition \\[5mm]
	\multicolumn{2}{@{}c@{}}{\textbf{Probability and Information Theory}} \\
	\(P(\rndm{x})\), \(Q(\rndm{x})\) & Probability distribution over \(\rndm{x}\); by default we heavily overload \(P\) (as is customary) and disambiguate with \(Q\) when confusion is possible \\
	\(\empP(\rndm{x})\) & Empirical distribution over \(\rndm{x}\) (as defined by the dataset) \\
	\(\rndm{x} \independent \rndm{y} \mid \rndm{z}\) & Conditional independence of \(\rndm{x}\) and \(\rndm{y}\) given \(\rndm{z}\) \\
	\(\rndm{x} \notindependent \rndm{y}\) & \(\rndm{x}\) and \(\rndm{y}\) are not independent \\
	\(\uniformDistribution(X)\) & Uniform distribution over the set \(X\) \\
	\(\normalDistribution(\mu, \sigma^2)\) & Normal distribution of mean \(\mu\) and variance \(\sigma^2\) (also used for the multivariate case) \\
	\(\entropy(\rndm{x})\) & Shannon entropy of the random variable \(\rndm{x}\); \(\entropy(\rndm{x}, \rndm{y})\) denotes the joint entropy \\
	\(\entropy(\rndm{x}\mid\rndm{y})\) & Conditional entropy of \(\rndm{x}\) given \(\rndm{y}\) \\
	\(\entropy_Q(P)\) & Cross-entropy of \(P\) relative to \(Q\) \\
	\(\operatorname{I}(\rndm{x}; \rndm{y})\) & Mutual information of \(\rndm{x}\) and \(\rndm{y}\) \\
	\(\pmi(x, y)\) & Pointwise mutual information of events \(x\) and \(y\) \\
	\(\kl(P\mathrel{\|}Q)\) & Kullback--Leibler divergence from \(Q\) to \(P\) \\
	\(\jsd(P\mathrel{\|}Q)\) & Jensen--Shannon divergence between \(P\) and \(Q\) \\
	\(W_1(P, Q)\) & 1-Wasserstein distance between \(P\) and \(Q\) \\[5mm]
	\multicolumn{2}{@{}c@{}}{\textbf{Machine Learning}} \\
	\(\sigmoid(x)\) & Logistic sigmoid \(\sigmoid(x) = 1 \divslash (1 + \exp(-x))\) \\
	\(\ReLU(x)\) & Rectified linear unit \(\ReLU(x) = \max(0, x)\); we use \(\ReLU_{\halfCircleScript}\) to refer to the ReLU activation applied to half of the units (see Section~\ref{sec:context:attention lm}) \\
	\(\symcal{L}\) & Loss (to be minimized) \\
	\(J\) & Objective (to be maximized) \\
	\(\overDirected{\fone}\), \(\overUndirected{\fone}\), \(\overHalfdirected{\fone}\) & Directed, undirected and half-directed \fone{} measures (see Section~\ref{sec:relation extraction:supervised evaluation}) \\[5mm]
	\multicolumn{2}{@{}c@{}}{\textbf{Graph Operations}} \\
	\(\gfsource(a)\) & Source vertex of the arc \(a\) \\
	\(\gftarget(a)\) & Target vertex of the arc \(a\) \\
	\(\gfrelation(a)\) & Relation conveyed by the arc \(a\) \\
	\(\gfsentence(a)\) & Sentence corresponding to the arc \(a\) \\
	\(\gfneighbors(e)\) & Vertices neighboring the vertex \(e\) \\
	\(\gfincidents(e)\) & Arcs incident to the vertex \(e\) \\
	\(\gfeneighbors(a)\) & Arcs neighboring the arc \(a\) \\[5mm]
	\multicolumn{2}{@{}c@{}}{\textbf{Other Operations}} \\
	\(\odot\) & Element-wise (Hadamard) product \\
	\(*\) & Convolution \\
	\(\bowtie\) & Natural join \\
	\(\times_A\) & Pullback with common codomain \(A\) \\
	\(\delta_{i,j}\) & Kronecker's delta, 1 if \(i=j\), 0 otherwise \\
\end{longtable}
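% For reference, the direction conventions used in the table above correspond to the
% standard definitions recalled here (schematically, over a discrete support), only to
% fix which argument plays which role:
%   \kl(P\mathrel{\|}Q) = \sum_x P(x) \log \frac{P(x)}{Q(x)}
%   \entropy_Q(P) = -\sum_x P(x) \log Q(x)
%   \jsd(P\mathrel{\|}Q) = \frac{1}{2}\kl(P\mathrel{\|}M) + \frac{1}{2}\kl(Q\mathrel{\|}M)
%     with M = \frac{1}{2}(P + Q)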