notation.tex (5694B)
1 \chapter{Notation} 2 Most of this thesis is formatted in one and a half columns, which means that a large right margin is filled with complementary material. 3 This includes figures, tables and algorithms when space allows, but also epigraphs and marginal notes with supplementary details and comments. 4 The titles of important bibliographical references are also given in the margin, to the right of their first mention in the section. 5 Some marginal paragraphs are left unnumbered and provide material about the broadly adjacent passage. 6 When a section seems unclear, we invite the reader to look for additional information in the margin. 7 For example, while relation algebra is introduced in Section~\ref{sec:context:relation algebra}, we do not expect most readers to be familiar with its notation. 8 As such, we will systematically provide an interpretation of relation algebra formulae in plain English in unnumbered marginal paragraphs. 9 10 \bigskip 11 12 \newlength{\notationsWidest} 13 \settowidth{\notationsWidest}{\(\jsd(P\mathrel{\|}Q)\)} 14 \begin{longtable}{@{}c p{\dimexpr\textwidth-\tabcolsep*2-\notationsWidest\relax}@{}} 15 \multicolumn{2}{@{}c@{}}{\textbf{Domain of Variables}} \\ 16 \(x\) & A scalar \\ 17 \(\vctr{x}\) & A vector; its elements are indexed \(x_i\) \\ 18 \(\mtrx{X}\) & A matrix; its rows are indexed \(\vctr{x}_i\), its elements \(x_{ij}\) \\ 19 \(\tnsr{X}\) & A (three-way) tensor, indexed \(\mtrx{X}_i\), \(\vctr{x}_{ij}\), \(x_{ijk}\) \\ 20 \(\rndm{x}\) & A random variable (sometimes \(\rndm{X}\) to avoid confusion) \\ 21 \(\rndmvctr{x}\) & A random vector \\ 22 \(\symbb{R}\) & The set of real numbers \\ 23 \(\symbb{R}^n\) & The set of real-valued vectors of length \(n\) \\ 24 \(\symbb{R}^{n\times m}\) & The set of real-valued matrices with \(n\) rows and \(m\) columns \\ 25 \(B^A\) & The set of functions from \(A\) to \(B\); in particular, \(2^A\) denotes the power set of \(A\) \\ 26 \multicolumn{2}{@{}b{\textwidth}@{}}{ 27 To describe the set of 
real-valued vectors with the same number of elements as a set \(A\), we abuse the isomorphism between the functions \(\symbb{R}^A\) and the vectors \(\symbb{R}^{|A|}\) and simply write \(\vctr{x}\in\symbb{R}^A\) to denote that \(\vctr{x}\) is a vector with \(|A|\) elements. 28 } \\[5mm] 29 \multicolumn{2}{@{}c@{}}{\textbf{Relation Algebra}} \\ 30 \multicolumn{2}{@{}l@{}}{Relation algebra is described in more detail in Section~\ref{sec:context:relation algebra}.} \\ 31 \(\relationZero\) & Empty relation \\ 32 \(\relationOne\) & Complete relation \\ 33 \(\relationIdentity\) & Identity relation \\ 34 \(\bar{r}\) & Complementary relation \\ 35 \(\breve{r}\) & Converse relation (reversed orientation); when applied to a surface form, it is written \(\widebreve{\textsl{born in}}\) \\ 36 \(\relationComposition\) & Relation composition \\[5mm] 37 \multicolumn{2}{@{}c@{}}{\textbf{Probability and Information Theory}} \\ 38 \(P(\rndm{x})\), \(Q(\rndm{x})\) & Probability distribution over \(\rndm{x}\); by default we heavily overload \(P\) (as is customary), and when confusion is possible we disambiguate by using \(Q\) \\ 39 \(\empP(\rndm{x})\) & Empirical distribution over \(\rndm{x}\) (as defined by the dataset) \\ 40 \(\rndm{x} \independent \rndm{y} \mid \rndm{z}\) & Conditional independence of \(\rndm{x}\) and \(\rndm{y}\) given \(\rndm{z}\) \\ 41 \(\rndm{x} \notindependent \rndm{y}\) & \(\rndm{x}\) and \(\rndm{y}\) are not independent \\ 42 \(\uniformDistribution(X)\) & Uniform distribution over the set \(X\) \\ 43 \(\normalDistribution(\mu, \sigma^2)\) & Normal distribution of mean \(\mu\) and variance \(\sigma^2\) (also used for the multivariate case) \\ 44 \(\entropy(\rndm{x})\) & Shannon entropy of the random variable \(\rndm{x}\); \(\entropy(\rndm{x}, \rndm{y})\) denotes the joint entropy \\ 45 \(\entropy(\rndm{x}\mid\rndm{y})\) & Conditional entropy of \(\rndm{x}\) given \(\rndm{y}\) \\ 46 \(\entropy_Q(P)\) & Cross-entropy of \(P\) relative to \(Q\) \\ 47 \(\operatorname{I}(\rndm{x}; 
\rndm{y})\) & Mutual information of \(\rndm{x}\) and \(\rndm{y}\) \\ 48 \(\pmi(x, y)\) & Pointwise mutual information of events \(x\) and \(y\) \\ 49 \(\kl(P\mathrel{\|}Q)\) & Kullback--Leibler divergence from \(Q\) to \(P\) \\ 50 \(\jsd(P\mathrel{\|}Q)\) & Jensen--Shannon divergence between \(P\) and \(Q\) \\ 51 \(W_1(P, Q)\) & 1-Wasserstein distance between \(P\) and \(Q\) \\[5mm] 52 \multicolumn{2}{@{}c@{}}{\textbf{Machine Learning}} \\ 53 \(\sigmoid(x)\) & Logistic sigmoid \(\sigmoid(x) = 1 \divslash (1 + \exp(-x))\) \\ 54 \(\ReLU(x)\) & Rectified linear unit \(\ReLU(x) = \max(0, x)\); we use \(\ReLU_{\halfCircleScript}\) to refer to the ReLU activation applied to half of the units (see Section~\ref{sec:context:attention lm}) \\ 55 \(\symcal{L}\) & Loss (to be minimized) \\ 56 \(J\) & Objective (to be maximized) \\ 57 \(\overDirected{\fone}\), \(\overUndirected{\fone}\), \(\overHalfdirected{\fone}\) & Directed, undirected and half-directed \fone{} measures (see Section~\ref{sec:relation extraction:supervised evaluation}) \\[5mm] 58 \multicolumn{2}{@{}c@{}}{\textbf{Graph Operations}} \\ 59 \(\gfsource(a)\) & Source vertex of the arc \(a\) \\ 60 \(\gftarget(a)\) & Target vertex of the arc \(a\) \\ 61 \(\gfrelation(a)\) & Relation conveyed by the arc \(a\) \\ 62 \(\gfsentence(a)\) & Sentence corresponding to the arc \(a\) \\ 63 \(\gfneighbors(e)\) & Vertices neighboring the vertex \(e\) \\ 64 \(\gfincidents(e)\) & Arcs incident to the vertex \(e\) \\ 65 \(\gfeneighbors(a)\) & Arcs neighboring the arc \(a\) \\[5mm] 66 \multicolumn{2}{@{}c@{}}{\textbf{Other Operations}} \\ 67 \(\odot\) & Element-wise (Hadamard) product \\ 68 \(*\) & Convolution \\ 69 \(\bowtie\) & Natural join \\ 70 \(\times_A\) & Pullback with common codomain \(A\) \\ 71 \(\delta_{i,j}\) & Kronecker delta: 1 if \(i=j\), 0 otherwise \\ 72 \end{longtable}