Full Code of tengyuma/cs229m_notes for AI

main 7e5b50a20514 cached
38 files
2.8 MB
723.4k tokens
1 symbols
1 requests
Download .txt
Showing preview only (2,893K chars total). Download the full file or copy to clipboard to get everything.
Repository: tengyuma/cs229m_notes
Branch: main
Commit: 7e5b50a20514
Files: 38
Total size: 2.8 MB

Directory structure:
gitextract_7v14hukr/

├── .gitignore
├── Templates/
│   ├── macros.tex
│   ├── master.tex
│   ├── template.tex
│   ├── yoursunetID.tex
│   └── yoursunetID2.tex
└── tex/
    ├── all.bib
    ├── bibliography.bib
    ├── collection/
    │   ├── 01supervised.tex
    │   ├── 02-01-2021.tex
    │   ├── 02asymptotics.tex
    │   ├── 03concentration.tex
    │   ├── 04-01-uniform.tex
    │   ├── 04-02-uniform.tex
    │   ├── 04-03-uniform.tex
    │   ├── 05-01-concrete-models.tex
    │   ├── 05-02-concrete-models.tex
    │   ├── 05-03-deep-nets.tex
    │   ├── 06-dltheory.tex
    │   ├── 07-01-nonconvex.tex
    │   ├── 07-02-nonconvex.tex
    │   ├── 07-03-nonconvex.tex
    │   ├── 07-03-ntk.tex
    │   ├── 07-05-ntk-limitation.tex
    │   ├── 08-01-algorithmic.tex
    │   ├── 08-02-algorithmic.tex
    │   ├── 08-03-algorithmic-new.tex
    │   ├── 08-03-algorithmic.tex
    │   ├── 09-01-data-dependent.tex
    │   ├── 09-01-unsupervised.tex
    │   ├── 10-01-online.tex
    │   └── 10-02-online.tex
    ├── figures/
    │   ├── chaining_figures.pptx
    │   ├── figures.pptx
    │   ├── multilayer_covering.pptx
    │   └── ntk-1d.py
    ├── macros.tex
    └── master.tex

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb

## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
*.pdf

## Generated if empty string is given at "Please type another file name for output:"
.pdf

## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml

## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync

## Build tool directories for auxiliary files
# latexrun
latex.out/

## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa

# achemso
acs-*.bib

# amsthm
*.thm

# beamer
*.nav
*.pre
*.snm
*.vrb

# changes
*.soc

# comment
*.cut

# cprotect
*.cpt

# elsarticle (documentclass of Elsevier journals)
*.spl

# endnotes
*.ent

# fixme
*.lox

# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm

#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R

# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs

# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist

# gnuplottex
*-gnuplottex-*

# gregoriotex
*.gaux
*.gtex

# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref

# hyperref
*.brf

# knitr
*-concordance.tex
# TODO Comment the next line if you want to keep your tikz graphics files
*.tikz
*-tikzDictionary

# listings
*.lol

# luatexja-ruby
*.ltjruby

# makeidx
*.idx
*.ilg
*.ind

# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*

# minted
_minted*
*.pyg

# morewrites
*.mw

# nomencl
*.nlg
*.nlo
*.nls

# pax
*.pax

# pdfpcnotes
*.pdfpc

# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd

# scrwfile
*.wrt

# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/

# pdfcomment
*.upa
*.upb

# pythontex
*.pytxcode
pythontex-files-*/

# tcolorbox
*.listing

# thmtools
*.loe

# TikZ & PGF
*.dpth
*.md5
*.auxlock

# todonotes
*.tdo

# vhistory
*.hst
*.ver

# easy-todo
*.lod

# xcolor
*.xcp

# xmpincl
*.xmpi

# xindy
*.xdy

# xypic precompiled matrices and outlines
*.xyc
*.xyd

# endfloat
*.ttt
*.fff

# Latexian
TSWLatexianTemp*

## Editors:
# WinEdt
*.bak
*.sav

# Texpad
.texpadtmp

# LyX
*.lyx~

# Kile
*.backup

# gummi
.*.swp

# KBibTeX
*~[0-9]*

# TeXnicCenter
*.tps

# auto folder when using emacs and auctex
# (paths are never matched with a "./" prefix; a leading "/" anchors the
# pattern to the directory that contains this .gitignore)
/auto/*
*.el

# expex forward references with \gathertags
*-tags.tex

# standalone packages
*.sta

# Makeindex log files
*.lpz

# MacOS filesystem metadata
*.DS_Store


================================================
FILE: Templates/macros.tex
================================================
\usepackage{color}
\usepackage{lipsum}



% \metadata{<lecture ID>}{<scribe names>}{<date>}
%
% When \lectureformat is 1, starts a new page, stores the lecture ID in
% \lectureID, sets the chapter counter to it, and typesets the lecture
% header (course title, lecturer, lecture number, scribes, date) followed
% by a horizontal rule.  \draftnotice is not defined in this file and must
% be provided by the driver document before \metadata is used.
%
% For any other value of \lectureformat, \metadata takes its three
% arguments and produces nothing.
\ifnum\lectureformat=1
\newcommand{\metadata}[3]
{
	\newpage
	
	\def\lectureID{#1}
	
	% Set the chapter counter to the lecture number.
	\setcounter{chapter}{\lectureID}

	\draftnotice
	
	\begin{center}
		% \bfseries replaces the obsolete LaTeX 2.09 switch \bf.
		\bfseries\large CS229M/STATS214: Machine Learning Theory
	\end{center}
	
	\noindent
	Lecturer: Tengyu Ma   %%% FILL IN LECTURER (if not RS)
	\hfill
	Lecture \# \lectureID              %%% FILL IN LECTURE NUMBER HERE
	\\
	Scribe: #2                  %%% FILL IN YOUR NAME HERE
	\hfill
	#3           %%% FILL IN LECTURE DATE HERE
	
	\noindent
	\rule{\textwidth}{1pt}
	
	\medskip
}
\else 
\newcommand{\metadata}[3]{}
\fi

\DeclareMathOperator*{\Exp}{\mathbb{E}}
\DeclareMathOperator*{\argmin}{\textup{argmin}}
\DeclareMathOperator*{\argmax}{\textup{argmax}}
\newcommand{\E}{\mathbb{E}}

\newcommand{\err}{\ell_{\textup{0-1}}}
\newcommand{\thetaerm}{\theta_{\textup{ERM}}}
\newcommand{\hatL}{\widehat{L}}
\newcommand{\tilO}{\widetilde{O}}
\newcommand{\iid}{\overset{\textup{iid}}{\sim}}

\newcommand{\norm}[1]{\|#1\|}
\newcommand{\Norm}[1]{\left\|#1\right\|}


% \al{<body>}: shorthand that wraps <body> in an align environment.
% The % at the end of the first and last lines keeps the line breaks of this
% definition from being inserted as space tokens before and after the display.
\newcommand{\al}[1]{%
	\begin{align}
	#1
	\end{align}%
}


\renewcommand{\sp}[1]{^{(#1)}}

% Calligraphic shorthands: \cX expands to \mathcal X for each capital letter X.
\newcommand{\cA}{\mathcal A}
\newcommand{\cB}{\mathcal B}
\newcommand{\cC}{\mathcal C}
\newcommand{\cD}{\mathcal D}
\newcommand{\cE}{\mathcal E}
\newcommand{\cF}{\mathcal F}
\newcommand{\cG}{\mathcal G}
\newcommand{\cH}{\mathcal H}
\newcommand{\cI}{\mathcal I}
\newcommand{\cJ}{\mathcal J}
\newcommand{\cK}{\mathcal K}
\newcommand{\cL}{\mathcal L}
\newcommand{\cM}{\mathcal M}
\newcommand{\cN}{\mathcal N}
\newcommand{\cO}{\mathcal O}
\newcommand{\cP}{\mathcal P}
\newcommand{\cQ}{\mathcal Q}
\newcommand{\cR}{\mathcal R}
\newcommand{\cS}{\mathcal S}
\newcommand{\cT}{\mathcal T}
\newcommand{\cU}{\mathcal U}
\newcommand{\cV}{\mathcal V}
\newcommand{\cW}{\mathcal W}
\newcommand{\cX}{\mathcal X}
\newcommand{\cY}{\mathcal Y}
\newcommand{\cZ}{\mathcal Z}

% Blackboard-bold shorthands: \bbX expands to \mathbb X.
\newcommand{\bbB}{\mathbb B}
\newcommand{\bbS}{\mathbb S}
\newcommand{\bbR}{\mathbb R}
\newcommand{\bbZ}{\mathbb Z}
\newcommand{\bbI}{\mathbb I}
\newcommand{\bbQ}{\mathbb Q}
\newcommand{\bbP}{\mathbb P}
\newcommand{\bbE}{\mathbb E}
\newcommand{\bbN}{\mathbb N}

% \R is an alias for \bbR.
\newcommand{\R}{\bbR}

================================================
FILE: Templates/master.tex
================================================
%% filename: amsbook-template.tex
%% version: 1.1
%% date: 2014/07/24
%%
%% American Mathematical Society
%% Technical Support
%% Publications Technical Group
%% 201 Charles Street
%% Providence, RI 02904
%% USA
%% tel: (401) 455-4080
%%      (800) 321-4267 (USA and Canada only)
%% fax: (401) 331-3842
%% email: tech-support@ams.org
%% 
%% Copyright 2006, 2008-2010, 2014 American Mathematical Society.
%% 
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either version 1.3c
%% of this license or (at your option) any later version.
%% The latest version of this license is in
%%   http://www.latex-project.org/lppl.txt
%% and version 1.3c or later is part of all distributions of LaTeX
%% version 2005/12/01 or later.
%% 
%% This work has the LPPL maintenance status `maintained'.
%% 
%% The Current Maintainer of this work is the American Mathematical
%% Society.
%%
%% ====================================================================

%    AMS-LaTeX v.2 driver file template for use with amsbook
%
%    Remove any commented or uncommented macros you do not use.

\documentclass{book}
\usepackage{amsfonts,bm, amsthm, amsmath}


\newtheorem{theorem}{Theorem}[chapter]
\newtheorem{lemma}[theorem]{Lemma}

\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{xca}[theorem]{Exercise}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\numberwithin{section}{chapter}
\numberwithin{equation}{chapter}

%    For a single index; for multiple indexes, see the manual
%    "Instructions for preparation of papers and monographs:
%    AMS-LaTeX" (instr-l.pdf in the AMS-LaTeX distribution).
\makeindex
\def\lectureformat{0}
\input{macros}
\begin{document}

\frontmatter

\title{Lecture Notes for Machine Learning Theory (CS229M/STATS214)}

%    Remove any unused author tags.

%    author one information
\author{Instructor: Tengyu Ma}
%\address{}
%\curraddr{}
%\email{}
%    \thanks is a top-level command only in the AMS classes.  With the
%    standard book class used here, a stand-alone \thanks{} issues a
%    \footnotemark in the running text before \maketitle, so it is disabled.
%    To add a note, place \thanks{...} inside the \author argument instead.
%\thanks{}

%    author two information
%\author{}
%\address{}
%\curraddr{}
%\email{}
%\thanks{}

%\subjclass[2010]{Primary }

%\keywords{}

%\date{}

%\begin{abstract}
%\end{abstract}

\maketitle

%    Dedication.  If the dedication is longer than a line or two,
%    remove the centering instructions and the line break.
%\cleardoublepage
%\thispagestyle{empty}
%\vspace*{13.5pc}
%\begin{center}
%  Dedication text (use \\[2pt] for line break if necessary)
%\end{center}
%\cleardoublepage

%    Change page number to 6 if a dedication is present.
\setcounter{page}{4}

\tableofcontents

%    Include unnumbered chapters (preface, acknowledgments, etc.) here.
%\include{}
\mainmatter
\let\sec\section
\let\subsec\subsection

\chapter{Generalization Bounds with Uniform Convergence}
%\section{}
%    Scribe note files that make up this chapter.
\input{yoursunetID}
\input{yoursunetID2}

%    Include main chapters here.
%\include{}
\appendix
%    Include appendix "chapters" here.


\backmatter
%    Bibliography styles amsplain or harvard are also acceptable.
\bibliographystyle{amsalpha}
\bibliography{}
%    See note above about multiple indexes.
%\printindex

\end{document}

%-----------------------------------------------------------------------
% End of amsbook-template.tex
%-----------------------------------------------------------------------


================================================
FILE: Templates/template.tex
================================================
	\documentclass[11pt]{book}
	
	\usepackage{amsfonts,amsthm, bm,amsmath, bbm,amssymb,mathtools}
	\usepackage{fullpage}
	
	
	\newtheorem{theorem}{Theorem}[chapter]
	\newtheorem{lemma}[theorem]{Lemma}
	
	\theoremstyle{definition}
	\newtheorem{definition}[theorem]{Definition}
	\newtheorem{example}[theorem]{Example}
	\newtheorem{xca}[theorem]{Exercise}
	\newtheorem{corollary}[theorem]{Corollary}  % added for Lecture 5
	\newtheorem{proposition}{Proposition}[section]  % added for Lecture 6
	
	\theoremstyle{remark}
	\newtheorem{remark}[theorem]{Remark}
	
	\numberwithin{section}{chapter}
	\numberwithin{equation}{chapter}
	
	\makeindex
	
	\def\lectureformat{1}
	\input{macros}
	\begin{document}
	
	\frontmatter
	
	\mainmatter
	\let\sec\section
	\let\subsec\subsection
	
	\newcommand{\secwarning}[1]{
		{	
			\color{red}
			$\backslash$section and $\backslash$subsection are disallowed, please use 	$\backslash$sec and $\backslash$subsec instead
		}
	}
	\let\section\secwarning
	\let\subsection\secwarning
	
	
	% \draftnotice: typesets a right-aligned italic "DRAFT" banner.
	% The banner is raised by 0.6in and given zero height and depth, inside a
	% \vbox of height 0.25in; the negative \vspace commands that follow pull
	% the subsequent material back up.
	% \itshape replaces the obsolete LaTeX 2.09 switch \it.
	\newcommand{\draftnotice}{\vbox to 0.25in{\noindent
			\raisebox{0.6in}[0in][0in]{\makebox[\textwidth][r]{\itshape
					DRAFT --- a final version will be posted shortly}}}
		\vspace{-.25in}\vspace{-\baselineskip}
	}
	
	%\section{}
	\input{yoursunetID}
	
	\input{yoursunetID2}
	
	%    Include main chapters here.
	%\include{}
	\appendix
	%    Include appendix "chapters" here.
	
	
	\backmatter
	%    Bibliography styles amsplain or harvard are also acceptable.
	\bibliographystyle{amsalpha}
	\bibliography{}
	%    See note above about multiple indexes.
%	\printindex
	
	\end{document}
	
	%-----------------------------------------------------------------------
	% End of amsbook-template.tex
	%-----------------------------------------------------------------------


================================================
FILE: Templates/yoursunetID.tex
================================================
%\newcommand{\Exp}{\mathbb{E}}

% reset section counter
\setcounter{section}{0}

%\metadata{lecture ID}{Your names}{date}
\metadata{1}{Alice and Bob}{Jan 1st, 2021}

\sec{Review and Overview}

\begin{enumerate}
	\item If appropriate, one paragraph to briefly review the connection to previous lectures.
	\item An overview paragraph that summarizes the main idea of the lecture at a high level.
\end{enumerate}  
\sec{Macros for frequently used notations}
Please try to reuse the macros defined below to ensure consistency. {\color{blue}We encourage you to use macros frequently, which can save a lot of time when typing equations and also helps avoid notational inconsistency.}
\begin{itemize}
	\item $\Exp$, 
	\al{
		\E_{x\sim P}, \Exp_{x\sim P} 
	}
	\item $\Pr[X=1\vert Y=2]$
	\item 
	\al{
		\argmin_{x: x\ge 1}
	}
	\item 
	$\theta$, $\theta^\star$, $\thetaerm$, 
	\item 
	$\cX,\cY, \cH, \cF$
	\item $x\sp{1}, y\sp{k}$
	\item 
	$x\in \R^3, \bbZ$
	\item $\err(\theta)$
	\item $O(\cdot)$, $\tilO(\cdot)$
	\item $\iid$
	\item $\norm{x}, \Norm{x^{2^3}}$, $\norm{x}_{2}$
	\item $x^\top$ 
\end{itemize}
\begin{theorem}
	..
\end{theorem}
\begin{lemma}
	...
\end{lemma}


	
\lipsum
%\subsection{}

================================================
FILE: Templates/yoursunetID2.tex
================================================
%\newcommand{\Exp}{\mathbb{E}}

% reset section counter
\setcounter{section}{0}

\metadata{2}{Mary and Alex}{Jan 3rd, 2021}

\sec{Review and Overview}

\begin{enumerate}
	\item If appropriate, one paragraph to briefly review the connection to previous lectures.
	\item An overview paragraph that summarizes the main idea of the lecture at a high level.
\end{enumerate}  
\sec{Macros for frequently used notations}
Please try to reuse the macros defined below to ensure consistency.
\begin{itemize}
	\item $\Exp$, 
	\al{
		\E_{x\sim P}, \Exp_{x\sim P} 
	}
	\item $\Pr[X=1\vert Y=2]$
	\item 
	\al{
		\argmin_{x: x\ge 1}
	}
	\item 
	$\theta$, $\theta^\star$, $\thetaerm$, 
	\item 
	$\cX,\cY, \cH, \cF$
	\item $x\sp{1}, y\sp{k}$
	\item 
	$x\in \R^3, \bbZ$
	\item $\err(\theta)$
	\item $O(\cdot)$, $\tilO(\cdot)$
	\item $\iid$
	\item $\norm{x}, \Norm{x^{2^3}}$, $\norm{x}_{2}$
	\item $x^\top$
\end{itemize}
\begin{theorem}
	..
\end{theorem}
\begin{lemma}
	...
\end{lemma}


	
\lipsum
%\subsection{}

================================================
FILE: tex/all.bib
================================================
@inproceedings{chung2007four,
	title={Four proofs for the Cheeger inequality and graph partition algorithms},
	author={Chung, Fan},
	booktitle={Proceedings of ICCM},
	volume={2},
	pages={378},
	year={2007},
	organization={Citeseer}
}
@article{arora2009expander,
  title={Expander flows, geometric embeddings and graph partitioning},
  author={Arora, Sanjeev and Rao, Satish and Vazirani, Umesh},
  journal={Journal of the ACM (JACM)},
  volume={56},
  number={2},
  pages={1--37},
  year={2009},
  publisher={ACM New York, NY, USA}
}

@article{aarons2017puns,
	title        = {Puns and Tacit Linguistic Knowledge},
	author       = {Debra Aarons},
	year         = 2017,
	journal      = {The Routledge Handbook of Language and Humor, Routledge, New York, NY, Routledge Handbooks in Linguistics}
}
@article{aaronson2006lower,
	title        = {Lower bounds for local search by quantum arguments},
	author       = {Aaronson, Scott},
	year         = 2006,
	journal      = {SIAM Journal on Computing},
	publisher    = {SIAM},
	volume       = 35,
	number       = 4,
	pages        = {804--824}
}
@inproceedings{aaronson2008complexity,
	title        = {The Complexity Zoo},
	author       = {S. Aaronson and Chris Bourke},
	year         = 2008
}
@article{abadi2015tensorflow,
	title        = {TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems},
	author       = {Martín Abadi and Ashish Agarwal and Paul Barham and Eugene Brevdo and Zhifeng Chen and Craig Citro and Gregory S. Corrado and Andy Davis and Jeffrey Dean and Matthieu Devin and Sanjay Ghemawat and Ian J. Goodfellow and Andrew Harp and Geoffrey Irving and Michael Isard and Yangqing Jia and Rafal Józefowicz and Lukasz Kaiser and Manjunath Kudlur and Josh Levenberg and Dan Mané and Rajat Monga and Sherry Moore and Derek Gordon Murray and Chris Olah and Mike Schuster and Jonathon Shlens and Benoit Steiner and Ilya Sutskever and Kunal Talwar and Paul A. Tucker and Vincent Vanhoucke and Vijay Vasudevan and Fernanda B. Viégas and Oriol Vinyals and Pete Warden and Martin Wattenberg and Martin Wicke and Yuan Yu and Xiaoqiang Zheng},
	year         = 2015,
	journal      = {arXiv preprint arXiv:1603.04467}
}
@inproceedings{abadi2016tensorflow,
	title        = {TensorFlow: A system for large-scale machine learning},
	author       = {Martin Abadi and Paul Barham and Jianmin Chen and Zhifeng Chen and Andy Davis and Jeffrey Dean and Matthieu Devin and Sanjay Ghemawat and Geoffrey Irving and Michael Isard and others},
	year         = 2016,
	booktitle    = {Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI). Savannah, Georgia, USA}
}
@inproceedings{abbasi2011improved,
	title        = {Improved algorithms for linear stochastic bandits},
	author       = {Abbasi-Yadkori, Yasin and P{\'a}l, D{\'a}vid and Szepesv{\'a}ri, Csaba},
	year         = 2011,
	booktitle    = {Advances in Neural Information Processing Systems}
}
@article{abbasi2014linear,
	title        = {Linear programming for large-scale {M}arkov decision problems},
	author       = {Abbasi-Yadkori, Yasin and Bartlett, Peter L and Malek, Alan},
	year         = 2014,
	journal      = {arXiv preprint arXiv:1402.6763}
}
@article{abbe2015community,
	title        = {Community detection in general stochastic block models: fundamental limits and efficient recovery algorithms},
	author       = {Emmanuel Abbe and Colin Sandon},
	year         = 2015,
	journal      = {arXiv}
}
@article{abbe2015detection,
	title        = {Detection in the stochastic block model with multiple clusters: proof of the achievability conjectures, acyclic {BP}, and the information-computation gap},
	author       = {Emmanuel Abbe and Colin Sandon},
	year         = 2015,
	journal      = {arXiv}
}
@misc{abbe2017community,
	title        = {Community detection and stochastic block models: recent developments},
	author       = {Emmanuel Abbe},
	year         = 2017,
	eprint       = {1703.10146},
	archiveprefix = {arXiv},
	primaryclass = {math.PR}
}
@inproceedings{abbeel2004apprenticeship,
	title        = {Apprenticeship learning via inverse reinforcement learning},
	author       = {Pieter Abbeel and Andrew Ng},
	year         = 2004,
	booktitle    = {International Conference on Machine Learning (ICML)}
}
@inproceedings{abbeel2006using,
	title        = {Using inaccurate models in reinforcement learning},
	author       = {P. Abbeel and M. Quigley and A. Y. Ng},
	year         = 2006,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {1--8}
}
@article{abe2003reinforcement,
	title        = {Reinforcement learning with immediate rewards and linear hypotheses},
	author       = {Abe, Naoki and Biermann, Alan W and Long, Philip M},
	year         = 2003,
	journal      = {Algorithmica},
	publisher    = {Springer},
	volume       = 37,
	number       = 4,
	pages        = {263--293}
}
@techreport{abel2017classical,
	title        = {Classical measurement error with several regressors},
	author       = {Andrew B Abel},
	year         = 2017,
	institution  = {Working Paper}
}
@inproceedings{abelson2014poor,
	title        = {Targeting Direct Cash Transfers to the Extremely Poor},
	author       = {Brian Abelson and Kush R. Varshney and Joy Sun},
	year         = 2014,
	booktitle    = {International Conference on Knowledge Discovery and Data Mining (KDD)}
}
@article{abid2017contrastive,
	title        = {Contrastive principal component analysis},
	author       = {Abubakar Abid and Vivek K Bagaria and Martin J Zhang and James Zou},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1709.06716}
}
@article{abid2018exploring,
	title        = {Exploring patterns enriched in a dataset with contrastive principal component analysis},
	author       = {Abubakar Abid and Martin J Zhang and Vivek K Bagaria and James Zou},
	year         = 2018,
	journal      = {Nature Communications},
	volume       = 9,
	number       = 1
}
@article{abid2021persistent,
	title        = {Persistent anti-muslim bias in large language models},
	author       = {Abubakar Abid and Maheen Farooqi and James Zou},
	year         = 2021,
	journal      = {arXiv preprint arXiv:2101.05783}
}
@inproceedings{abiteboul1997querying,
	title        = {Querying semi-structured data},
	author       = {Serge Abiteboul},
	year         = 1997,
	booktitle    = {International Conference on Database Theory}
}
@article{abolafia2018neural,
	title        = {Neural Program Synthesis with Priority Queue Training},
	author       = {Daniel A Abolafia and Mohammad Norouzi and Quoc V Le},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1801.03526}
}
@techreport{abramson2004active,
	title        = {Active learning for visual object recognition},
	author       = {Yotam Abramson and Yoav Freund},
	year         = 2004,
	institution  = {University of California, San Diego}
}
@book{absil2007optimization,
	title        = {Optimization Algorithms on Matrix Manifolds},
	author       = {Absil, P.A. and Mahony, R. and Sepulchre, R.},
	year         = 2007,
	publisher    = {Princeton University Press},
	isbn         = 9780691132983,
	url          = {https://books.google.com/books?id=gyaKmAEACAAJ},
	lccn         = 2007927538
}
@article{abu1990learning,
	title        = {Learning from hints in neural networks},
	author       = {Yaser S Abu-Mostafa},
	year         = 1990,
	journal      = {Journal of Complexity},
	volume       = 6,
	number       = 2,
	pages        = {192--198}
}
@article{abujabal2018comqa,
	title        = {ComQA: A Community-sourced Dataset for Complex Factoid Question Answering with Paraphrase Clusters},
	author       = {Abdalghani Abujabal and Rishiraj Saha Roy and Mohamed Yahya and Gerhard Weikum},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1809.09528}
}
@article{acar2009unsupervised,
	title        = {Unsupervised multiway data analysis: A literature survey},
	author       = {Acar, Evrim and Yener, B{\"u}lent},
	year         = 2009,
	journal      = {Knowledge and Data Engineering, IEEE Transactions on},
	volume       = 21,
	number       = 1,
	pages        = {6--20}
}
@inproceedings{achiam2017constrained,
	title        = {Constrained policy optimization},
	author       = {Achiam, Joshua and Held, David and Tamar, Aviv and Abbeel, Pieter},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning},
	pages        = {22--31},
	organization = {PMLR}
}
@article{achiam2019benchmarking,
	title        = {Benchmarking Safe Exploration in Deep Reinforcement Learning},
	author       = {Joshua Achiam and Dario Amodei},
	year         = 2019,
	journal      = {arXiv}
}
@inproceedings{achlioptas2005spectral,
	title        = {On spectral learning of mixtures of distributions},
	author       = {Dimitris Achlioptas and Frank McSherry},
	year         = 2005,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@article{ackermann2019reducing,
	title        = {Reducing overestimation bias in multi-agent domains using double centralized critics},
	author       = {Ackermann, Johannes and Gabler, Volker and Osa, Takayuki and Sugiyama, Masashi},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1910.01465}
}
@article{adamczak2011chevet,
	title        = {Chevet type inequality and norms of submatrices},
	author       = {Adamczak, Rados{\l}aw and Lata{\l}a, Rafa{\l} and Litvak, Alexander E and Pajor, Alain and Tomczak-Jaegermann, Nicole},
	year         = 2011,
	journal      = {arXiv preprint arXiv:1107.4066}
}
@article{adamczak2015concentration,
	title        = {Concentration inequalities for non-{L}ipschitz functions with bounded derivatives of higher order},
	author       = {Rados{\l{}}aw Adamczak and Pawe{\l{}} Wolff},
	year         = 2015,
	journal      = {Probability Theory and Related Fields},
	volume       = 162,
	pages        = {531--586}
}
@inproceedings{adel2016comparing,
	title        = {Comparing Convolutional Neural Networks to Traditional Models for Slot Filling},
	author       = {Heike Adel and Benjamin Roth and Hinrich Sch\"{u}tze},
	year         = 2016,
	booktitle    = {Human Language Technology and North American Association for Computational Linguistics (HLT/NAACL)}
}
@article{adelman2008sixth,
	title        = {The sixth data release of the {S}loan digital sky survey},
	author       = {Jennifer K. Adelman-McCarthy and Marcel A. Ag{\"u}eros and Sahar S. Allam and Carlos Allende Prieto and Kurt S. J. Anderson and Scott F. Anderson and James Annis and Neta A. Bahcall and C. A. L. Bailer-Jones and Ivan K. Baldry and others},
	year         = 2008,
	journal      = {The Astrophysical Journal Supplement Series},
	volume       = 175,
	number       = 2
}
@article{adhlw19,
	title        = {Fine-Grained Analysis of Optimization and Generalization for Overparameterized Two-Layer Neural Networks},
	author       = {Sanjeev Arora and Simon S. Du and Wei Hu and Zhiyuan Li and Ruosong Wang},
	year         = 2019,
	journal      = {CoRR},
	volume       = {abs/1901.08584},
	url          = {http://arxiv.org/abs/1901.08584},
	archiveprefix = {arXiv},
	eprint       = {1901.08584},
	timestamp    = {Sat, 02 Feb 2019 16:56:00 +0100},
	biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1901-08584},
	bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@book{adler2009random,
	title        = {Random fields and geometry},
	author       = {Adler, Robert J and Taylor, Jonathan E},
	year         = 2009,
	publisher    = {Springer Science \& Business Media}
}
@inproceedings{adler2012textexploration,
	title        = {Entailment-based Text Exploration with Application to the Health-care Domain},
	author       = {Meni Adler and Jonathan Berant and Ido Dagan},
	year         = 2012,
	booktitle    = {ACL system demonstrations}
}
@article{adler2016auditing,
	title        = {Auditing Black-box Models for Indirect Influence},
	author       = {Philip Adler and Casey Falk and Sorelle A Friedler and Gabriel Rybeck and Carlos Scheidegger and Brandon Smith and Suresh Venkatasubramanian},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.07043}
}
@article{adomavicius2014bias,
	title        = {De-Biasing User Preference Ratings in Recommender Systems},
	author       = {Gediminas Adomavicius and Jesse Bockstedt and Shawn Curley and Jingjing Zhang},
	year         = 2014,
	journal      = {CEUR Workshop Proceedings},
	volume       = 1253,
	pages        = {2--9}
}
@article{adragna2020fairness,
	title        = {Fairness and Robustness in Invariant Learning: A Case Study in Toxicity Classification},
	author       = {Robert Adragna and Elliot Creager and David Madras and Richard Zemel},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2011.06485}
}
@techreport{adriaans99shallow,
	title        = {Learning Shallow Context-Free Languages under Simple Distributions},
	author       = {Pieter W. Adriaans},
	year         = 1999,
	institution  = {Stanford University}
}
@article{advani2017high,
	title        = {High-dimensional dynamics of generalization error in neural networks},
	author       = {Madhu S Advani and Andrew M Saxe},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1710.03667}
}
@inproceedings{afantenos2012developing,
	title        = {Developing a corpus of strategic conversation in The Settlers of Catan},
	author       = {Stergos Afantenos and Nicholas Asher and Farah Benamara and Anais Cadilhac and Cédric Dégremont and Pascal Denis and Markus Guhe and Simon Keizer and Alex Lascarides and Oliver Lemon and Philippe Muller and Soumya Paul and Verena Rieser and Laure Vieu},
	year         = 2012,
	booktitle    = {SeineDial 2012 - The 16th Workshop on the Semantics and Pragmatics of Dialogue}
}
@inproceedings{afantenos2012modelling,
	title        = {Modelling Strategic Conversation: Model, Annotation Design and Corpus},
	author       = {Stergos Afantenos and Nicholas Asher and Farah Benamara and Anais Cadilhac and Cedric Dégremont and Pascal Denis and Markus Guhe and Simon Keizer and Alex Lascarides and Oliver Lemon and others},
	year         = 2012,
	booktitle    = {Proceedings of SemDial 2012: Workshop on the Semantics and Pragmatics of Dialogue},
	pages        = {167--168}
}
@inproceedings{afsari2006simple,
	title        = {Simple {LU} and {QR} based non-orthogonal matrix joint diagonalization},
	author       = {Bijan Afsari},
	year         = 2006,
	booktitle    = {Independent Component Analysis and Blind Signal Separation},
	pages        = {1--7}
}
@article{afsari2008sensitivity,
	title        = {Sensitivity analysis for the problem of matrix joint diagonalization},
	author       = {B. Afsari},
	year         = 2008,
	journal      = {SIAM Journal on Matrix Analysis and Applications},
	volume       = 30,
	number       = 3,
	pages        = {1148--1171}
}
@inproceedings{agarwal09hybrid,
	title        = {Exponential Family Hybrid Learning},
	author       = {Arvind Agarwal and Hal {Daum{\'e} III}},
	year         = 2009,
	booktitle    = {International Joint Conference on Artificial Intelligence (IJCAI)}
}
@article{agarwal2005geometric,
	title        = {Geometric approximation via coresets},
	author       = {Agarwal, Pankaj K. and {Har-Peled}, Sariel and Varadarajan, Kasturi R.},
	year         = 2005,
	journal      = {Combinatorial and computational geometry},
	publisher    = {Cambridge University Press New York},
	volume       = 52,
	pages        = {1--30}
}
@inproceedings{agarwal2013selective,
	title        = {Selective sampling algorithms for cost-sensitive multiclass prediction},
	author       = {Alekh Agarwal},
	year         = 2013,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {1220--1228}
}
@inproceedings{agarwal2014taming,
	title        = {Taming the monster: A fast and simple algorithm for contextual bandits},
	author       = {Agarwal, Alekh and Hsu, Daniel and Kale, Satyen and Langford, John and Li, Lihong and Schapire, Robert},
	year         = 2014,
	booktitle    = {International Conference on Machine Learning},
	pages        = {1638--1646}
}
@article{agarwal2015multisection,
	title        = {Multisection in the stochastic block model using semidefinite programming},
	author       = {Naman Agarwal and Afonso S. Bandeira and Konstantinos Koiliaris and Alexandra Kolla},
	year         = 2015,
	journal      = {arXiv}
}
@article{agarwal2016finding,
	title        = {Finding approximate local minima for nonconvex optimization in linear time},
	author       = {Agarwal, Naman and Allen-Zhu, Zeyuan and Bullins, Brian and Hazan, Elad and Ma, Tengyu},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1611.01146}
}
@article{agarwal2016second,
	title        = {Second order stochastic optimization in linear time},
	author       = {Naman Agarwal and Brian Bullins and Elad Hazan},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.03943}
}
@misc{agarwal2017finding,
	title        = {Finding Approximate Local Minima Faster than Gradient Descent},
	author       = {Naman Agarwal and Zeyuan Allen-Zhu and Brian Bullins and Elad Hazan and Tengyu Ma},
	year         = 2017,
	eprint       = {1611.01146},
	archiveprefix = {arXiv},
	primaryclass = {math.OC}
}
@inproceedings{agarwal2018reductions,
	title        = {A Reductions Approach to Fair Classification},
	author       = {Alekh Agarwal and Alina Beygelzimer and Miroslav Dudik and John Langford and Hanna Wallach},
	year         = 2018,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {60--69}
}
@article{agarwal2019learning,
	title        = {Learning to Generalize from Sparse and Underspecified Rewards},
	author       = {Rishabh Agarwal and Chen Liang and Dale Schuurmans and Mohammad Norouzi},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1902.07198}
}
@inproceedings{agarwal2019optimality,
	title        = {Optimality and Approximation with Policy Gradient Methods in {Markov} Decision Processes},
	author       = {Agarwal, Alekh and Kakade, Sham M and Lee, Jason D and Mahajan, Gaurav},
	year         = 2020,
	month        = {09--12 Jul},
	booktitle    = {Conference on Learning Theory},
	publisher    = {PMLR},
	series       = {Proceedings of Machine Learning Research},
	volume       = 125,
	pages        = {64--66},
	pdf          = {http://proceedings.mlr.press/v125/agarwal20a/agarwal20a.pdf},
	abstract     = {Policy gradient (PG) methods are among the most effective methods in challenging reinforcement learning problems with large state and/or action spaces. However, little is known about even their most basic theoretical convergence properties, including: if and how fast they converge to a globally optimal solution (say with a sufficiently rich policy class); how they cope with approximation error due to using a restricted class of parametric policies; or their finite sample behavior. Such characterizations are important not only to compare these methods to their approximate value function counterparts (where such issues are relatively well understood, at least in the worst case), but also to help with more principled approaches to algorithm design. This work provides provable characterizations of computational, approximation, and sample size issues with regards to policy gradient methods in the context of discounted Markov Decision Processes (MDPs). We focus on both: 1) “tabular” policy parameterizations, where the optimal policy is contained in the class and where we show global convergence to the optimal policy, and 2) restricted policy classes, which may not contain the optimal policy and where we provide agnostic learning results. In the \emph{tabular setting}, our main results are: 1) convergence rate to global optimum for direct parameterization and projected gradient ascent 2) an asymptotic convergence to global optimum for softmax policy parameterization and PG; and a convergence rate with additional entropy regularization, and 3) dimension-free convergence to global optimum for softmax policy parameterization and Natural Policy Gradient (NPG) method with exact gradients. In \emph{function approximation}, we further analyze NPG with exact as well as inexact gradients under certain smoothness assumptions on the policy parameterization and establish rates of convergence in terms of the quality of the initial state distribution. 
One insight of this work is in formalizing how a favorable initial state distribution provides a means to circumvent worst-case exploration issues. Overall, these results place PG methods under a solid theoretical footing, analogous to the global convergence guarantees of iterative value function based algorithms.}
}
@article{agarwal2019reinforcement,
	title        = {Reinforcement learning: Theory and algorithms},
	author       = {Agarwal, Alekh and Jiang, Nan and Kakade, Sham M},
	year         = 2019,
	journal      = {CS Dept., UW Seattle, Seattle, WA, USA, Tech. Rep}
}
@article{agarwal2020disentangling,
	title        = {Disentangling Adaptive Gradient Methods from Learning Rates},
	author       = {Agarwal, Naman and Anil, Rohan and Hazan, Elad and Koren, Tomer and Zhang, Cyril},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2002.11803}
}
@article{agarwal2020flambe,
	title        = {FLAMBE: Structural complexity and representation learning of low rank MDPs},
	author       = {Agarwal, Alekh and Kakade, Sham and Krishnamurthy, Akshay and Sun, Wen},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2006.10814}
}
@inproceedings{agarwal2020pc,
	title        = {{PC-PG}: Policy cover directed exploration for provable policy gradient learning},
	author       = {Agarwal, Alekh and Henaff, Mikael and Kakade, Sham and Sun, Wen},
	year         = 2020,
	booktitle    = {Advances in Neural Information Processing Systems}
}
@article{AgarwalEtal:SparseCoding2013,
	title        = {{Learning Sparsely Used Overcomplete Dictionaries via Alternating Minimization}},
	author       = {A. Agarwal and A. Anandkumar and P. Jain and P. Netrapalli and R. Tandon},
	year         = 2013,
	month        = {Oct.},
	journal      = {Available on arXiv:1310.7991}
}
@inproceedings{agazzi1993connected,
	title        = {Connected and degraded text recognition using planar hidden {M}arkov models},
	author       = {Oscar E Agazzi and S-s Kuo and Esther Levin and Roberto Pieraccini},
	year         = 1993,
	booktitle    = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
	volume       = 5,
	pages        = {113--116}
}
@inproceedings{agichtein2000snowball,
	title        = {Snowball: Extracting relations from large plain-text collections},
	author       = {Eugene Agichtein and Luis Gravano},
	year         = 2000,
	booktitle    = {Proceedings of the fifth ACM conference on Digital Libraries}
}
@inproceedings{agirre2014semeval,
	title        = {Sem{E}val-2014 {T}ask 10: Multilingual Semantic Textual Similarity},
	author       = {Eneko Agirre and Carmen Banea and Claire Cardie and Daniel M Cer and Mona T Diab and Aitor Gonzalez-Agirre and Weiwei Guo and Rada Mihalcea and German Rigau and Janyce Wiebe},
	year         = 2014,
	booktitle    = {International Conference on Computational Linguistics (COLING)},
	pages        = {81--91}
}
@inproceedings{agmr17,
	title        = {Provable learning of noisy-or networks},
	author       = {Arora, Sanjeev and Ge, Rong and Ma, Tengyu and Risteski, Andrej},
	year         = 2017,
	booktitle    = {Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing (STOC)},
	pages        = {1057--1066},
	organization = {ACM}
}
@incollection{agralwal2017optimistic,
	title        = {Optimistic posterior sampling for reinforcement learning: worst-case regret bounds},
	author       = {Agrawal, Shipra and Jia, Randy},
	year         = 2017,
	booktitle    = {Advances in Neural Information Processing Systems 30},
	publisher    = {Curran Associates, Inc.},
	pages        = {1184--1194},
	url          = {http://papers.nips.cc/paper/6718-optimistic-posterior-sampling-for-reinforcement-learning-worst-case-regret-bounds.pdf},
	editor       = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett}
}
@inproceedings{agrawal2012analysis,
	title        = {Analysis of {T}hompson sampling for the multi-armed bandit problem},
	author       = {Agrawal, Shipra and Goyal, Navin},
	year         = 2012,
	booktitle    = {Conference on learning theory},
	pages        = {39.1--39.26}
}
@inproceedings{agrawal2013thompson,
	title        = {Thompson sampling for contextual bandits with linear payoffs},
	author       = {Agrawal, Shipra and Goyal, Navin},
	year         = 2013,
	booktitle    = {International Conference on Machine Learning},
	pages        = {127--135}
}
@article{agrawal2015vqa1,
	title        = {{VQA}: Visual Question Answering},
	author       = {Aishwarya Agrawal and Jiasen Lu and Stanislaw Antol and Margaret Mitchell and C. Lawrence Zitnick and Devi Parikh and Dhruv Batra},
	year         = 2015,
	journal      = {International Journal of Computer Vision},
	volume       = 123,
	pages        = {4--31}
}
@inproceedings{agrawal2016analyzing,
	title        = {Analyzing the behavior of visual question answering models},
	author       = {Aishwarya Agrawal and Dhruv Batra and Devi Parikh},
	year         = 2016,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{agrawal2017discrete,
	title        = {Discrete Control Barrier Functions for Safety-Critical Control of Discrete Systems with Application to Bipedal Robot Navigation.},
	author       = {Agrawal, Ayush and Sreenath, Koushil},
	year         = 2017,
	booktitle    = {Robotics: Science and Systems}
}
@article{agrawal2017near,
	title        = {Near-optimal regret bounds for {T}hompson sampling},
	author       = {Agrawal, Shipra and Goyal, Navin},
	year         = 2017,
	journal      = {Journal of the ACM (JACM)},
	publisher    = {ACM New York, NY, USA},
	volume       = 64,
	number       = 5,
	pages        = {1--24}
}
@inproceedings{agrawal2018don,
	title        = {Don't just assume; look and answer: Overcoming priors for visual question answering},
	author       = {Aishwarya Agrawal and Dhruv Batra and Devi Parikh and Aniruddha Kembhavi},
	year         = 2018,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)},
	pages        = {4971--4980}
}
@article{agrawala1970learning,
	title        = {Learning with a probabilistic teacher},
	author       = {Ashok K. Agrawala},
	year         = 1970,
	journal      = {IEEE Transactions on Information Theory},
	volume       = 16,
	pages        = {373--379}
}
@inproceedings{aguiar2006automatic,
	title        = {Automatic Learning of Articulated Skeletons from 3D Marker Trajectories},
	author       = {Edilson de Aguiar and Christian Theobalt and Hans-Peter Seidel},
	year         = 2006,
	booktitle    = {ISVC (1)},
	pages        = {485--494}
}
@inproceedings{agv,
	title        = {Simultaneous Hardcore Bits and Cryptography against Memory Attacks},
	author       = {Akavia, Adi and Goldwasser, Shafi and Vaikuntanathan, Vinod},
	year         = 2009,
	booktitle    = {Proceedings of the 6th Theory of Cryptography Conference on Theory of Cryptography},
	location     = {San Francisco, CA},
	publisher    = {Springer-Verlag},
	address      = {Berlin, Heidelberg},
	series       = {TCC '09},
	pages        = {474--495},
	doi          = {10.1007/978-3-642-00457-5_28},
	isbn         = {978-3-642-00456-8},
	url          = {http://dx.doi.org/10.1007/978-3-642-00457-5_28},
	numpages     = 22,
	acmid        = 1530469
}
@inproceedings{AH2016-nonconvex,
	title        = {{Variance Reduction for Faster Non-Convex Optimization}},
	author       = {{Allen-Zhu}, Zeyuan and Hazan, Elad},
	year         = 2016,
	booktitle    = {ICML}
}
@inproceedings{AH2016-reduction,
	title        = {{Optimal Black-Box Reductions Between Optimization Objectives}},
	author       = {{Allen-Zhu}, Zeyuan and Hazan, Elad},
	year         = 2016,
	booktitle    = {Proceedings of the 30th Conference on Neural Information Processing Systems},
	series       = {NIPS~'16}
}
@inproceedings{ahadi2015exploring,
	title        = {Exploring machine learning methods to automatically identify students in need of assistance},
	author       = {Alireza Ahadi and Raymond Lister and Heikki Haapala and Arto Vihavainen},
	year         = 2015,
	booktitle    = {Proceedings of the eleventh annual International Conference on International Computing Education Research},
	pages        = {121--130}
}
@inproceedings{aharon2005k,
	title        = {K-SVD and its non-negative variant for dictionary design},
	author       = {Aharon, Michal and Elad, Michael and Bruckstein, Alfred M},
	year         = 2005,
	booktitle    = {Optics \& Photonics 2005},
	pages        = {591411--591411},
	organization = {International Society for Optics and Photonics},
	owner        = {gewor_000},
	timestamp    = {2013.11.10}
}
@article{aharon2006img,
	title        = {K-SVD: An algorithm for designing overcomplete dictionaries for sparse representation},
	author       = {Aharon, Michal and Elad, Michael and Bruckstein, Alfred},
	year         = 2006,
	journal      = {Signal Processing, IEEE Transactions on},
	publisher    = {IEEE},
	volume       = 54,
	number       = 11,
	pages        = {4311--4322},
	owner        = {gewor_000},
	timestamp    = {2013.11.10}
}
@phdthesis{AharonThesis,
	title        = {Overcomplete Dictionaries for Sparse Representation of Signals},
	author       = {Michal Aharon},
	year         = 2006,
	school       = {Technion - Israel Institute of Technology},
	owner        = {gewor_000},
	timestamp    = {2013.11.10}
}
@inproceedings{AHK,
	title        = {A method of moments for mixture models and hidden {M}arkov models},
	author       = {A. Anandkumar and D. Hsu and S. Kakade},
	year         = 2012,
	month        = {June},
	booktitle    = {COLT}
}
@inproceedings{AHK12,
	title        = {A method of moments for mixture models and hidden {M}arkov models},
	author       = {Anima Anandkumar and Daniel Hsu and Sham M. Kakade},
	year         = 2012,
	booktitle    = {COLT}
}
@inproceedings{AHK2005,
	title        = {{Fast Algorithms for Approximate Semidefinite Programming using the Multiplicative Weights Update Method}},
	author       = {Arora, Sanjeev and Hazan, Elad and Kale, Satyen},
	year         = 2005,
	booktitle    = {46th Annual IEEE Symposium on Foundations of Computer Science (FOCS'05)},
	publisher    = {IEEE},
	pages        = {339--348},
	doi          = {10.1109/SFCS.2005.35},
	isbn         = {0-7695-2468-0},
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Arora, Hazan, Kale - 2005 - Fast Algorithms for Approximate Semidefinite Programming using the Multiplicative Weights Update Method.pdf:pdf},
	mendeley-groups = {Algorithms/Multiplicative Weight,Algorithms/Multiplicative Weight/SDP}
}
@article{AHK2012,
	title        = {{The Multiplicative Weights Update Method: a Meta-Algorithm and Applications.}},
	author       = {Arora, Sanjeev and Hazan, Elad and Kale, Satyen},
	year         = 2012,
	journal      = {Theory of Computing},
	volume       = 8,
	pages        = {121--164},
	doi          = {10.4086/toc.2012.v008a006},
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Arora, Hazan, Kale - 2012 - The Multiplicative Weights Update Method a Meta-Algorithm and Applications.pdf:pdf},
	mendeley-groups = {Algorithms/Multiplicative Weight}
}
@article{ahlszz18,
	title        = {Towards provable control for unknown linear dynamical systems},
	author       = {Arora, Sanjeev and Hazan, Elad and Lee, Holden and Singh, Karan and Zhang, Cyril and Zhang, Yi},
	year         = 2018
}
@article{AhlWin02,
	title        = {Strong converse for identification via quantum channels},
	author       = {R.~Ahlswede and A.~Winter},
	year         = 2002,
	journal      = {IEEE Transactions on Information Theory},
	volume       = 48,
	number       = 3,
	pages        = {569--579}
}
@article{ahmad2010soilmoisture,
	title        = {Estimating soil moisture using remote sensing data: A machine learning approach},
	author       = {Sajjad Ahmad and Ajay Kalra and Haroon Stephen},
	year         = 2010,
	journal      = {Advances in Water Resources},
	volume       = 33,
	number       = 1,
	pages        = {69--80}
}
@article{ahmadi2017dsos,
	title        = {{DSOS} and {SDSOS} optimization: more tractable alternatives to sum of squares and semidefinite optimization},
	author       = {Amir Ali Ahmadi and Anirudha Majumdar},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1706.02586}
}
@inproceedings{ahmed2018compilation,
	title        = {Compilation error repair: for the student programs, from the student programs},
	author       = {Umair Z Ahmed and Pawan Kumar and Amey Karkare and Purushottam Kar and Sumit Gulwani},
	year         = 2018,
	booktitle    = {International Conference on Software Engineering (ICSE)}
}
@inproceedings{ahmed2019understanding,
	title        = {Understanding the impact of entropy on policy optimization},
	author       = {Ahmed, Zafarali and Le Roux, Nicolas and Norouzi, Mohammad and Schuurmans, Dale},
	year         = 2019,
	booktitle    = {International Conference on Machine Learning},
	pages        = {151--160},
	organization = {PMLR}
}
@inproceedings{Ahmedetal12,
	title        = {Scalable inference in latent variable models},
	author       = {A. Ahmed and M. Aly and J. Gonzalez and S. Narayanamurthy and A. J. Smola},
	year         = 2012,
	booktitle    = {WSDM '12: Proceedings of the fifth ACM international conference on Web search and data mining},
	location     = {Seattle, Washington, USA},
	publisher    = {ACM},
	address      = {New York, NY, USA},
	pages        = {123--132},
	doi          = {10.1145/2124295.2124312},
	url          = {http://dl.acm.org/authorize?6666391}
}
@inproceedings{ahn2004labeling,
	title        = {Labeling images with a computer game},
	author       = {Luis von Ahn and Laura A. Dabbish},
	year         = 2004,
	booktitle    = {Conference on Human Factors in Computing Systems (CHI)}
}
@inproceedings{ahuja2021empirical,
	title        = {Empirical or Invariant Risk Minimization? A Sample Complexity Perspective},
	author       = {Kartik Ahuja and Jun Wang and Amit Dhurandhar and Karthikeyan Shanmugam and Kush R. Varshney},
	year         = 2021,
	booktitle    = {International Conference on Learning Representations},
	url          = {https://openreview.net/forum?id=jrA5GAccy_}
}
@article{ai2019haim,
	title        = {{HAIM}: A Modest Step Towards Controllable Text Generation},
	author       = {AI21},
	year         = 2019,
	journal      = {AI21 Labs Blog}
}
@misc{ai2020wordtune,
	title        = {Wordtune (accessed 2020 {O}ct 30)},
	author       = {AI21},
	year         = 2020,
	howpublished = {\url{https://www.wordtune.com/}}
}
@article{aijo2014methods,
	title        = {Methods for time series analysis of {RNA}-seq data with application to human {Th17} cell differentiation},
	author       = {Tarmo {\"A}ij{\"o} and Vincent Butty and Zhi Chen and Verna Salo and Subhash Tripathi and Christopher B Burge and Riitta Lahesmaa and Harri L{\"a}hdesm{\"a}ki},
	year         = 2014,
	journal      = {Bioinformatics},
	volume       = 30,
	number       = 12
}
@inproceedings{airoldi2009mixed,
	title        = {Mixed membership stochastic blockmodels},
	author       = {Airoldi, Edoardo M and Blei, David M and Fienberg, Stephen E and Xing, Eric P},
	year         = 2009,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {33--40}
}
@inproceedings{AK01,
	title        = {Learning mixtures of arbitrary {G}aussians},
	author       = {S. Arora and R. Kannan},
	year         = 2001,
	booktitle    = {STOC}
}
@article{akaike74aic,
	title        = {A new look at the statistical model identification},
	author       = {Hirotugu Akaike},
	year         = 1974,
	journal      = {IEEE Transactions on Automatic Control},
	volume       = 19,
	pages        = {716--723}
}
@article{akgun2012keyframe,
	title        = {Keyframe-based learning from demonstration},
	author       = {B. Akgun and M. Cakmak and K. Jiang and A. Thomaz},
	year         = 2012,
	journal      = {International Journal of Social Robotics (IJSR)},
	volume       = 4,
	number       = 4,
	pages        = {343--355}
}
@article{akkaya2019solving,
	title        = {Solving {R}ubik's cube with a robot hand},
	author       = {Akkaya, Ilge and Andrychowicz, Marcin and Chociej, Maciek and Litwin, Mateusz and McGrew, Bob and Petron, Arthur and Paino, Alex and Plappert, Matthias and Powell, Glenn and Ribas, Raphael and others},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1910.07113}
}
@article{akram2018leveraging,
	title        = {Leveraging unlabeled whole-slide-images for mitosis detection},
	author       = {Saad Ullah Akram and Talha Qaiser and Simon Graham and Juho Kannala and Janne Heikkil{\"a} and Nasir Rajpoot},
	year         = 2018,
	journal      = {Computational Pathology and Ophthalmic Medical Image Analysis},
	volume       = 1,
	pages        = {69--77}
}
@article{al192,
	title        = {Can {SGD} Learn Recurrent Neural Networks with Provable Generalization?},
	author       = {Zeyuan Allen{-}Zhu and Yuanzhi Li},
	year         = 2019,
	journal      = {CoRR},
	volume       = {abs/1902.01028},
	url          = {http://arxiv.org/abs/1902.01028},
	archiveprefix = {arXiv},
	eprint       = {1902.01028},
	timestamp    = {Fri, 01 Mar 2019 17:14:13 +0100},
	biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1902-01028},
	bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{AL2016-kCCA,
	title        = {{Doubly Accelerated Methods for Faster CCA and Generalized Eigendecomposition}},
	author       = {{Allen-Zhu}, Zeyuan and Li, Yuanzhi},
	year         = 2016,
	month        = jul,
	journal      = {ArXiv e-prints},
	volume       = {abs/1607.06017}
}
@inproceedings{AL2016-kSVD,
	title        = {{Even Faster SVD Decomposition Yet Without Agonizing Pain}},
	author       = {{Allen-Zhu}, Zeyuan and Li, Yuanzhi},
	year         = 2016,
	booktitle    = {NIPS}
}
@article{AL2016-onlinePCA,
	title        = {{Fast Global Convergence of Online PCA}},
	author       = {{Allen-Zhu}, Zeyuan and Li, Yuanzhi},
	year         = 2016,
	month        = jul,
	journal      = {ArXiv e-prints},
	volume       = {abs/1607.07837}
}
@article{AL2016-PCR,
	title        = {{Faster Principal Component Regression via Optimal Polynomial Approximation to sgn(x)}},
	author       = {{Allen-Zhu}, Zeyuan and Li, Yuanzhi},
	year         = 2016,
	month        = aug,
	journal      = {ArXiv e-prints},
	volume       = {abs/1608.04773}
}
@inproceedings{Alamgir2010,
	title        = {Multi-agent Random Walks for Local Clustering on Graphs},
	author       = {Alamgir, Morteza and von Luxburg, Ulrike},
	year         = 2010,
	series       = {ICDM '10},
	pages        = {18--27}
}
@article{alaoui2014fast,
	title        = {Fast randomized kernel methods with statistical guarantees},
	author       = {Alaoui, Ahmed El and Mahoney, Michael W},
	year         = 2014,
	journal      = {arXiv preprint arXiv:1411.0306}
}
@article{albadawy2018tumor,
	title        = {Deep learning for segmentation of brain tumors: Impact of cross-institutional training and testing},
	author       = {EA AlBadawy and A Saha and MA Mazurowski},
	year         = 2018,
	journal      = {Med Phys.},
	volume       = 45
}
@article{albuquerque2019generalizing,
	title        = {Generalizing to unseen domains via distribution matching},
	author       = {Isabela Albuquerque and Jo{\~a}o Monteiro and Mohammad Darvishi and Tiago H. Falk and Ioannis Mitliagkas},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1911.00804}
}
@article{aldous85exch,
	title        = {Exchangeability and related topics},
	author       = {D. Aldous},
	year         = 1985,
	journal      = {Springer Lecture Notes in Math},
	volume       = 1117,
	pages        = {1--198}
}
@inproceedings{alekhnovich,
	title        = {More on Average Case vs Approximation Complexity},
	author       = {Alekhnovich, Michael},
	year         = 2003,
	booktitle    = {Proceedings of the 44th Annual IEEE Symposium on Foundations of Computer Science},
	publisher    = {IEEE Computer Society},
	address      = {Washington, DC, USA},
	series       = {FOCS '03},
	pages        = {298--307},
	isbn         = {0-7695-2040-5},
	url          = {http://dl.acm.org/citation.cfm?id=946243.946338},
	acmid        = 946338
}
@article{alemi2016deep,
	title        = {Deep variational information bottleneck},
	author       = {Alexander A Alemi and Ian Fischer and Joshua V Dillon and Kevin Murphy},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1612.00410}
}
@inproceedings{alexandari2020maximum,
	title        = {Maximum likelihood with bias-corrected calibration is hard-to-beat at label shift adaptation},
	author       = {Amr Alexandari and Anshul Kundaje and Avanti Shrikumar},
	year         = 2020,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {222--232}
}
@inproceedings{alexandrescu2009graph,
	title        = {Graph-based learning for statistical machine translation},
	author       = {Andrei Alexandrescu and Katrin Kirchhoff},
	year         = 2009,
	booktitle    = {North American Association for Computational Linguistics (NAACL)},
	pages        = {119--127}
}
@techreport{alfakih98embeddability,
	title        = {On the embeddability of weighted graphs in Euclidean spaces},
	author       = {A. Alfakih and H. Wolkowicz},
	year         = 1998,
	institution  = {University of Waterloo}
}
@inproceedings{alfonseca2012pattern,
	title        = {Pattern learning for relation extraction with a hierarchical topic model},
	author       = {Enrique Alfonseca and Katja Filippova and Jean-Yves Delort and Guillermo Garrido},
	year         = 2012,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {54--59}
}
@article{ali1966general,
	title        = {A General Class of Coefficients of Divergence of One Distribution from Another},
	author       = {S M Ali and Samuel David Silvey},
	year         = 1966,
	journal      = {Journal of the Royal Statistical Society. Series B (Methodological)},
	volume       = 28
}
@inproceedings{ali2010automation,
	title        = {Automation of question generation from sentences},
	author       = {Husam Ali and Yllias Chali and Sadid A Hasan},
	year         = 2010,
	booktitle    = {Proceedings of QG2010: The Third Workshop on Question Generation},
	pages        = {58--67}
}
@article{all18,
	title        = {{Learning and Generalization in Overparameterized Neural Networks, Going Beyond Two Layers}},
	author       = {{Allen-Zhu}, Zeyuan and Li, Yuanzhi and Liang, Yingyu},
	year         = 2018,
	month        = nov,
	journal      = {arXiv preprint arXiv:1811.04918}
}
@inproceedings{allamanis2015bimodal,
	title        = {Bimodal modelling of source code and natural language},
	author       = {Miltos Allamanis and Daniel Tarlow and Andrew Gordon and Yi Wei},
	year         = 2015,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {2123--2132}
}
@inproceedings{allamanis2018varmisuse,
	title        = {Learning to Represent Programs with Graphs},
	author       = {Miltiadis Allamanis and Marc Brockschmidt and Mahmoud Khademi},
	year         = 2018,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@article{allemand2001polynomial,
	title        = {A polynomial case of unconstrained zero-one quadratic optimization},
	author       = {Kim Allemand and Komei Fukuda and Thomas M Liebling and Erich Steiner},
	year         = 2001,
	journal      = {Mathematical programming},
	volume       = 91,
	number       = 1,
	pages        = {49--52}
}
@article{allen1980analyzing,
	title        = {Analyzing intention in utterances},
	author       = {James F Allen and C Raymond Perrault},
	year         = 1980,
	journal      = {Artificial Intelligence},
	volume       = 15,
	number       = 3,
	pages        = {143--178}
}
@article{allen2001toward,
	title        = {Toward conversational human-computer interaction},
	author       = {James F Allen and Donna K Byron and Myroslava Dzikovska and George Ferguson and Lucian Galescu and Amanda Stent},
	year         = 2001,
	journal      = {AI magazine},
	volume       = 22,
	number       = 4
}
@inproceedings{allen2007plow,
	title        = {{PLOW}: A collaborative task learning agent},
	author       = {James Allen and Nathanael Chambers and George Ferguson and Lucian Galescu and Hyuckchul Jung and Mary Swift and William Taysom},
	year         = 2007,
	booktitle    = {Association for the Advancement of Artificial Intelligence (AAAI)},
	pages        = {1514--1519}
}
@book{allen2014reasoning,
	title        = {Reasoning about plans},
	author       = {James Allen and Henry Kautz and Richard Pelavin and Josh Tenenberg},
	year         = 2014,
	publisher    = {Morgan Kaufmann}
}
@article{allen2016first,
	title        = {First Efficient Convergence for Streaming k-{PCA}: a Global, Gap-Free, and Near-Optimal Rate},
	author       = {Allen-Zhu, Zeyuan and Li, Yuanzhi},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1607.07837}
}
@article{allen2017natasha,
	title        = {Natasha 2: Faster non-convex optimization than {SGD}},
	author       = {Allen-Zhu, Zeyuan},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1708.08694}
}
@article{allen2018convergence,
	title        = {On the convergence rate of training recurrent neural networks},
	author       = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Song, Zhao},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1810.12065}
}
@article{allen2018convergencetheory,
	title        = {A Convergence Theory for Deep Learning via Over-Parameterization},
	author       = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Song, Zhao},
	year         = 2018,
	month        = nov,
	journal      = {arXiv preprint arXiv:1811.03962}
}
@article{allen2019can,
	title        = {What can {ResNet} learn efficiently, going beyond kernels?},
	author       = {Allen-Zhu, Zeyuan and Li, Yuanzhi},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1905.10337}
}
@article{Allenzhu2016Katyusha,
	title        = {{Katyusha: The First Direct Acceleration of Stochastic Gradient Methods}},
	author       = {{Allen-Zhu}, Zeyuan},
	year         = 2016,
	month        = mar,
	journal      = {ArXiv e-prints},
	volume       = {abs/1603.05953}
}
@article{allman11identifiability,
	title        = {Identifiability of 2-tree mixtures for group-based models},
	author       = {Elizabeth S. Allman and Sonja Petrovi{\'c} and John A. Rhodes and Seth Sullivant},
	year         = 2011,
	journal      = {Transactions on Computational Biology and Bioinformatics},
	volume       = 8,
	pages        = {710--722}
}
@inproceedings{ALO-bss,
	title        = {{Spectral Sparsification and Regret Minimization Beyond Multiplicative Updates}},
	author       = {{Allen-Zhu}, Zeyuan and Liao, Zhenyu and Orecchia, Lorenzo},
	year         = 2015,
	booktitle    = {Proceedings of the 47th Annual ACM Symposium on Theory of Computing},
	series       = {STOC~'15}
}
@inproceedings{ALO-sdp-parallel,
	title        = {Using Optimization to Obtain a Width-Independent, Parallel, Simpler, and Faster Positive {SDP} Solver},
	author       = {{Allen-Zhu}, Zeyuan and Lee, Yin Tat and Orecchia, Lorenzo},
	year         = 2016,
	booktitle    = {Proceedings of the 27th ACM-SIAM Symposium on Discrete Algorithms},
	series       = {SODA~'16}
}
@article{alon1999space,
	title        = {The space complexity of approximating the frequency moments},
	author       = {Alon, Noga and Matias, Yossi and Szegedy, Mario},
	year         = 1999,
	journal      = {Journal of Computer and system sciences},
	publisher    = {Elsevier},
	volume       = 58,
	number       = 1,
	pages        = {137--147}
}
@article{alon2006approximating,
	title        = {Approximating the cut-norm via {G}rothendieck's inequality},
	author       = {N. Alon and A. Naor},
	year         = 2006,
	journal      = {SIAM Journal on Computing},
	volume       = 35,
	number       = 4,
	pages        = {787--803}
}
@article{Alon86,
	title        = {Eigenvalues and expanders},
	author       = {Noga Alon},
	year         = 1986,
	journal      = {Combinatorica},
	volume       = 6,
	number       = 2,
	pages        = {83--96}
}
@article{alphago16,
	title        = {Mastering the game of {G}o with deep neural networks and tree search},
	author       = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
	year         = 2016,
	journal      = {Nature},
	publisher    = {Nature Research},
	volume       = 529,
	number       = 7587,
	pages        = {484--489}
}
@article{alphago17,
	title        = {Mastering the game of {G}o without human knowledge},
	author       = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
	year         = 2017,
	journal      = {Nature},
	publisher    = {Nature Publishing Group},
	volume       = 550,
	number       = 7676,
	pages        = {354--359}
}
@inproceedings{alshawi11nlf,
	title        = {Deterministic Statistical Mapping of Sentences to Underspecified Semantics},
	author       = {Hiyan Alshawi and Pi-Chuan Chang and Michael Ringgaard},
	year         = 2011,
	booktitle    = {International Conference on Computational Semantics (IWCS)},
	pages        = {15--24}
}
@inproceedings{alshiekh2018safe,
	title        = {Safe reinforcement learning via shielding},
	author       = {Alshiekh, Mohammed and Bloem, Roderick and Ehlers, R{\"u}diger and K{\"o}nighofer, Bettina and Niekum, Scott and Topcu, Ufuk},
	year         = 2018,
	booktitle    = {Thirty-Second AAAI Conference on Artificial Intelligence}
}
@inproceedings{alterovitz2011rapidly,
	title        = {Rapidly-exploring roadmaps: Weighing exploration vs. refinement in optimal motion planning},
	author       = {R. Alterovitz and S. Patil and A. Derbakova},
	year         = 2011,
	booktitle    = {International Conference on Robotics and Automation (ICRA)}
}
@article{altham1973,
	title        = {Rawls' Difference Principle},
	author       = {J. E. J. Altham},
	year         = 1973,
	journal      = {Philosophy},
	volume       = 48,
	pages        = {75--78}
}
@book{altman1999constrained,
	title        = {Constrained Markov decision processes},
	author       = {Altman, Eitan},
	year         = 1999,
	publisher    = {CRC Press},
	volume       = 7
}
@inproceedings{AltTensorDecomp:COLT2015,
	title        = {{Learning Overcomplete Latent Variable Models through Tensor Methods}},
	author       = {A. Anandkumar and R. Ge and M. Janzamin},
	year         = 2015,
	month        = jul,
	booktitle    = {Proceedings of the Conference on Learning Theory (COLT)},
	address      = {Paris, France}
}
@article{AltTensorDecomp2014,
	title        = {{Guaranteed Non-Orthogonal Tensor Decomposition via Alternating Rank-$1$ Updates}},
	author       = {Anima Anandkumar and Rong Ge and Majid Janzamin},
	year         = 2014,
	month        = feb,
	journal      = {arXiv preprint arXiv:1402.5180}
}
@inproceedings{ALY2016-geometry,
	title        = {{Optimization Algorithms for Faster Computational Geometry}},
	author       = {{Allen-Zhu}, Zeyuan and Liao, Zhenyu and Yuan, Yang},
	year         = 2016,
	booktitle    = {ICALP}
}
@inproceedings{alzantot2018adversarial,
	title        = {Generating Natural Language Adversarial Examples},
	author       = {Moustafa Alzantot and Yash Sharma and Ahmed Elgohary and Bo-Jhang Ho and Mani Srivastava and Kai-Wei Chang},
	year         = 2018,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{AM05,
	title        = {On Spectral Learning of Mixtures of Distributions},
	author       = {D. Achlioptas and F. McSherry},
	year         = 2005,
	booktitle    = {COLT}
}
@article{amari1998natural,
	title        = {Natural gradient works efficiently in learning},
	author       = {Amari, Shun-Ichi},
	year         = 1998,
	journal      = {Neural computation},
	publisher    = {MIT Press},
	volume       = 10,
	number       = 2,
	pages        = {251--276}
}
@article{amari2002geometrical,
	title        = {Geometrical singularities in the neuromanifold of multilayer perceptrons},
	author       = {Amari, Shun-ichi and Park, Hyeyoung and Ozeki, Tomoko},
	year         = 2002,
	journal      = {Advances in neural information processing systems},
	volume       = 1,
	pages        = {343--350}
}
@article{amari2006singularities,
	title        = {Singularities affect dynamics of learning in neuromanifolds},
	author       = {Amari, Shun-Ichi and Park, Hyeyoung and Ozeki, Tomoko},
	year         = 2006,
	journal      = {Neural computation},
	publisher    = {MIT Press},
	volume       = 18,
	number       = 5,
	pages        = {1007--1065}
}
@inproceedings{amato2018decision,
	title        = {Decision-Making Under Uncertainty in Multi-Agent and Multi-Robot Systems: Planning and Learning.},
	author       = {Amato, Christopher},
	year         = 2018,
	booktitle    = {IJCAI},
	pages        = {5662--5666}
}
@inproceedings{ambainis2000quantum,
	title        = {Quantum lower bounds by quantum arguments},
	author       = {Ambainis, Andris},
	year         = 2000,
	booktitle    = {Proceedings of the thirty-second annual ACM symposium on Theory of computing},
	pages        = {636--643},
	organization = {ACM}
}
@article{amelunxen2014living,
	title        = {Living on the edge: Phase transitions in convex programs with random data},
	author       = {Amelunxen, Dennis and Lotz, Martin and McCoy, Michael B and Tropp, Joel A},
	year         = 2014,
	journal      = {Information and Inference: A Journal of the IMA},
	publisher    = {OUP},
	volume       = 3,
	number       = 3,
	pages        = {224--294}
}
@inproceedings{amershi2015modeltracker,
	title        = {Modeltracker: Redesigning performance analysis tools for machine learning},
	author       = {Saleema Amershi and Max Chickering and Steven M Drucker and Bongshin Lee and Patrice Simard and Jina Suh},
	year         = 2015,
	booktitle    = {Conference on Human Factors in Computing Systems (CHI)},
	pages        = {337--346}
}
@inproceedings{ames2019control,
	title        = {Control barrier functions: Theory and applications},
	author       = {Ames, Aaron D and Coogan, Samuel and Egerstedt, Magnus and Notomista, Gennaro and Sreenath, Koushil and Tabuada, Paulo},
	year         = 2019,
	booktitle    = {2019 18th European Control Conference (ECC)},
	pages        = {3420--3431},
	organization = {IEEE}
}
@inproceedings{amini2003semisupervised,
	title        = {Semi-Supervised Learning with Explicit Misclassification Modeling},
	author       = {Massih-Reza Amini and Patrick Gallinari},
	year         = 2003,
	booktitle    = {International Joint Conference on Artificial Intelligence (IJCAI)}
}
@inproceedings{amit2007uncovering,
	title        = {Uncovering shared structures in multiclass classification},
	author       = {Amit, Yonatan and Fink, Michael and Srebro, Nathan and Ullman, Shimon},
	year         = 2007,
	booktitle    = {Proceedings of the 24th international conference on Machine learning},
	pages        = {17--24},
	organization = {ACM}
}
@inproceedings{amodei2016,
	title        = {Deep Speech 2: End-to-End Speech Recognition in {E}nglish and {M}andarin},
	author       = {Dario Amodei and others},
	year         = 2016,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {173--182}
}
@article{amodei2016concrete,
	title        = {Concrete problems in {AI} safety},
	author       = {Dario Amodei and Chris Olah and Jacob Steinhardt and Paul Christiano and John Schulman and Dan Man{\'e}},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1606.06565}
}
@inproceedings{amorim2018automated,
	title        = {Automated essay scoring in the presence of biased ratings},
	author       = {Evelin Amorim and Marcia Can{\c{c}}ado and Adriano Veloso},
	year         = 2018,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {229--237}
}
@inproceedings{amos2017input,
	title        = {Input convex neural networks},
	author       = {Amos, Brandon and Xu, Lei and Kolter, J Zico},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning},
	pages        = {146--155},
	organization = {PMLR}
}
@article{AMP2010,
	title        = {The dynamics of message passing on dense graphs, with applications to compressed sensing},
	author       = {Mohsen Bayati and Andrea Montanari},
	year         = 2010,
	month        = jan,
	journal      = {arXiv preprint arXiv:1001.3448}
}
@article{AMR09,
	title        = {{Identifiability of parameters in latent structure models with many observed variables}},
	author       = {E. S. Allman and C. Matias and J. A. Rhodes},
	year         = 2009,
	journal      = {The Annals of Statistics},
	volume       = 37,
	number       = {6A},
	pages        = {3099--3132}
}
@article{anand2012semantic,
	title        = {Contextually Guided Semantic Labeling and Search for 3{D} Point Clouds},
	author       = {A. Anand and H. Koppula and T. Joachims and A. Saxena},
	year         = 2012,
	journal      = {International Journal of Robotics Research (IJRR)},
	volume       = 32
}
@inproceedings{anandkumar11tree,
	title        = {Spectral Methods for Learning Multivariate Latent Tree Structure},
	author       = {Animashree Anandkumar and Kamalika Chaudhuri and Daniel Hsu and Sham M. Kakade and Le Song and Tong Zhang},
	year         = 2011,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)}
}
@inproceedings{anandkumar12lda,
	title        = {Two {SVD}s Suffice: Spectral decompositions for probabilistic topic modeling and latent {D}irichlet allocation},
	author       = {Animashree Anandkumar and Dean P. Foster and Daniel Hsu and Sham M. Kakade and Yi-Kai Liu},
	year         = 2012,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)}
}
@inproceedings{anandkumar12moments,
	title        = {A Method of Moments for Mixture Models and Hidden {M}arkov Models},
	author       = {Animashree Anandkumar and Daniel Hsu and Sham M. Kakade},
	year         = 2012,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@inproceedings{anandkumar13linear,
	title        = {Learning Linear {B}ayesian Networks with Latent Variables},
	author       = {Animashree Anandkumar and Daniel Hsu and Adel Javanmard and Sham M. Kakade},
	year         = 2013,
	booktitle    = {International Conference on Machine Learning (ICML)}
}
@article{anandkumar13tensor,
	title        = {Tensor decompositions for learning latent variable models},
	author       = {Anima Anandkumar and Rong Ge and Daniel Hsu and Sham M. Kakade and Matus Telgarsky},
	year         = 2013,
	journal      = {arXiv}
}
@inproceedings{anandkumar2013community,
	title        = {A Tensor Spectral Approach to Learning Mixed Membership Community Models},
	author       = {Animashree Anandkumar and Rong Ge and Daniel Hsu and Sham Kakade},
	year         = 2013,
	booktitle    = {Conference on Learning Theory (COLT)},
	pages        = {867--881}
}
@article{anandkumar2013overcomplete,
	title        = {When are Overcomplete Representations Identifiable? Uniqueness of Tensor Decompositions Under Expansion Constraints},
	author       = {Animashree Anandkumar and Daniel Hsu and Majid Janzamin and Sham Kakade},
	year         = 2013,
	journal      = {arXiv}
}
@inproceedings{anandkumar2015learning,
	title        = {Learning overcomplete latent variable models through tensor methods},
	author       = {Anandkumar, Animashree and Ge, Rong and Janzamin, Majid},
	year         = 2015,
	booktitle    = {Proceedings of the Conference on Learning Theory (COLT), Paris, France}
}
@article{anandkumar2016analyzing,
	title        = {Analyzing tensor power method dynamics in overcomplete regime},
	author       = {Anandkumar, Anima and Ge, Rong and Janzamin, Majid},
	year         = 2016,
	journal      = {JMLR}
}
@inproceedings{anandkumar2016efficient,
	title        = {Efficient approaches for escaping higher order saddle points in non-convex optimization},
	author       = {Anandkumar, Animashree and Ge, Rong},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.05908},
	booktitle    = {Conference on learning theory},
	pages        = {81--102},
	organization = {PMLR}
}
@inproceedings{AnandkumarEtal:community12,
	title        = {{A Tensor Spectral Approach to Learning Mixed Membership Community Models}},
	author       = {A. Anandkumar and R. Ge and D. Hsu and S. M. Kakade},
	year         = 2013,
	month        = jun,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@article{AnandkumarEtal:communityimplementation13,
	title        = {{Fast Detection of Overlapping Communities via Online Tensor Methods}},
	author       = {F. Huang and U. N. Niranjan and M. Hakeem and A. Anandkumar},
	year         = 2013,
	month        = sep,
	journal      = {ArXiv 1309.0787}
}
@article{AnandkumarEtal:lda12,
	title        = {{Two SVDs Suffice: Spectral Decompositions for Probabilistic Topic Modeling and Latent Dirichlet Allocation}},
	author       = {A. Anandkumar and D. P. Foster and D. Hsu and S. M. Kakade and Y. K. Liu},
	year         = 2013,
	month        = jul,
	journal      = {to appear in the special issue of Algorithmica on New Theoretical Challenges in Machine Learning},
	note         = {arXiv:1204.6703},
	eprint       = {arXiv:1204.6703}
}
@inproceedings{AnandkumarEtal:NIPS13,
	title        = {{When are Overcomplete Topic Models Identifiable? Uniqueness of Tensor Tucker Decompositions with Structured Sparsity}},
	author       = {A. Anandkumar and D. Hsu and M. Janzamin and S. M. Kakade},
	year         = 2013,
	month        = dec,
	booktitle    = {Neural Information Processing (NIPS)}
}
@article{AnandkumarEtal:tensor12,
	title        = {{Tensor Methods for Learning Latent Variable Models}},
	author       = {A. Anandkumar and R. Ge and D. Hsu and S. M. Kakade and M. Telgarsky},
	year         = 2012,
	month        = oct,
	journal      = {Available at arXiv:1210.7559}
}
@inproceedings{AnandkumarHsuKakade:graphmixturesNIPS12,
	title        = {Learning Mixtures of Tree Graphical Models},
	author       = {A. Anandkumar and D. Hsu and F. Huang and S.M. Kakade},
	year         = 2012,
	booktitle    = {Advances in Neural Information Processing Systems 25}
}
@mastersthesis{anca2009math,
	title        = {Natural Language and Mathematics Processing for Applicable Theorem Search},
	author       = {Stefan Anca},
	year         = 2009,
	school       = {Jacobs University Bremen}
}
@book{andersen1995linear,
	title        = {Linear and graphical models for the multivariate complex normal distribution},
	author       = {Heidi H. Andersen and Malene Hojbjerre and Dorte Sorensen and Poul Svante Eriksen},
	year         = 1995,
	publisher    = {Springer-Verlag},
	series       = {Lecture notes in statistics},
	isbn         = 9780387945217,
	lccn         = 95019290,
	owner        = {leili},
	timestamp    = {2010.11.13}
}
@inproceedings{AndersenLang06WWW,
	title        = {Communities from seed sets},
	author       = {Andersen, Reid and Lang, Kevin J.},
	year         = 2006,
	series       = {WWW '06},
	pages        = {223--232}
}
@inproceedings{AndersenLang2008,
	title        = {An algorithm for improving graph partitions},
	author       = {Andersen, Reid and Lang, Kevin J.},
	year         = 2008,
	series       = {SODA},
	pages        = {651--660}
}
@inproceedings{AndersenPeres09,
	title        = {Finding sparse cuts locally using evolving sets},
	author       = {Reid Andersen and Yuval Peres},
	year         = 2009,
	series       = {STOC}
}
@article{anderson1949estimation,
	title        = {Estimation of the parameters of a single equation in a complete system of stochastic equations},
	author       = {Theodore W. Anderson and Herman Rubin},
	year         = 1949,
	journal      = {The Annals of Mathematical Statistics},
	pages        = {46--63}
}
@article{anderson1950asymptotic,
	title        = {The asymptotic properties of estimates of the parameters of a single equation in a complete system of stochastic equations},
	author       = {Theodore W. Anderson and Herman Rubin},
	year         = 1950,
	journal      = {The Annals of Mathematical Statistics},
	pages        = {570--582}
}
@book{anderson1979optimal,
	title        = {Optimal Filtering},
	author       = {Brian D. O. Anderson and John B. Moore},
	year         = 1979,
	publisher    = {Prentice Hall},
	address      = {New York}
}
@article{Anderson2014,
	title        = {{An Efficient Algorithm for Unweighted Spectral Graph Sparsification}},
	author       = {Anderson, David G. and Gu, Ming and Melgaard, Christopher},
	year         = 2014,
	month        = oct,
	journal      = {ArXiv e-prints},
	volume       = {abs/1410.4273},
	url          = {http://arxiv.org/abs/1410.4273v1},
	eprint       = {1410.4273}
}
@inproceedings{anderson2014blessing,
	title        = {The more, the merrier: the blessing of dimensionality for learning large {G}aussian mixtures},
	author       = {Joseph Anderson and Mikhail Belkin and Navin Goyal and Luis Rademacher and James R. Voss},
	year         = 2014,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@inproceedings{anderson2015spectral,
	title        = {{Spectral Gap Error Bounds for Improving CUR Matrix Decomposition and the Nystr\"{o}m Method}},
	author       = {David Anderson and Simon Du and Michael Mahoney and Christopher Melgaard and Kunming Wu and Ming Gu},
	year         = 2015,
	month        = {09--12 May},
	booktitle    = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics},
	publisher    = {PMLR},
	address      = {San Diego, California, USA},
	series       = {Proceedings of Machine Learning Research},
	volume       = 38,
	pages        = {19--27},
	url          = {http://proceedings.mlr.press/v38/anderson15.html},
	editor       = {Guy Lebanon and S. V. N. Vishwanathan},
	pdf          = {http://proceedings.mlr.press/v38/anderson15.pdf},
	abstract     = {The CUR matrix decomposition and the related Nystr{\"o}m method build low-rank approximations of data matrices by selecting a small number of representative rows and columns of the data. Here, we introduce novel \emph{spectral gap error bounds} that judiciously exploit the potentially rapid spectrum decay in the input matrix, a most common occurrence in machine learning and data analysis. Our error bounds are much tighter than existing ones for matrices with rapid spectrum decay, and they justify the use of a constant amount of oversampling relative to the rank parameter $k$, i.e., when the number of columns/rows is $\ell = k + O(1)$. We demonstrate our analysis on a novel deterministic algorithm, \emph{StableCUR}, which additionally eliminates a previously unrecognized source of potential instability in CUR decompositions. While our algorithm accepts any method of row and column selection, we implement it with a recent column selection scheme with strong singular value bounds. Empirical results on various classes of real world data matrices demonstrate that our algorithm is as efficient as and often outperforms competing algorithms.}
}
@inproceedings{anderson2018butd,
	title        = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
	author       = {Peter Anderson and X. He and C. Buehler and Damien Teney and Mark Johnson and Stephen Gould and Lei Zhang},
	year         = 2018,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)},
	pages        = {6077--6086}
}
@inproceedings{anderson2018vision,
	title        = {Vision-and-language navigation: Interpreting visually-grounded navigation instructions in real environments},
	author       = {Peter Anderson and Qi Wu and Damien Teney and Jake Bruce and Mark Johnson and Niko S{\"u}nderhauf and Ian Reid and Stephen Gould and Anton van den Hengel},
	year         = 2018,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)}
}
@article{anderson2020neurosymbolic,
	title        = {Neurosymbolic reinforcement learning with formally verified exploration},
	author       = {Anderson, Greg and Verma, Abhinav and Dillig, Isil and Chaudhuri, Swarat},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2009.12612}
}
@inproceedings{ando07,
	title        = {Two-view feature generation model for semi-supervised learning},
	author       = {R. Ando and T. Zhang},
	year         = 2007,
	booktitle    = {ICML}
}
@inproceedings{ando2007two,
	title        = {Two-view feature generation model for semi-supervised learning},
	author       = {Rie Kubota Ando and Tong Zhang},
	year         = 2007,
	booktitle    = {Conference on Learning Theory (COLT)},
	pages        = {25--32}
}
@inproceedings{ando2017deep,
	title        = {Deep over-sampling framework for classifying imbalanced data},
	author       = {Ando, Shin and Huang, Chun Yuan},
	year         = 2017,
	booktitle    = {Joint European Conference on Machine Learning and Knowledge Discovery in Databases},
	pages        = {770--785}
}
@phdthesis{Andoni2009thesis,
	title        = {Nearest Neighbor Search: the Old, the New, and the Impossible},
	author       = {Andoni, Alexandr},
	year         = 2009,
	school       = {MIT}
}
@inproceedings{andoni2014learning,
	title        = {Learning sparse polynomial functions},
	author       = {Andoni, Alexandr and Panigrahy, Rina and Valiant, Gregory and Zhang, Li},
	year         = 2014,
	booktitle    = {Proceedings of the Twenty-Fifth Annual ACM-SIAM Symposium on Discrete Algorithms},
	pages        = {500--510},
	organization = {Society for Industrial and Applied Mathematics}
}
@article{andor2016globally,
	title        = {Globally normalized transition-based neural networks},
	author       = {Daniel Andor and Chris Alberti and David Weiss and Aliaksei Severyn and Alessandro Presta and Kuzman Ganchev and Slav Petrov and Michael Collins},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1603.06042}
}
@inproceedings{andre2002state,
	title        = {State abstraction for programmable reinforcement learning agents},
	author       = {David Andre and Stuart J Russell},
	year         = 2002,
	booktitle    = {Association for the Advancement of Artificial Intelligence (AAAI)},
	pages        = {119--125}
}
@phdthesis{andre2003programmable,
	title        = {Programmable reinforcement learning agents},
	author       = {D. Andre},
	year         = 2003,
	school       = {University of California, Berkeley}
}
@article{andreas2013generative,
	title        = {A Generative Model of Vector Space Semantics},
	author       = {Andreas, Jacob and Ghahramani, Zoubin},
	year         = 2013,
	journal      = {Transactions of the Association for Computational Linguistics}
}
@inproceedings{andreas2014grounding,
	title        = {Grounding Language with Points and Paths in Continuous Spaces},
	author       = {Jacob Andreas and Dan Klein},
	year         = 2014,
	booktitle    = {Computational Natural Language Learning (CoNLL)},
	pages        = {58--67}
}
@inproceedings{andreas2014when,
	title        = {When and why are log-linear models self-normalizing?},
	author       = {Jacob Andreas and Dan Klein},
	year         = 2014,
	booktitle    = {Proceedings of the Annual Meeting of the North American Chapter of the Association for Computational Linguistics}
}
@inproceedings{andreas2015alignment,
	title        = {Alignment-Based Compositional Semantics for Instruction Following},
	author       = {Jacob Andreas and Dan Klein},
	year         = 2015,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{andreas2016learning,
	title        = {Learning to Compose Neural Networks for Question Answering},
	author       = {Jacob Andreas and Marcus Rohrbach and Trevor Darrell and Dan Klein},
	year         = 2016,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {1545--1554}
}
@inproceedings{andreas2016neural,
	title        = {Neural module networks},
	author       = {Jacob Andreas and Marcus Rohrbach and Trevor Darrell and Dan Klein},
	year         = 2016,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)}
}
@inproceedings{andreas2016reasoning,
	title        = {Reasoning about Pragmatics with Neural Listeners and Speakers},
	author       = {Jacob Andreas and Dan Klein},
	year         = 2016,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)},
	pages        = {1173--1182}
}
@article{andreas2017learning,
	title        = {Learning with Latent Language},
	author       = {Jacob Andreas and Dan Klein and Sergey Levine},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1711.00482}
}
@inproceedings{andreas2017sketches,
	title        = {Modular Multitask Reinforcement Learning with Policy Sketches},
	author       = {Jacob Andreas and Dan Klein and Sergey Levine},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning (ICML)}
}
@inproceedings{andreas2020geca,
	title        = {Good-Enough Compositional Data Augmentation},
	author       = {Jacob Andreas},
	year         = 2020,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@article{andreassen2021evolution,
	title        = {The Evolution of Out-of-Distribution Robustness Throughout Fine-Tuning},
	author       = {Anders Andreassen and Yasaman Bahri and Behnam Neyshabur and Rebecca Roelofs},
	year         = 2021,
	journal      = {arXiv}
}
@inproceedings{andrew2013deep,
	title        = {Deep canonical correlation analysis},
	author       = {Galen Andrew and Raman Arora and Jeff Bilmes and Karen Livescu},
	year         = 2013,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {1247--1255}
}
@inproceedings{andrews2012name,
	title        = {Name phylogeny: A generative model of string variation},
	author       = {Nicholas Andrews and Jason Eisner and Mark Dredze},
	year         = 2012,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)},
	pages        = {344--355}
}
@inproceedings{andrieu2005line,
	title        = {On-line Parameter Estimation in General State-Space Models},
	author       = {Andrieu, C. and Doucet, A. and Tadic, V.},
	year         = 2005,
	booktitle    = {Proceedings of the 44th Conference on Decision and Control},
	pages        = {332--337}
}
@article{andrieu2008tutorial,
	title        = {A tutorial on adaptive {MCMC}},
	author       = {Christophe Andrieu and Johannes Thoms},
	year         = 2008,
	journal      = {Statistics and Computing},
	volume       = 18,
	number       = 4,
	pages        = {343--373}
}
@article{andrieu2010particle,
	title        = {Particle {M}arkov chain {M}onte {C}arlo methods},
	author       = {Christophe Andrieu and Arnaud Doucet and Roman Holenstein},
	year         = 2010,
	journal      = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	volume       = 72,
	number       = 3,
	pages        = {269--342}
}
@article{androutsopoulos2010survey,
	title        = {A survey of paraphrasing and textual entailment methods},
	author       = {Ion Androutsopoulos and Prodromos Malakasiotis},
	year         = 2010,
	journal      = {Journal of Artificial Intelligence Research (JAIR)},
	volume       = 38,
	pages        = {135--187}
}
@article{androutsopoulos95nlidb,
	title        = {Natural Language Interfaces to Databases -- An Introduction},
	author       = {I. Androutsopoulos and G. D. Ritchie and P. Thanisch},
	year         = 1995,
	journal      = {Journal of Natural Language Engineering},
	volume       = 1,
	pages        = {29--81}
}
@inproceedings{andrychowicz2016learning,
	title        = {Learning to learn by gradient descent by gradient descent},
	author       = {Marcin Andrychowicz and Misha Denil and Sergio Gomez and Matthew W Hoffman and David Pfau and Tom Schaul and Brendan Shillingford and Nando De Freitas},
	year         = 2016,
	booktitle    = {Advances in neural information processing systems},
	pages        = {3981--3989}
}
@article{andrychowicz2017hindsight,
	title        = {Hindsight Experience Replay},
	author       = {Marcin Andrychowicz and Filip Wolski and Alex Ray and Jonas Schneider and Rachel Fong and Peter Welinder and Bob McGrew and Josh Tobin and Pieter Abbeel and Wojciech Zaremba},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1707.01495}
}
@inproceedings{angeli10generation,
	title        = {A Simple Domain-Independent Probabilistic Approach to Generation},
	author       = {Gabor Angeli and Percy Liang and Dan Klein},
	year         = 2010,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{angeli2014combining,
	title        = {Combining distant and partial supervision for relation extraction},
	author       = {Gabor Angeli and Julie Tibshirani and Jean Y Wu and Christopher D Manning},
	year         = 2014,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{angeli2014naturalli,
	title        = {{NaturalLI}: Natural Logic Inference for Common Sense Reasoning},
	author       = {Gabor Angeli and Christopher D. Manning},
	year         = 2014,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@inproceedings{angeli2015openie,
	title        = {Leveraging Linguistic Structure for Open Domain Information Extraction},
	author       = {Gabor Angeli and Melvin Johnson Premkumar and Christopher D. Manning},
	year         = 2015,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@inproceedings{angeli2016naturalli,
	title        = {Combining Natural Logic and Shallow Reasoning for Question Answering},
	author       = {Gabor Angeli and Neha Nayak and Christopher D. Manning},
	year         = 2016,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@article{angluin88queries,
	title        = {Queries and concept learning},
	author       = {D. Angluin},
	year         = 1988,
	journal      = {Machine Learning},
	volume       = 2,
	number       = 4,
	pages        = {319--342}
}
@book{angrist2009econometrics,
	title        = {Mostly Harmless Econometrics: An Empiricist's Companion},
	author       = {Joshua D. Angrist and J{\"o}rn-Steffen Pischke},
	year         = 2009,
	publisher    = {Princeton University Press}
}
@inproceedings{anguita2013har,
	title        = {A Public Domain Dataset for Human Activity Recognition Using Smartphones},
	author       = {Davide Anguita and Alessandro Ghio and Luca Oneto and Xavier Parra and Jorge L. Reyes-Ortiz},
	year         = 2013,
	booktitle    = {21st European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN)}
}
@article{angwin2016machine,
	title        = {Machine bias: There's software used across the country to predict future criminals. and it's biased against blacks},
	author       = {Julia Angwin and Jeff Larson and Surya Mattu and Lauren Kirchner},
	year         = 2016,
	journal      = {ProPublica},
	volume       = 23
}
@article{anstreicher2002improved,
	title        = {Improved complexity for maximum volume inscribed ellipsoids},
	author       = {Anstreicher, Kurt M.},
	year         = 2002,
	journal      = {SIAM Journal on Optimization},
	publisher    = {SIAM},
	volume       = 13,
	number       = 2,
	pages        = {309--320}
}
@article{antoniak74dpmix,
	title        = {Mixtures of {D}irichlet Processes with Applications to {B}ayesian Nonparametric Problems},
	author       = {C. E. Antoniak},
	year         = 1974,
	journal      = {Annals of Statistics},
	volume       = 2,
	pages        = {1152--1174}
}
@article{antoniou2017data,
	title        = {Data augmentation generative adversarial networks},
	author       = {Antreas Antoniou and Amos Storkey and Harrison Edwards},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1711.04340}
}
@inproceedings{AO-lp-coordinate,
	title        = {{Nearly-Linear Time Positive LP Solver with Faster Convergence Rate}},
	author       = {{Allen-Zhu}, Zeyuan and Orecchia, Lorenzo},
	year         = 2015,
	booktitle    = {Proceedings of the 47th Annual ACM Symposium on Theory of Computing},
	series       = {STOC~'15}
}
@inproceedings{AO-lp-parallel,
	title        = {Using Optimization to Break the Epsilon Barrier: A Faster and Simpler Width-Independent Algorithm for Solving Positive Linear Programs in Parallel},
	author       = {{Allen-Zhu}, Zeyuan and Orecchia, Lorenzo},
	year         = 2015,
	month        = jul,
	journal      = {ArXiv e-prints},
	booktitle    = {Proceedings of the 26th ACM-SIAM Symposium on Discrete Algorithms},
	series       = {SODA~'15},
	volume       = {abs/1407.1925},
	bibsource    = {DBLP, http://dblp.uni-trier.de}
}
@article{AO-survey-nesterov,
	title        = {Linear Coupling: An Ultimate Unification of Gradient and Mirror Descent},
	author       = {{Allen-Zhu}, Zeyuan and Orecchia, Lorenzo},
	year         = 2014,
	month        = jul,
	journal      = {ArXiv e-prints},
	volume       = {abs/1407.1537},
	bibsource    = {DBLP, http://dblp.uni-trier.de}
}
@inproceedings{apvz14,
	title        = {Learning polynomials with neural networks},
	author       = {Andoni, Alexandr and Panigrahy, Rina and Valiant, Gregory and Zhang, Li},
	year         = 2014,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {1908--1916}
}
@inproceedings{arasu2003extracting,
	title        = {Extracting structured data from web pages},
	author       = {Arvind Arasu and Hector Garcia-Molina},
	year         = 2003,
	booktitle    = {ACM SIGMOD international conference on Management of data},
	pages        = {337--348}
}
@inproceedings{ardila2020common,
	title        = {Common Voice: A Massively-Multilingual Speech Corpus},
	author       = {Rosana Ardila and Megan Branson and Kelly Davis and Michael Kohler and Josh Meyer and Michael Henretty and Reuben Morais and Lindsay Saunders and Francis Tyers and Gregor Weber},
	year         = 2020,
	booktitle    = {Language Resources and Evaluation Conference (LREC)},
	pages        = {4218--4222}
}
@article{arefyev2020lssurvey,
	title        = {A Comparative Study of Lexical Substitution Approaches based on Neural Language Models},
	author       = {Nikolay Arefyev and Boris Sheludko and Alexander Podolskiy and Alexander Panchenko},
	year         = 2020,
	journal      = {arXiv}
}
@article{argall2009survey,
	title        = {A survey of robot learning from demonstration},
	author       = {B. Argall and S. Chernova and M. Veloso and B. Browning},
	year         = 2009,
	journal      = {Robotics and Autonomous Systems},
	volume       = 57
}
@article{argall2018autonomy,
	title        = {Autonomy in rehabilitation robotics: an intersection},
	author       = {Brenna D Argall},
	year         = 2018,
	journal      = {Annual Review of Control, Robotics, and Autonomous Systems},
	volume       = 1,
	pages        = {441--463}
}
@inproceedings{argyriou07feature,
	title        = {Multi-task feature learning},
	author       = {A. Argyriou and T. Evgeniou and M. Pontil},
	year         = 2007,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {41--48}
}
@inproceedings{arikan2002interactive,
	title        = {Interactive motion generation from examples},
	author       = {Okan Arikan and D. A. Forsyth},
	year         = 2002,
	booktitle    = {SIGGRAPH '02: Proceedings of the 29th annual conference on Computer graphics and interactive techniques},
	location     = {San Antonio, Texas},
	publisher    = {ACM Press},
	address      = {New York, NY, USA},
	pages        = {483--490},
	doi          = {10.1145/566570.566606},
	isbn         = {1-58113-521-1}
}
@inproceedings{ariola97cyclic,
	title        = {Cyclic lambda calculi},
	author       = {Zena M. Ariola and Stefan Blom},
	year         = 1997,
	booktitle    = {Theoretical Aspects of Computer Software},
	pages        = {77--106}
}
@inproceedings{aristidou2008predicting,
	title        = {Predicting Missing Markers to Drive Real-Time Centre of Rotation Estimation},
	author       = {Aristidou, Andreas and Cameron, Jonathan and Lasenby, Joan},
	year         = 2008,
	booktitle    = {AMDO '08: Proceedings of the 5th international conference on Articulated Motion and Deformable Objects},
	location     = {Port d'Andratx, Mallorca, Spain},
	publisher    = {Springer-Verlag},
	address      = {Berlin, Heidelberg},
	pages        = {238--247},
	doi          = {10.1007/978-3-540-70517-8_23},
	isbn         = {978-3-540-70516-1}
}
@inproceedings{arjovsky2017gan,
	title        = {Towards Principled Methods for Training Generative Adversarial Networks},
	author       = {Martin Arjovsky and Leon Bottou},
	year         = 2017,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@article{arjovsky2019invariant,
	title        = {Invariant risk minimization},
	author       = {Arjovsky, Martin and Bottou, L{\'e}on and Gulrajani, Ishaan and Lopez-Paz, David},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1907.02893}
}
@inproceedings{arlot10penalty,
	title        = {Data-driven calibration of linear estimators with minimal penalties},
	author       = {Sylvain Arlot and Francis Bach},
	year         = 2010,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {46--54}
}
@phdthesis{armando2008sketch,
	title        = {Program Synthesis by Sketching},
	author       = {Armando Solar-Lezama},
	year         = 2008,
	school       = {University of California at Berkeley}
}
@techreport{AroLiLiaMaetal15,
	title        = {A Latent Variable Model Approach to {PMI}-based Word Embeddings},
	author       = {Sanjeev Arora and Yuanzhi Li and Yingyu Liang and Tengyu Ma and Andrej Risteski},
	year         = 2015,
	note         = {\url{http://arxiv.org/abs/1502.03520}},
	institution  = {arXiv}
}
@inproceedings{aronson2018eye,
	title        = {Eye-hand behavior in human-robot shared manipulation},
	author       = {Reuben M Aronson and Thiago Santini and Thomas C K{\"u}bler and Enkelejda Kasneci and Siddhartha Srinivasa and Henny Admoni},
	year         = 2018,
	booktitle    = {ACM/IEEE International Conference on Human Robot Interaction (HRI)},
	pages        = {4--13}
}
@inproceedings{arora15simple,
	title        = {Simple, Efficient, and Neural Algorithms for Sparse Coding},
	author       = {Sanjeev Arora and Rong Ge and Tengyu Ma and Ankur Moitra},
	year         = 2015,
	booktitle    = {Proceedings of The 28th Conference on Learning Theory, {COLT} 2015, Paris, France, July 3-6, 2015},
	pages        = {113--149},
	url          = {http://jmlr.org/proceedings/papers/v40/Arora15.html},
	crossref     = {DBLP:conf/colt/2015},
	timestamp    = {Tue, 12 Jul 2016 21:51:13 +0200},
	biburl       = {http://dblp.uni-trier.de/rec/bib/conf/colt/AroraGMM15},
	bibsource    = {dblp computer science bibliography, http://dblp.org},
	pp           = {113–149}
}
@inproceedings{arora16inferencetopic,
	title        = {Provable Algorithms for Inference in Topic Models},
	author       = {Sanjeev Arora and Rong Ge and Frederic Koehler and Tengyu Ma and Ankur Moitra},
	year         = 2016,
	booktitle    = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
	pages        = {2859--2867},
	url          = {http://jmlr.org/proceedings/papers/v48/arorab16.html},
	crossref     = {DBLP:conf/icml/2016},
	timestamp    = {Tue, 03 Jan 2017 13:40:36 +0100},
	biburl       = {http://dblp.uni-trier.de/rec/bib/conf/icml/AroraGKMM16},
	bibsource    = {dblp computer science bibliography, http://dblp.org}
}
@inproceedings{arora2009interactive,
	title        = {Interactive annotation learning with indirect feature voting},
	author       = {Shilpa Arora and Eric Nyberg},
	year         = 2009,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {55--60}
}
@inproceedings{arora2012learning,
	title        = {Learning topic models--going beyond {SVD}},
	author       = {Sanjeev Arora and Rong Ge and Ankur Moitra},
	year         = 2012,
	booktitle    = {Foundations of Computer Science (FOCS)}
}
@article{Arora2013,
	title        = {{New Algorithms for Learning Incoherent and Overcomplete Dictionaries}},
	author       = {{Arora}, S. and {Ge}, R. and {Moitra}, A.},
	year         = 2013,
	month        = aug,
	journal      = {ArXiv e-prints}
}
@inproceedings{arora2013practical,
	title        = {A practical algorithm for topic modeling with provable guarantees},
	author       = {Arora, Sanjeev and Ge, Rong and Halpern, Yonatan and Mimno, David and Moitra, Ankur and Sontag, David and Wu, Yichen and Zhu, Michael},
	year         = 2013,
	booktitle    = {International Conference on Machine Learning},
	pages        = {280--288},
	organization = {PMLR}
}
@article{arora2014more,
	title        = {More algorithms for provable dictionary learning},
	author       = {Arora, Sanjeev and Bhaskara, Aditya and Ge, Rong and Ma, Tengyu},
	year         = 2014,
	journal      = {arXiv preprint arXiv:1401.0579}
}
@inproceedings{arora2014provable,
	title        = {Provable bounds for learning some deep representations},
	author       = {Arora, Sanjeev and Bhaskara, Aditya and Ge, Rong and Ma, Tengyu},
	year         = 2014,
	booktitle    = {International Conference on Machine Learning},
	pages        = {584--592},
	url          = {http://jmlr.org/proceedings/papers/v32/arora14.html},
	crossref     = {DBLP:conf/icml/2014},
	timestamp    = {Sun, 26 Oct 2014 02:38:30 +0200},
	biburl       = {http://dblp.uni-trier.de/rec/bib/conf/icml/AroraBGM14},
	bibsource    = {dblp computer science bibliography, http://dblp.org}
}
@article{arora2015deep,
	title        = {Why are deep nets reversible: A simple theory, with implications for training},
	author       = {Arora, Sanjeev and Liang, Yingyu and Ma, Tengyu},
	year         = 2015,
	journal      = {arXiv preprint arXiv:1511.05653}
}
@article{arora2015rand,
	title        = {Rand-walk: A latent variable model approach to word embeddings},
	author       = {Arora, Sanjeev and Li, Yuanzhi and Liang, Yingyu and Ma, Tengyu and Risteski, Andrej},
	year         = 2015,
	journal      = {Transactions of the Association for Computational Linguistics}
}
@inproceedings{arora2015simple,
	title        = {Simple, efficient, and neural algorithms for sparse coding},
	author       = {Arora, Sanjeev and Ge, Rong and Ma, Tengyu and Moitra, Ankur},
	year         = 2015,
	booktitle    = {Conference on Learning Theory},
	publisher    = {Proceedings of Machine Learning Research},
	pages        = {113--149}
}
@article{arora2016latent,
	title        = {A latent variable model approach to pmi-based word embeddings},
	author       = {Arora, Sanjeev and Li, Yuanzhi and Liang, Yingyu and Ma, Tengyu and Risteski, Andrej},
	year         = 2016,
	journal      = {Transactions of the Association for Computational Linguistics},
	publisher    = {MIT Press},
	volume       = 4,
	pages        = {385--399}
}
@article{arora2016linear,
	title        = {Linear algebraic structure of word senses, with applications to polysemy},
	author       = {Arora, Sanjeev and Li, Yuanzhi and Liang, Yingyu and Ma, Tengyu and Risteski, Andrej},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1601.03764}
}
@inproceedings{arora2016provable,
	title        = {Provable Algorithms for Inference in Topic Models},
	author       = {Arora, Sanjeev and Ge, Rong and Koehler, Frederic and Ma, Tengyu and Moitra, Ankur},
	year         = 2016,
	booktitle    = {The 33rd International Conference on Machine Learning (ICML 2016). arXiv preprint arXiv:1605.08491}
}
@inproceedings{arora2016simple,
	title        = {A simple but tough-to-beat baseline for sentence embeddings},
	author       = {Arora, Sanjeev and Liang, Yingyu and Ma, Tengyu},
	year         = 2016,
	booktitle    = {5th International Conference on Learning Representations (ICLR 2017)}
}
@article{arora2016understanding,
	title        = {Understanding deep neural networks with rectified linear units},
	author       = {Arora, Raman and Basu, Amitabh and Mianjy, Poorya and Mukherjee, Anirbit},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1611.01491}
}
@article{arora2017gan,
	title        = {Generalization and Equilibrium in Generative Adversarial Nets ({GANs})},
	author       = {Sanjeev Arora and Rong Ge and Yingyu Liang and Tengyu Ma and Yi Zhang},
	year         = 2017,
	journal      = {arXiv}
}
@inproceedings{arora2017generalization,
	title        = {Generalization and equilibrium in generative adversarial nets ({GANs})},
	author       = {Arora, Sanjeev and Ge, Rong and Liang, Yingyu and Ma, Tengyu and Zhang, Yi},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning}
}
@inproceedings{arora2017provable,
	title        = {Provable learning of noisy-{OR} networks},
	author       = {Sanjeev Arora and Rong Ge and Tengyu Ma and Andrej Risteski},
	year         = 2017,
	booktitle    = {Proceedings of the 49th Annual {ACM} {SIGACT} Symposium on Theory of Computing, {STOC} 2017, Montreal, QC, Canada, June 19-23, 2017},
	pages        = {1057--1066},
	doi          = {10.1145/3055399.3055482},
	url          = {http://doi.acm.org/10.1145/3055399.3055482},
	crossref     = {DBLP:conf/stoc/2017},
	timestamp    = {Sat, 17 Jun 2017 18:46:57 +0200},
	biburl       = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Arora0MR17},
	bibsource    = {dblp computer science bibliography, http://dblp.org}
}
@article{arora2018convergence,
	title        = {A Convergence Analysis of Gradient Descent for Deep Linear Neural Networks},
	author       = {Arora, Sanjeev and Cohen, Nadav and Golowich, Noah and Hu, Wei},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1810.02281}
}
@article{arora2018linear,
	title        = {Linear Algebraic Structure of Word Senses, with Applications to Polysemy},
	author       = {Sanjeev Arora and Yuanzhi Li and Yingyu Liang and Tengyu Ma and Andrej Risteski},
	year         = 2018,
	journal      = {Transactions of the Association for Computational Linguistics (TACL)},
	volume       = 6
}
@article{arora2018optimization,
	title        = {On the optimization of deep networks: Implicit acceleration by overparameterization},
	author       = {Arora, Sanjeev and Cohen, Nadav and Hazan, Elad},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1802.06509},
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {244--253}
}
@article{arora2018stronger,
	title        = {Stronger generalization bounds for deep nets via a compression approach},
	author       = {Arora, Sanjeev and Ge, Rong and Neyshabur, Behnam and Zhang, Yi},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1802.05296}
}
@article{arora2018theoretical,
	title        = {Theoretical analysis of auto rate-tuning by batch normalization},
	author       = {Arora, Sanjeev and Li, Zhiyuan and Lyu, Kaifeng},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1812.03981}
}
@article{arora2019exact,
	title        = {On exact computation with an infinitely wide neural net},
	author       = {Arora, Sanjeev and Du, Simon S and Hu, Wei and Li, Zhiyuan and Salakhutdinov, Ruslan and Wang, Ruosong},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1904.11955},
	booktitle    = {Advances in Neural Information Processing Systems},
	publisher    = {Curran Associates, Inc.},
	volume       = 32,
	pages        = {},
	url          = {https://proceedings.neurips.cc/paper/2019/file/dbc4d84bfcfe2284ba11beffb853a8c4-Paper.pdf},
	editor       = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett}
}
@article{arora2019fine,
	title        = {Fine-grained analysis of optimization and generalization for overparameterized two-layer neural networks},
	author       = {Arora, Sanjeev and Du, Simon S and Hu, Wei and Li, Zhiyuan and Wang, Ruosong},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1901.08584},
	booktitle    = {International Conference on Machine Learning},
	pages        = {322--332},
	organization = {PMLR}
}
@inproceedings{arora2019implicit,
	title        = {Implicit regularization in deep matrix factorization},
	author       = {Arora, Sanjeev and Cohen, Nadav and Hu, Wei and Luo, Yuping},
	year         = 2019,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {7411--7422}
}
@inproceedings{arora2019theoretical,
	title        = {A theoretical analysis of contrastive unsupervised representation learning},
	author       = {Arora, Sanjeev and Khandeparkar, Hrishikesh and Khodak, Mikhail and Plevrakis, Orestis and Saunshi, Nikunj},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1902.09229},
	booktitle    = {International Conference on Machine Learning}
}
@article{arora2020dropout,
	title        = {Dropout: Explicit Forms and Capacity Control},
	author       = {Arora, Raman and Bartlett, Peter and Mianjy, Poorya and Srebro, Nathan},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2003.03397}
}
@inproceedings{arora2020harnessing,
	title        = {Harnessing the Power of Infinitely Wide Deep Nets on Small-data Tasks},
	author       = {Sanjeev Arora and Simon S. Du and Zhiyuan Li and Ruslan Salakhutdinov and Ruosong Wang and Dingli Yu},
	year         = 2020,
	booktitle    = {International Conference on Learning Representations},
	url          = {https://openreview.net/forum?id=rkl8sJBYvH}
}
@inproceedings{arora2020provable,
	title        = {Provable representation learning for imitation learning via bi-level optimization},
	author       = {Arora, Sanjeev and Du, Simon and Kakade, Sham and Luo, Yuping and Saunshi, Nikunj},
	year         = 2020,
	booktitle    = {International Conference on Machine Learning},
	pages        = {367--376},
	organization = {PMLR}
}
@book{AroraBarak,
	title        = {Computational Complexity - {A} Modern Approach},
	author       = {Sanjeev Arora and Boaz Barak},
	year         = 2009,
	publisher    = {Cambridge University Press},
	isbn         = {978-0-521-42426-4},
	url          = {http://www.cambridge.org/catalogue/catalogue.asp?isbn=9780521424264},
	timestamp    = {Mon, 29 Sep 2014 03:39:22 +0200},
	biburl       = {http://dblp.uni-trier.de/rec/bib/books/daglib/0023084},
	bibsource    = {dblp computer science bibliography, http://dblp.org}
}
@inproceedings{AroraGHMMSWZ13,
	title        = {A Practical Algorithm for Topic Modeling with Provable Guarantees},
	author       = {Sanjeev Arora and Rong Ge and Yonatan Halpern and David M. Mimno and Ankur Moitra and David Sontag and Yichen Wu and Michael Zhu},
	year         = 2013,
	booktitle    = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
	pages        = {280--288}
}
@inproceedings{AroraGM14,
	title        = {New Algorithms for Learning Incoherent and Overcomplete Dictionaries},
	author       = {Sanjeev Arora and Rong Ge and Ankur Moitra},
	year         = 2014,
	journal      = {CoRR},
	booktitle    = {Proceedings of The 27th Conference on Learning Theory, {COLT} 2014, Barcelona, Spain, June 13-15, 2014},
	volume       = {abs/1308.6273},
	pages        = {779--806},
	url          = {http://jmlr.org/proceedings/papers/v35/arora14.html},
	bibsource    = {DBLP, http://dblp.uni-trier.de},
	ee           = {http://arxiv.org/abs/1308.6273},
	crossref     = {DBLP:conf/colt/2014},
	timestamp    = {Sun, 26 Oct 2014 02:37:38 +0200},
	biburl       = {http://dblp.uni-trier.de/rec/bib/conf/colt/AroraGM14}
}
@inproceedings{AroraKale2007,
	title        = {{A combinatorial, primal-dual approach to semidefinite programs}},
	author       = {Arora, Sanjeev and Kale, Satyen},
	year         = 2007,
	booktitle    = {Proceedings of the thirty-ninth annual ACM symposium on Theory of computing - STOC '07},
	publisher    = {ACM Press},
	address      = {New York, New York, USA},
	pages        = 227,
	doi          = {10.1145/1250790.1250823},
	isbn         = 9781595936318,
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Arora, Kale - 2007 - A combinatorial, primal-dual approach to semidefinite programs.pdf:pdf},
	mendeley-groups = {Algorithms/Multiplicative Weight,Algorithms/Multiplicative Weight/SDP}
}
@article{AroraKannan:Mixtures,
	title        = {Learning Mixtures of Separated Nonspherical {G}aussians},
	author       = {Sanjeev Arora and Ravi Kannan},
	year         = 2005,
	journal      = {The Annals of Applied Probability},
	volume       = 15,
	number       = {1A},
	pages        = {69--92}
}
@inproceedings{arpit2017memorization,
	title        = {A Closer Look at Memorization in Deep Networks},
	author       = {Devansh Arpit and Stanislaw Jastrzebski and Nicolas Ballas and David Krueger and Emmanuel Bengio and Maxinder S. Kanwal and Tegan Maharaj and Asja Fischer and Aaron Courville and Yoshua Bengio and Simon Lacoste-Julien},
	year         = 2017,
	month        = {06--11 Aug},
	booktitle    = {Proceedings of the 34th International Conference on Machine Learning},
	publisher    = {PMLR},
	series       = {Proceedings of Machine Learning Research},
	volume       = 70,
	pages        = {233--242},
	url          = {https://proceedings.mlr.press/v70/arpit17a.html},
	editor       = {Precup, Doina and Teh, Yee Whye},
	pdf          = {http://proceedings.mlr.press/v70/arpit17a/arpit17a.pdf}
}
@article{arpit2019benefits,
	title        = {The Benefits of Over-parameterization at Initialization in Deep ReLU Networks},
	author       = {Arpit, Devansh and Bengio, Yoshua},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1901.03611}
}
@inproceedings{arrieta2018should,
	title        = {Should We Treat Data as Labor? {Moving} beyond ``Free''},
	author       = {Imanol Arrieta-Ibarra and Leonard Goff and Diego Jim{\'e}nez-Hern{\'a}ndez and Jaron Lanier and E Glen Weyl},
	year         = 2018,
	booktitle    = {American Economic Association Papers and Proceedings},
	volume       = 108,
	pages        = {38--42}
}
@article{arrow1973theory,
	title        = {The theory of discrimination},
	author       = {Kenneth Arrow},
	year         = 1973,
	journal      = {Discrimination in labor markets},
	volume       = 3,
	number       = 10,
	pages        = {3--33}
}
@article{arslan2017decentralized,
	title        = {Decentralized {Q}-learning for stochastic teams and games},
	author       = {Arslan, G{\"u}rdal and Y{\"u}ksel, Serdar},
	year         = 2017,
	journal      = {IEEE Transactions on Automatic Control},
	publisher    = {IEEE},
	volume       = 62,
	number       = 4,
	pages        = {1545--1558}
}
@article{artemiadis2010emg,
	title        = {{EMG}-based control of a robot arm using low-dimensional embeddings},
	author       = {Panagiotis K Artemiadis and Kostas J Kyriakopoulos},
	year         = 2010,
	journal      = {IEEE Transactions on Robotics (T-RO)},
	volume       = 26,
	pages        = {393--398}
}
@article{artetxe2017nmt,
	title        = {Unsupervised Neural Machine Translation},
	author       = {Mikel Artetxe and Gorka Labaka and Eneko Agirre and Kyunghyun Cho},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1710.11041}
}
@article{artetxe2018unsupervised,
	title        = {Unsupervised statistical machine translation},
	author       = {Mikel Artetxe and Gorka Labaka and Eneko Agirre},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1809.01272}
}
@inproceedings{artzi11conversations,
	title        = {Bootstrapping Semantic Parsers from Conversations},
	author       = {Yoav Artzi and Luke Zettlemoyer},
	year         = 2011,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)},
	pages        = {421--432}
}
@article{artzi2013uw,
	title        = {{UW} {SPF}: The {U}niversity of {W}ashington Semantic Parsing Framework},
	author       = {Yoav Artzi and Luke Zettlemoyer},
	year         = 2013,
	journal      = {arXiv preprint arXiv:1311.3011}
}
@article{artzi2013weakly,
	title        = {Weakly supervised learning of semantic parsers for mapping instructions to actions},
	author       = {Yoav Artzi and Luke Zettlemoyer},
	year         = 2013,
	journal      = {Transactions of the Association for Computational Linguistics (TACL)},
	volume       = 1,
	pages        = {49--62}
}
@inproceedings{artzi2015broad,
	title        = {Broad-coverage {CCG} Semantic Parsing with {AMR}},
	author       = {Yoav Artzi and Kenton Lee and Luke Zettlemoyer},
	year         = 2015,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@article{arulampalam2002tutorial,
	title        = {A tutorial on particle filters for on-line non-linear/non-{G}aussian {B}ayesian tracking},
	author       = {Sanjeev Arulampalam and Simon Maskell and Neil Gordon and Tim Clapp},
	year         = 2002,
	journal      = {IEEE Transactions on Signal Processing},
	volume       = 50,
	number       = 2,
	pages        = {174--188}
}
@inproceedings{arumugam2017accurately,
	title        = {Accurately and Efficiently Interpreting Human-Robot Instructions of Varying Granularities},
	author       = {Dilip Arumugam and Siddharth Karamcheti and Nakul Gopalan and Lawson L. S. Wong and Stefanie Tellex},
	year         = 2017,
	booktitle    = {Robotics: Science and Systems (RSS)}
}
@article{ARV,
	title        = {Expander flows, geometric embeddings and graph partitioning},
	author       = {Arora, Sanjeev and Rao, Satish and Vazirani, Umesh},
	year         = 2009,
	journal      = {Journal of the ACM (JACM)},
	publisher    = {ACM},
	volume       = 56,
	number       = 2,
	pages        = 5
}
@article{ARV09,
	title        = {Expander flows, geometric embeddings and graph partitioning},
	author       = {Sanjeev Arora and Satish Rao and Umesh V. Vazirani},
	year         = 2009,
	journal      = {Journal of the ACM},
	volume       = 56,
	number       = 2,
	ee           = {http://doi.acm.org/10.1145/1502793.1502794},
	bibsource    = {DBLP, http://dblp.uni-trier.de}
}
@article{arxivCohenKKPPRS18,
	title        = {Solving Directed Laplacian Systems in Nearly-Linear Time through Sparse {LU} Factorizations},
	author       = {Michael B. Cohen and Jonathan A. Kelner and Rasmus Kyng and John Peebles and Richard Peng and Anup B. Rao and Aaron Sidford},
	year         = 2018,
	journal      = {CoRR},
	booktitle    = {59th {IEEE} Annual Symposium on Foundations of Computer Science, {FOCS} 2018, Paris, France, October 7-9, 2018},
	volume       = {abs/1811.10722},
	pages        = {898--909}
}
@article{arxivCohenKPPSV16,
	title        = {Faster Algorithms for Computing the Stationary Distribution, Simulating Random Walks, and More},
	author       = {Michael B. Cohen and Jonathan A. Kelner and John Peebles and Richard Peng and Aaron Sidford and Adrian Vladu},
	year         = 2016,
	journal      = {CoRR},
	booktitle    = {{IEEE} 57th Annual Symposium on Foundations of Computer Science, {FOCS} 2016, 9-11 October 2016, Hyatt Regency, New Brunswick, New Jersey, {USA}},
	volume       = {abs/1608.03270},
	pages        = {583--592}
}
@inproceedings{arzate2020survey,
	title        = {A survey on interactive reinforcement learning: Design principles and open challenges},
	author       = {Christian Arzate Cruz and Takeo Igarashi},
	year         = 2020,
	booktitle    = {Proceedings of the 2020 ACM Designing Interactive Systems Conference},
	pages        = {1195--1209}
}
@inproceedings{Asadpour2010,
	title        = {{An $O(\log n / \log \log n )$-approximation Algorithm for the Asymmetric Traveling Salesman Problem}},
	author       = {Asadpour, Arash and Goemans, Michel X. and Mądry, Aleksander and Gharan, Shayan Oveis and Saberi, Amin},
	year         = 2010,
	booktitle    = {Proceedings of the Twenty-First Annual ACM-SIAM Symposium on Discrete Algorithms - SODA '10},
	pages        = {379--389},
	isbn         = {0001405101},
	file         = {:D$\backslash$:/Mendeley Desktop/Asadpour et al. - 2010 - An O ( log n log log n ) -approximation Algorithm for the Asymmetric Traveling Salesman Problem.pdf:pdf},
	mendeley-groups = {Algorithms/Traveling Salesman}
}
@inproceedings{asher2016catan,
	title        = {Discourse Structure and Dialogue Acts in Multiparty Dialogue: the {STAC} Corpus},
	author       = {Nicholas Asher and Julie Hunter and Mathieu Morey and Farah Benamara and Stergos Afantenos},
	year         = 2016,
	booktitle    = {Language Resources and Evaluation Conference (LREC)}
}
@inproceedings{ashok2014wizard,
	title        = {Wizard-of-{O}z evaluation of speech-driven web browsing interface for people with vision impairments},
	author       = {Vikas Ashok and Yevgen Borodin and Svetlana Stoyanchev and Yury Puzis and I. V. Ramakrishnan},
	year         = 2014,
	booktitle    = {Web for All Conference}
}
@article{ashtiani2017sample,
	title        = {Sample-Efficient Learning of Mixtures},
	author       = {Hassan Ashtiani and Shai Ben-David and Abbas Mehrabian},
	year         = 2017,
	journal      = {arXiv}
}
@inproceedings{aslam2006statistical,
	title        = {A statistical method for system evaluation using incomplete judgments},
	author       = {Javed A. Aslam and Virgil Pavlu and Emine Yilmaz},
	year         = 2006,
	booktitle    = {ACM Special Interest Group on Information Retrieval (SIGIR)},
	pages        = {541--548}
}
@inproceedings{asm08,
	title        = {Fitted {Q}-iteration in continuous action-space {MDPs}},
	author       = {Antos, Andr{\'a}s and Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
	year         = 2008,
	booktitle    = {Advances in neural information processing systems},
	pages        = {9--16}
}
@article{asm08a,
	title        = {Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path},
	author       = {Antos, Andr{\'a}s and Szepesv{\'a}ri, Csaba and Munos, R{\'e}mi},
	year         = 2008,
	journal      = {Machine Learning},
	publisher    = {Springer},
	volume       = 71,
	number       = 1,
	pages        = {89--129}
}
@article{assouad1983deux,
	title        = {Deux remarques sur l'estimation},
	author       = {Patrice Assouad},
	year         = 1983,
	journal      = {Comptes rendus des s{\'e}ances de l'Acad{\'e}mie des sciences. S{\'e}rie 1, Math{\'e}matique},
	volume       = 296,
	number       = 23,
	pages        = {1021--1024}
}
@incollection{asuncion2011distributed,
	title        = {Distributed Gibbs Sampling for Latent Variable Models},
	author       = {Asuncion, A. and Smyth, P. and Welling, M. and Newman, D. and Porteous, I. and Triglia, S.},
	year         = 2011,
	booktitle    = {Scaling Up Machine Learning: Parallel and Distributed Approaches},
	publisher    = {Cambridge Univ Pr}
}
@article{athalye2017synthesizing,
	title        = {Synthesizing robust adversarial examples},
	author       = {Anish Athalye and Ilya Sutskever},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1707.07397}
}
@inproceedings{athalye2018obfuscated,
	title        = {Obfuscated gradients give a false sense of security: Circumventing defenses to adversarial examples},
	author       = {Anish Athalye and Nicholas Carlini and David Wagner},
	year         = 2018,
	booktitle    = {International Conference on Machine Learning (ICML)}
}
@inproceedings{athanasopoulou2014low,
	title        = {Low-Dimensional Manifold Distributional Semantic Models},
	author       = {Georgia Athanasopoulou and Elias Iosif and Alexandros Potamianos},
	year         = 2014,
	booktitle    = {International Conference on Computational Linguistics (COLING)},
	pages        = {731--740}
}
@article{athey2015measure,
	title        = {A measure of robustness to misspecification},
	author       = {Susan Athey and Guido Imbens},
	year         = 2015,
	journal      = {The American Economic Review},
	volume       = 105,
	number       = 5,
	pages        = {476--480}
}
@article{athreya1978new,
	title        = {A new approach to the limit theory of recurrent {M}arkov chains},
	author       = {Krishna B Athreya and P Ney},
	year         = 1978,
	journal      = {Transactions of the American Mathematical Society},
	volume       = 245,
	pages        = {493--501}
}
@inproceedings{atkeson1997robot,
	title        = {Robot learning from demonstration},
	author       = {Christopher G Atkeson and Stefan Schaal},
	year         = 1997,
	booktitle    = {International Conference on Machine Learning (ICML)},
	volume       = 97,
	pages        = {12--20}
}
@inproceedings{attenberg2010why,
	title        = {Why Label when you can Search? Alternatives to Active Learning for Applying Human Resources to Build Classification Models Under Extreme Class Imbalance},
	author       = {Josh Attenberg and Foster Provost},
	year         = 2010,
	booktitle    = {International Conference on Knowledge Discovery and Data Mining (KDD)}
}
@article{attene2013tox21,
	title        = {The Tox21 robotic platform for the assessment of environmental chemicals--from vision to reality},
	author       = {Matias S Attene-Ramos and Nicole Miller and Ruili Huang and Sam Michael and Misha Itkin and Robert J Kavlock and Christopher P Austin and Paul Shinn and Anton Simeonov and Raymond R Tice and others},
	year         = 2013,
	journal      = {Drug discovery today},
	volume       = 18,
	number       = 15,
	pages        = {716--723}
}
@inproceedings{attias2019improved,
	title        = {Improved Generalization Bounds for Robust Learning},
	author       = {Idan Attias and Aryeh Kontorovich and Yishay Mansour},
	year         = 2019,
	booktitle    = {Algorithmic Learning Theory},
	pages        = {162--183}
}
@article{attouch2010proximal,
	title        = {Proximal alternating minimization and projection methods for nonconvex problems: An approach based on the Kurdyka-{\L}ojasiewicz inequality},
	author       = {Attouch, H{\'e}dy and Bolte, J{\'e}r{\^o}me and Redont, Patrick and Soubeyran, Antoine},
	year         = 2010,
	journal      = {Mathematics of operations research},
	publisher    = {INFORMS},
	volume       = 35,
	number       = 2,
	pages        = {438--457}
}
@article{atzmon2016compositions,
	title        = {Learning to generalize to new compositions in image understanding},
	author       = {Yuval Atzmon and Jonathan Berant and Vahid Kezami and Amir Globerson and Gal Chechik},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1608.07639}
}
@article{aubin2021linear,
	title        = {Linear unit-tests for invariance discovery},
	author       = {Aubin, Benjamin and S{\l}owik, Agnieszka and Arjovsky, Martin and Bottou, Leon and Lopez-Paz, David},
	year         = 2021,
	journal      = {arXiv preprint arXiv:2102.10867}
}
@article{audibert2009exploration,
	title        = {Exploration--exploitation tradeoff using variance estimates in multi-armed bandits},
	author       = {Jean-Yves Audibert and R{\'e}mi Munos and Csaba Szepesv{\'a}ri},
	year         = 2009,
	journal      = {Theoretical Computer Science},
	volume       = 410,
	number       = 19,
	pages        = {1876--1902}
}
@article{audibert2011minimax,
	title        = {Minimax Policies for Combinatorial Prediction Games},
	author       = {Audibert, Jean-Yves and Bubeck, S{\'e}bastien and Lugosi, G{\'a}bor},
	year         = 2011,
	journal      = {Proceedings of COLT 2011}
}
@article{auer02nonstochastic,
	title        = {The Nonstochastic Multiarmed Bandit Problem},
	author       = {Peter Auer and Nicol\`{o} {Cesa-Bianchi} and Yoav Freund and Robert E. Schapire},
	year         = 2002,
	journal      = {SIAM Journal on Computing},
	volume       = 32,
	number       = 1,
	pages        = {48--77}
}
@inproceedings{Auer1995,
	title        = {{Gambling in a rigged casino: The adversarial multi-armed bandit problem}},
	author       = {Auer, Peter and {Cesa-Bianchi}, Nicol\`{o} and Freund, Yoav and Schapire, Robert E.},
	year         = 1995,
	booktitle    = {Proceedings of IEEE 36th Annual Foundations of Computer Science},
	publisher    = {IEEE Comput. Soc. Press},
	pages        = {322--331},
	doi          = {10.1109/SFCS.1995.492488},
	isbn         = {0-8186-7183-1},
	file         = {:C$\backslash$:/Users/Zeyuan/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Unknown - Unknown - Gambling in a rigged casino The adversarial multi-armed bandit problem.pdf:pdf},
	mendeley-groups = {Optimization/Bandit}
}
@inproceedings{auer1995gambling,
	title        = {Gambling in a rigged casino: The adversarial multi-armed bandit problem},
	author       = {P. Auer and N. Cesa-Bianchi and Y. Freund and R. E. Schapire},
	year         = 1995,
	booktitle    = {Foundations of Computer Science (FOCS)},
	pages        = {322--331}
}
@article{auer1996exponentially,
	title        = {Exponentially many local minima for single neurons},
	author       = {Auer, Peter and Herbster, Mark and Warmuth, Manfred K},
	year         = 1996,
	journal      = {Advances in neural information processing systems},
	publisher    = {Citeseer},
	pages        = {316--322}
}
@inproceedings{auer1997multiple,
	title        = {On Learning From Multi-Instance Examples: Empirical Evaluation of a Theoretical Approach},
	author       = {Peter Auer},
	year         = 1997,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {21--29}
}
@article{auer2002finite,
	title        = {Finite-time analysis of the multiarmed bandit problem},
	author       = {Peter Auer and Nicolo Cesa-Bianchi and Paul Fischer},
	year         = 2002,
	journal      = {Machine learning},
	volume       = 47,
	number       = 2,
	pages        = {235--256}
}
@article{Auer2002nonstochastic,
	title        = {The nonstochastic multiarmed bandit problem},
	author       = {Auer, Peter and Cesa-Bianchi, Nicolo and Freund, Yoav and Schapire, Robert E},
	year         = 2002,
	journal      = {SIAM journal on computing},
	publisher    = {SIAM},
	volume       = 32,
	number       = 1,
	pages        = {48--77}
}
@article{Auer2002stochastic,
	title        = {{Finite-time analysis of the multiarmed bandit problem}},
	author       = {Auer, Peter and {Cesa-Bianchi}, Nicol\`{o} and Fischer, Paul},
	year         = 2002,
	journal      = {Machine Learning},
	volume       = 47,
	number       = {2-3},
	pages        = {235--256},
	doi          = {10.1023/A:1013689704352},
	annote       = {
		This is for the case when there is a fixed (but unknown) distribution where the feedbacks are generated.

		It is different from the other type of bandit work where there is no distribution.
	},
	file         = {:C$\backslash$:/Users/Zeyuan/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Auer, Cesa-Bianchi, Fischer - 2002 - Finite-time analysis of the multiarmed bandit problem.pdf:pdf},
	mendeley-groups = {Optimization/Bandit}
}
@inproceedings{auer2007dbpedia,
	title        = {{DB}pedia: A nucleus for a web of open data},
	author       = {S{\"o}ren Auer and Christian Bizer and Georgi Kobilarov and Jens Lehmann and Richard Cyganiak and Zachary G. Ives},
	year         = 2007,
	booktitle    = {International semantic web conference and Asian semantic web conference (ISWC/ASWC)},
	pages        = {722--735}
}
@inproceedings{auer2007logarithmic,
	title        = {Logarithmic online regret bounds for undiscounted reinforcement learning},
	author       = {Auer, Peter and Ortner, Ronald},
	year         = 2007,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {49--56}
}
@article{auffinger2013complexity,
	title        = {Complexity of random smooth functions on the high-dimensional sphere},
	author       = {Auffinger, Antonio and {Ben Arous}, G{\'e}rard},
	year         = 2013,
	journal      = {The Annals of Probability},
	publisher    = {Institute of Mathematical Statistics},
	volume       = 41,
	number       = 6,
	pages        = {4214--4247}
}
@article{auffinger2013random,
	title        = {Random matrices and complexity of spin glasses},
	author       = {Auffinger, Antonio and {Ben Arous}, G{\'e}rard and {\v{C}}ern{\'y}, Ji{\v{r}}{\'\i}},
	year         = 2013,
	journal      = {Communications on Pure and Applied Mathematics},
	publisher    = {Wiley Online Library},
	volume       = 66,
	number       = 2,
	pages        = {165--201}
}
@inproceedings{auli2011efficient,
	title        = {Efficient {CCG} parsing: A* versus adaptive supertagging},
	author       = {Michael Auli and Adam Lopez},
	year         = 2011,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@article{aumann1995backward,
	title        = {Backward induction and common knowledge of rationality},
	author       = {Robert J Aumann},
	year         = 1995,
	journal      = {Games and Economic Behavior},
	volume       = 8,
	number       = 1,
	pages        = {6--19}
}
@article{austin2008exchangeable,
	title        = {On exchangeable random variables and the statistics of large graphs and hypergraphs},
	author       = {T. Austin},
	year         = 2008,
	journal      = {Probability Surveys},
	volume       = 5,
	pages        = {80--145}
}
@book{austin62do,
	title        = {How to do Things with Words: The {W}illiam {J}ames Lectures delivered at {H}arvard University in 1955},
	author       = {John Langshaw Austin},
	year         = 1962,
	publisher    = {Oxford}
}
@article{aviv2017human,
	title        = {The human cell atlas},
	author       = {Aviv Regev and Sarah A Teichmann and Eric S Lander and Ido Amit and Christophe Benoist and Ewan Birney and Bernd Bodenmiller and Peter Campbell and Piero Carninci and Menna Clatworthy and others},
	year         = 2017,
	journal      = {Elife},
	volume       = 6
}
@article{avsec2019deep,
	title        = {Deep learning at base-resolution reveals motif syntax of the cis-regulatory code},
	author       = {{\v{Z}}iga Avsec and Melanie Weilert and Avanti Shrikumar and Amr Alexandari and Sabrina Krueger and Khyati Dalal and Robin Fropf and Charles McAnany and Julien Gagneur and Anshul Kundaje and Julia Zeitlinger},
	year         = 2019,
	journal      = {bioRxiv}
}
@article{awasthi2012improved,
	title        = {Improved spectral-norm bounds for clustering},
	author       = {Pranjal Awasthi and Or Sheffet},
	year         = 2012,
	journal      = {Approximation, Randomization, and Combinatorial Optimization},
	pages        = {37--49}
}
@inproceedings{awasthi2013learning,
	title        = {Learning Using Local Membership Queries},
	author       = {Pranjal Awasthi and Vitaly Feldman and Varun Kanade},
	year         = 2013,
	booktitle    = {Conference on Learning Theory (COLT)},
	pages        = {398--431}
}
@inproceedings{awasthi2014learning,
	title        = {Learning mixtures of ranking models},
	author       = {Awasthi, Pranjal and Blum, Avrim and Sheffet, Or and Vijayaraghavan, Aravindan},
	year         = 2014,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {2609--2617}
}
@inproceedings{awasthi2014power,
	title        = {The power of localization for efficiently learning linear separators with noise},
	author       = {Pranjal Awasthi and Maria Florina Balcan and Philip M. Long},
	year         = 2014,
	booktitle    = {Symposium on Theory of Computing (STOC)},
	pages        = {449--458}
}
@inproceedings{awerbuch2004adaptive,
	title        = {Adaptive routing with end-to-end feedback: Distributed learning and geometric approaches},
	author       = {Awerbuch, Baruch and Kleinberg, Robert D},
	year         = 2004,
	booktitle    = {Proceedings of the thirty-sixth annual ACM symposium on Theory of computing},
	pages        = {45--53},
	organization = {ACM}
}
@article{Awerbuch2008,
	title        = {{Stateless distributed gradient descent for positive linear programs}},
	author       = {Awerbuch, Baruch and Khandekar, Rohit},
	year         = 2008,
	journal      = {Proceedings of the fortieth annual ACM symposium on Theory of computing - STOC 08},
	publisher    = {ACM Press},
	address      = {New York, New York, USA},
	pages        = 691,
	doi          = {10.1145/1374376.1374476},
	isbn         = 9781605580470,
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Awerbuch, Khandekar - 2008 - Stateless distributed gradient descent for positive linear programs.pdf:pdf},
	keywords     = {convergence,distributed and stateless algorithms,fast,gradient descent,linear programming},
	mendeley-groups = {Algorithms/Multiplicative Weight/LP}
}
@inproceedings{AwerbuchAzarKhandekar2008soda,
	title        = {Fast Load Balancing via Bounded Best Response},
	author       = {Awerbuch, Baruch and Azar, Yossi and Khandekar, Rohit},
	year         = 2008,
	booktitle    = {Proceedings of the Nineteenth Annual ACM-SIAM Symposium on Discrete Algorithms},
	location     = {San Francisco, California},
	publisher    = {Society for Industrial and Applied Mathematics},
	address      = {Philadelphia, PA, USA},
	series       = {SODA '08},
	pages        = {314--322},
	numpages     = 9,
	acmid        = 1347117
}
@incollection{AwerbuchKhandekar2008latin,
	title        = {Stateless near optimal flow control with poly-logarithmic convergence},
	author       = {Awerbuch, Baruch and Khandekar, Rohit},
	year         = 2008,
	booktitle    = {LATIN 2008: Theoretical Informatics},
	publisher    = {Springer},
	pages        = {580--592}
}
@article{AwerbuchKhandekar2009DistributedComputing,
	title        = {Greedy distributed optimization of multi-commodity flows},
	author       = {Awerbuch, Baruch and Khandekar, Rohit},
	year         = 2009,
	journal      = {Distributed Computing},
	publisher    = {Springer-Verlag},
	volume       = 21,
	number       = 5,
	pages        = {317--329},
	doi          = {10.1007/s00446-008-0074-0},
	issn         = {0178-2770},
	keywords     = {Multi-commodity flows; Distributed algorithms; Statelessness; Self-stabilization}
}
@article{AwerbuchKR2012,
	title        = {{Distributed algorithms for multicommodity flow problems via approximate steepest descent framework}},
	author       = {Awerbuch, Baruch and Khandekar, Rohit and Rao, Satish},
	year         = 2012,
	month        = dec,
	journal      = {ACM Transactions on Algorithms},
	volume       = 9,
	number       = 1,
	pages        = {1--14},
	doi          = {10.1145/2390176.2390179},
	issn         = 15496325,
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Awerbuch, Khandekar, Rao - 2012 - Distributed algorithms for multicommodity flow problems via approximate steepest descent framework.pdf:pdf},
	mendeley-groups = {Algorithms/Multiplicative Weight/Flow}
}
@inproceedings{AY2015-coord,
	title        = {Even Faster Accelerated Coordinate Descent Using Non-Uniform Sampling},
	author       = {{Allen-Zhu}, Zeyuan and Richt\'arik, Peter and Qu, Zheng and Yuan, Yang},
	year         = 2016,
	booktitle    = {ICML}
}
@inproceedings{AY2015-univr,
	title        = {{Improved SVRG for Non-Strongly-Convex or Sum-of-Non-Convex Objectives}},
	author       = {{Allen-Zhu}, Zeyuan and Yuan, Yang},
	year         = 2016,
	booktitle    = {ICML}
}
@inproceedings{aydemir2011search,
	title        = {Search in the real world: Active visual object search based on spatial relations},
	author       = {A. Aydemir and K. Sjoo and J. Folkesson and A. Pronobis and P. Jensfelt},
	year         = 2011,
	booktitle    = {International Conference on Robotics and Automation (ICRA)},
	pages        = {2818--2824}
}
@inproceedings{ayoub2020model,
	title        = {Model-Based Reinforcement Learning with Value-Targeted Regression},
	author       = {Ayoub, Alex and Jia, Zeyu and Szepesvari, Csaba and Wang, Mengdi and Yang, Lin F},
	year         = 2020,
	booktitle    = {Proceedings of the 37th International Conference on Machine Learning}
}
@article{aytar2018playing,
	title        = {Playing hard exploration games by watching {YouTube}},
	author       = {Y. Aytar and T. Pfaff and D Budden and T. L. Paine and Z. Wang and N. de Freitas},
	year         = 2018,
	journal      = {arXiv preprint arXiv:1805.11592}
}
@article{azar2011reinforcement,
	title        = {Reinforcement learning with a near optimal rate of convergence},
	author       = {Azar, Mohammad Gheshlaghi and Munos, R{\'e}mi and Ghavamzadeh, Mohammad and Kappen, Hilbert},
	year         = 2011
}
@inproceedings{azar2011speedy,
	title        = {Speedy {Q}-learning},
	author       = {Azar, Mohammad Gheshlaghi and Munos, R{\'e}mi and Ghavamzadeh, Mohammad and Kappen, Hilbert},
	year         = 2011,
	booktitle    = {Advances in neural information processing systems}
}
@article{azar2012sample,
	title        = {On the sample complexity of reinforcement learning with a generative model},
	author       = {Azar, Mohammad Gheshlaghi and Munos, R{\'e}mi and Kappen, Bert},
	year         = 2012,
	journal      = {arXiv preprint arXiv:1206.6461}
}
@book{azar2012theory,
	title        = {On the theory of reinforcement learning: methods, convergence analysis and sample complexity},
	author       = {Azar, Mohammad Gheshlaghi},
	year         = 2012,
	publisher    = {UB Nijmegen [host]}
}
@article{azar2013minimax,
	title        = {Minimax {PAC} bounds on the sample complexity of reinforcement learning with a generative model},
	author       = {Azar, Mohammad Gheshlaghi and Munos, R{\'e}mi and Kappen, Hilbert J},
	year         = 2013,
	journal      = {Machine learning},
	publisher    = {Springer},
	volume       = 91,
	number       = 3,
	pages        = {325--349}
}
@inproceedings{azar2017minimax,
	title        = {Minimax regret bounds for reinforcement learning},
	author       = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, R{\'e}mi},
	year         = 2017,
	booktitle    = {Proceedings of the 34th International Conference on Machine Learning},
	pages        = {263--272}
}
@inproceedings{azaria2016instructable,
	title        = {Instructable Intelligent Personal Agent},
	author       = {Amos Azaria and Jayant Krishnamurthy and Tom M. Mitchell},
	year         = 2016,
	booktitle    = {Association for the Advancement of Artificial Intelligence (AAAI)},
	pages        = {2681--2689}
}
@article{azizi2021big,
	title        = {Big self-supervised models advance medical image classification},
	author       = {Shekoofeh Azizi and Basil Mustafa and Fiona Ryan and Zachary Beaver and Jan Freyberg and Jonathan Deaton and Aaron Loh and Alan Karthikesalingam and Simon Kornblith and Ting Chen and others},
	year         = 2021,
	journal      = {arXiv preprint arXiv:2101.05224}
}
@article{azizyan2013density,
	title        = {Density-sensitive semisupervised inference},
	author       = {Azizyan, Martin and Singh, Aarti and Wasserman, Larry},
	year         = 2013,
	journal      = {The Annals of Statistics},
	publisher    = {Institute of Mathematical Statistics},
	volume       = 41,
	number       = 2,
	pages        = {751--771}
}
@article{azizzadenesheli2016contextual,
	title        = {Reinforcement Learning in Rich-Observation {MDPs} using Spectral Methods},
	author       = {Azizzadenesheli, Kamyar and Lazaric, Alessandro and Anandkumar, Animashree},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1611.03907}
}
@article{azizzadenesheli2016reinforcement,
	title        = {Reinforcement learning of {POMDPs} using spectral methods},
	author       = {Azizzadenesheli, Kamyar and Lazaric, Alessandro and Anandkumar, Animashree},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.07764}
}
@inproceedings{azizzadenesheli2019reglabel,
	title        = {Regularized Learning for Domain Adaptation under Label Shifts},
	author       = {Kamyar Azizzadenesheli and Anqi Liu and Fanny Yang and Animashree Anandkumar},
	year         = 2019,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@article{azzalini2012some,
	title        = {Some properties of skew-symmetric distributions},
	author       = {Adelchi Azzalini and Giuliana Regoli},
	year         = 2012,
	journal      = {Annals of the Institute of Statistical Mathematics},
	volume       = 64,
	number       = 4,
	pages        = {857--879}
}
@article{b94,
	title        = {Approximation and estimation bounds for artificial neural networks},
	author       = {Barron, Andrew R},
	year         = 1994,
	journal      = {Machine learning},
	publisher    = {Springer},
	volume       = 14,
	number       = 1,
	pages        = {115--133}
}
@inproceedings{ba2013adaptive,
	title        = {Adaptive dropout for training deep neural networks},
	author       = {Jimmy Ba and Brendan Frey},
	year         = 2013,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {3084--3092}
}
@inproceedings{ba2015multiple,
	title        = {Multiple object recognition with visual attention},
	author       = {Jimmy Ba and Volodymyr Mnih and Koray Kavukcuoglu},
	year         = 2015,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@book{baader2003description,
	title        = {The description logic handbook: theory, implementation, and applications},
	author       = {Franz Baader},
	year         = 2003,
	publisher    = {Cambridge University Press}
}
@inproceedings{baarslag2016negotiation,
	title        = {Negotiation as an Interaction Mechanism for Deciding App Permissions},
	author       = {Tim Baarslag and Alper T. Alan and Richard C. Gomer and Ilaria Liccardi and Helia Marreiros and Enrico Gerding and M. C. Schraefel},
	year         = 2016,
	booktitle    = {Conference on Human Factors in Computing Systems (CHI)}
}
@inproceedings{babaioff2009characterizing,
	title        = {Characterizing truthful multi-armed bandit mechanisms},
	author       = {Babaioff, Moshe and Sharma, Yogeshwer and Slivkins, Aleksandrs},
	year         = 2009,
	booktitle    = {Proceedings of the 10th ACM conference on Electronic commerce},
	pages        = {79--88},
	organization = {ACM}
}
@inproceedings{babenko2009visual,
	title        = {Visual tracking with online multiple instance learning},
	author       = {Boris Babenko and Ming-Hsuan Yang and Serge Belongie},
	year         = 2009,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)},
	pages        = {983--990}
}
@inproceedings{bacchus1996rewarding,
	title        = {Rewarding behaviors},
	author       = {Bacchus, Fahiem and Boutilier, Craig and Grove, Adam},
	year         = 1996,
	booktitle    = {Proceedings of the National Conference on Artificial Intelligence},
	pages        = {1160--1167}
}
@inproceedings{bach17structure,
	title        = {Learning the Structure of Generative Models without Labeled Data},
	author       = {Bach, Stephen H. and He, Bryan and Ratner, Alexander and R{\'e}, Christopher},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning (ICML)}
}
@article{bach2010self,
	title        = {Self-concordant analysis for logistic regression},
	author       = {Francis Bach},
	year         = 2010,
	journal      = {Electronic Journal of Statistics},
	volume       = 4,
	pages        = {384--414}
}
@inproceedings{bach2010structured,
	title        = {Structured sparsity-inducing norms through submodular functions},
	author       = {Francis R. Bach},
	year         = 2010,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {118--126}
}
@article{bachman2019learning,
	title        = {Learning representations by maximizing mutual information across views},
	author       = {Bachman, Philip and Hjelm, R Devon and Buchwalter, William},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1906.00910}
}
@inproceedings{Bachpaper,
	title        = {A stochastic gradient method with an exponential convergence rate for finite training sets},
	author       = {{Le Roux}, Nicolas and Schmidt, Mark and Bach, Francis R},
	year         = 2012,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {2663--2671}
}
@incollection{Backprop,
	title        = {Neurocomputing: foundations of research},
	author       = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
	year         = 1988,
	publisher    = {MIT Press},
	address      = {Cambridge, MA, USA},
	pages        = {696--699},
	isbn         = {0-262-01097-6},
	url          = {http://dl.acm.org/citation.cfm?id=65669.104451},
	editor       = {Anderson, James A. and Rosenfeld, Edward},
	chapter      = {Learning representations by back-propagating errors},
	acmid        = 104451,
	numpages     = 4
}
@inproceedings{bacon2017option,
	title        = {The Option-Critic Architecture},
	author       = {P. Bacon and J. Harb and D. Precup},
	year         = 2017,
	booktitle    = {Association for the Advancement of Artificial Intelligence (AAAI)},
	pages        = {1726--1734}
}
@book{bacsar2008optimal,
	title        = {{H}-infinity optimal control and related minimax design problems: a dynamic game approach},
	author       = {Tamer Ba{\c{s}}ar and Pierre Bernhard},
	year         = 2008,
	publisher    = {Springer Science \& Business Media}
}
@article{bader2008discussion,
	title        = {Discussion tracking in {E}nron email using {PARAFAC}},
	author       = {Bader, Brett W and Berry, Michael W and Browne, Murray},
	year         = 2008,
	journal      = {Survey of Text Mining II},
	volume       = 1,
	pages        = {147--163}
}
@inproceedings{bader2019getafix,
	title        = {Getafix: Learning to fix bugs automatically},
	author       = {Johannes Bader and Andrew Scott and Michael Pradel and Satish Chandra},
	year         = 2019,
	booktitle    = {Object-Oriented Programming, Systems, Languages, and Applications (OOPSLA)}
}
@article{badgeley2019deep,
	title        = {Deep learning predicts hip fracture using confounding patient and healthcare variables},
	author       = {Marcus A Badgeley and John R Zech and Luke Oakden-Rayner and Benjamin S Glicksberg and Manway Liu and William Gale and Michael V McConnell and Bethany Percha and Thomas M Snyder and Joel T Dudley},
	year         = 2019,
	journal      = {npj Digital Medicine},
	volume       = 2
}
@article{badia2020never,
	title        = {Never Give Up: Learning Directed Exploration Strategies},
	author       = {Adri{\`a} Puigdom{\`e}nech Badia and Pablo Sprechmann and Alex Vitvitskyi and Daniel Guo and Bilal Piot and Steven Kapturowski and Olivier Tieleman and Mart{\'\i}n Arjovsky and Alexander Pritzel and Andrew Bolt and others},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2002.06038}
}
@inproceedings{Badoiu2002,
	title        = {{Approximate clustering via core-sets}},
	author       = {{B{\u{a}}doiu}, Mihai and {Har-Peled}, Sariel and Indyk, Piotr},
	year         = 2002,
	booktitle    = {Proceedings of the thirty-fourth annual ACM symposium on Theory of computing - STOC '02},
	publisher    = {ACM Press},
	address      = {New York, New York, USA},
	pages        = 250,
	doi          = {10.1145/509907.509947},
	isbn         = 1581134959,
	mendeley-groups = {Algorithms/Computational Geometry}
}
@article{baes2009estimate,
	title        = {Estimate sequence methods: extensions and approximations},
	author       = {Baes, Michel},
	year         = 2009,
	journal      = {Institute for Operations Research, ETH, Z{\"u}rich, Switzerland}
}
@inproceedings{bagnell2004policy,
	title        = {Policy search by dynamic programming},
	author       = {Bagnell, J Andrew and Kakade, Sham M and Schneider, Jeff G and Ng, Andrew Y},
	year         = 2004,
	booktitle    = {Advances in neural information processing systems},
	pages        = {831--838}
}
@inproceedings{bagnell2005robust,
	title        = {Robust supervised learning},
	author       = {J Andrew Bagnell},
	year         = 2005,
	booktitle    = {Proceedings of the 20th national conference on Artificial intelligence-Volume 2},
	pages        = {714--719}
}
@article{bagnoli2005logconcave,
	title        = {Log-concave probability and its applications},
	author       = {Mark Bagnoli and Ted Bergstrom},
	year         = 2005,
	journal      = {Economic Theory},
	volume       = 26,
	pages        = {445--469}
}
@inproceedings{bahdanau2015neural,
	title        = {Neural machine translation by jointly learning to align and translate},
	author       = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
	year         = 2015,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@inproceedings{bahdanau2017actor,
	title        = {An actor-critic algorithm for sequence prediction},
	author       = {Dzmitry Bahdanau and Philemon Brakel and Kelvin Xu and Anirudh Goyal and Ryan Lowe and Joelle Pineau and Aaron Courville and Yoshua Bengio},
	year         = 2017,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@inproceedings{bahdanau2019reward,
	title        = {Learning to Understand Goal Specifications by Modelling Reward},
	author       = {Dzmitry Bahdanau and Felix Hill and Jan Leike and Edward Hughes and S. A. Hosseini and Pushmeet Kohli and Edward Grefenstette},
	year         = 2019,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@inproceedings{bai2019beyond,
	title        = {Beyond Linearization: On Quadratic and Higher-Order Approximation of Wide Neural Networks},
	author       = {Bai, Yu and Lee, Jason D},
	year         = 2020,
	booktitle    = {International Conference on Learning Representations (ICLR)}
}
@inproceedings{bai2019provably,
	title        = {Provably efficient {Q}-learning with low switching cost},
	author       = {Bai, Yu and Xie, Tengyang and Jiang, Nan and Wang, Yu-Xiang},
	year         = 2019,
	booktitle    = {Advances in Neural Information Processing Systems},
	pages        = {8004--8013}
}
@article{bai2020provable,
	title        = {Provable Self-Play Algorithms for Competitive Reinforcement Learning},
	author       = {Bai, Yu and Jin, Chi},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2002.04017}
}
@article{Bailly11,
	title        = {Quadratic weighted automata: Spectral algorithm and likelihood maximization},
	author       = {R. Bailly},
	year         = 2011,
	journal      = {Journal of Machine Learning Research}
}
@inproceedings{bailly2010spectral,
	title        = {A spectral approach for probabilistic grammatical inference on trees},
	author       = {R. Bailly and A. Habrard and F. Denis},
	year         = 2010,
	booktitle    = {Algorithmic Learning Theory},
	pages        = {74--88}
}
@article{bair2006prediction,
	title        = {Prediction by supervised principal components},
	author       = {Eric Bair and Trevor Hastie and Debashis Paul and Robert Tibshirani},
	year         = 2006,
	journal      = {Journal of the American Statistical Association (JASA)},
	volume       = 101,
	number       = 473,
	pages        = {119--137}
}
@inproceedings{bajcsy2017learning,
	title        = {Learning Robot Objectives from Physical Human Interaction},
	author       = {Andrea Bajcsy and Dylan P. Losey and M. O'Malley and A. Dragan},
	year         = 2017,
	booktitle    = {Conference on Robot Learning (CORL)}
}
@article{bakker03task,
	title        = {Task clustering and gating for {B}ayesian multitask learning},
	author       = {B. Bakker and T. Heskes},
	year         = 2003,
	journal      = {Journal of Machine Learning Research (JMLR)},
	volume       = 4,
	pages        = {83--99}
}
@incollection{bakry1985diffusions,
	title        = {Diffusions hypercontractives},
	author       = {Dominique Bakry and Michel {\'E}mery},
	year         = 1985,
	booktitle    = {S{\'e}minaire de Probabilit{\'e}s XIX 1983/84},
	pages        = {177--206}
}
@inproceedings{balaji2018metareg,
	title        = {Metareg: Towards domain generalization using meta-regularization},
	author       = {Yogesh Balaji and Swami Sankaranarayanan and Rama Chellappa},
	year         = 2018,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {998--1008}
}
@article{balakrishnan2016statistical,
	title        = {Statistical guarantees for the {EM} algorithm: From population to sample-based analysis},
	author       = {Balakrishnan, Sivaraman and Wainwright, Martin J and Yu, Bin},
	year         = 2017,
	journal      = {The Annals of Statistics},
	publisher    = {Institute of Mathematical Statistics},
	volume       = 45,
	number       = 1,
	pages        = {77--120}
}
@inproceedings{balakrishnan2017computationally,
	title        = {Computationally Efficient Robust Sparse Estimation in High Dimensions},
	author       = {Balakrishnan, Sivaraman and Du, Simon S. and Li, Jerry and Singh, Aarti},
	year         = 2017,
	month        = {07--10 Jul},
	booktitle    = {Proceedings of the 2017 Conference on Learning Theory},
	publisher    = {PMLR},
	series       = {Proceedings of Machine Learning Research},
	volume       = 65,
	pages        = {169--212},
	url          = {http://proceedings.mlr.press/v65/balakrishnan17a.html},
	editor       = {Kale, Satyen and Shamir, Ohad},
	pdf          = {http://proceedings.mlr.press/v65/balakrishnan17a/balakrishnan17a.pdf},
	abstract     = {Many conventional statistical procedures are extremely sensitive to seemingly minor deviations from modeling assumptions. This problem is exacerbated in modern high-dimensional settings, where the problem dimension can grow with and possibly exceed the sample size. We consider the problem of robust estimation of sparse functionals, and provide a computationally and statistically efficient algorithm in the high-dimensional setting. Our theory identifies a unified set of deterministic conditions under which our algorithm guarantees accurate recovery. By further establishing that these deterministic conditions hold with high-probability for a wide range of statistical models, our theory applies to many problems of considerable interest including sparse mean and covariance estimation; sparse linear regression; and sparse generalized linear models. In certain settings, such as the detection and estimation of sparse principal components in the spiked covariance model, our general theory does not yield optimal sample complexity, and we provide a novel algorithm based on the same intuition which is able to take advantage of further structure of the problem to achieve nearly optimal rates.}
}
@article{balakrishnan2017hypothesis,
	title        = {Hypothesis Testing for High-Dimensional Multinomials: A Selective Review},
	author       = {Sivaraman Balakrishnan and Larry Wasserman},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1712.06120}
}
@inproceedings{balakrishnan2017sparse,
	title        = {Computationally Efficient Robust Sparse Estimation in High Dimensions},
	author       = {Sivaraman Balakrishnan and Simon S. Du and Jerry Li and Aarti Singh},
	year         = 2017,
	booktitle    = {Conference on Learning Theory (COLT)},
	pages        = {169--212}
}
@article{balamurugan2016stochastic,
	title        = {Stochastic Variance Reduction Methods for Saddle-Point Problems},
	author       = {Balamurugan, P and Bach, Francis},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1605.06398}
}
@article{balasubramanian2011unsupervised,
	title        = {Unsupervised supervised learning {II}: Margin-based classification without labels},
	author       = {Krishnakumar Balasubramanian and Pinar Donmez and Guy Lebanon},
	year         = 2011,
	journal      = {Journal of Machine Learning Research (JMLR)},
	volume       = 12,
	pages        = {3119--3145}
}
@article{balcan2005co,
	title        = {Co-training and expansion: Towards bridging theory and practice},
	author       = {Balcan, Maria-Florina and Blum, Avrim and Yang, Ke},
	year         = 2005,
	journal      = {Advances in neural information processing systems},
	publisher    = {MIT Press},
	volume       = 17,
	pages        = {89--96}
}
@inproceedings{balcan2007margin,
	title        = {Margin based active learning},
	author       = {Maria-Florina Balcan and Andrei Broder and Tong Zhang},
	year         = 2007,
	booktitle    = {International Conference on Computational Learning Theory}
}
@inproceedings{balcan2008discriminative,
	title        = {A discriminative framework for clustering via similarity functions},
	author       = {Maria-Florina Balcan and Avrim Blum and Santosh Vempala},
	year         = 2008,
	booktitle    = {Symposium on Theory of Computing (STOC)},
	pages        = {671--680}
}
@inproceedings{balcan2009agnostic,
	title        = {Agnostic clustering},
	author       = {Maria-Florina Balcan and Heiko R{\"o}glin and Shang-Hua Teng},
	year         = 2009,
	booktitle    = {International Conference on Algorithmic Learning Theory},
	pages        = {384--398}
}
@article{balcan2010discriminative,
	title        = {A discriminative model for semi-supervised learning},
	author       = {Maria-Florina Balcan and Avrim Blum},
	year         = 2010,
	journal      = {Journal of the ACM (JACM)},
	volume       = 57,
	number       = 3
}
@inproceedings{balcan2013active,
	title        = {Active and passive learning of linear separators under log-concave distributions},
	author       = {Maria-Florina Balcan and Phil Long},
	year         = 2013,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@inproceedings{balcan2016improved,
	title        = {An Improved Gap-Dependency Analysis of the Noisy Power Method},
	author       = {Maria-Florina Balcan and Simon Shaolei Du and Yining Wang and Adams Wei Yu},
	year         = 2016,
	month        = {23--26 Jun},
	booktitle    = {29th Annual Conference on Learning Theory},
	publisher    = {PMLR},
	address      = {Columbia University, New York, New York, USA},
	series       = {Proceedings of Machine Learning Research},
	volume       = 49,
	pages        = {284--309},
	url          = {http://proceedings.mlr.press/v49/balcan16a.html},
	editor       = {Vitaly Feldman and Alexander Rakhlin and Ohad Shamir},
	pdf          = {http://proceedings.mlr.press/v49/balcan16a.pdf},
	abstract     = {We consider the \emph{noisy power method} algorithm, which has wide applications in machine learning and statistics, especially those related to principal component analysis (PCA) under resource (communication, memory or privacy) constraints. Existing analysis of the noisy power method shows an unsatisfactory dependency over the ``consecutive'' spectral gap $(\sigma_k-\sigma_{k+1})$ of an input data matrix, which could be very small and hence limits the algorithm's applicability. In this paper, we present a new analysis of the noisy power method that achieves improved gap dependency for both sample complexity and noise tolerance bounds. More specifically, we improve the dependency over $(\sigma_k-\sigma_{k+1})$ to dependency over $(\sigma_k-\sigma_{q+1})$, where $q$ is an intermediate algorithm parameter and could be much larger than the target rank $k$. Our proofs are built upon a novel characterization of proximity between two subspaces that differ from canonical angle characterizations analyzed in previous works. Finally, we apply our improved bounds to distributed private PCA and memory-efficient streaming PCA and obtain bounds that are superior to existing results in the literature.}
}
@article{baldi1989neural,
	title        = {Neural networks and principal component analysis: Learning from examples without local minima},
	author       = {Baldi, Pierre and Hornik, Kurt},
	year         = 1989,
	month        = jan,
	journal      = {Neural networks},
	publisher    = {Elsevier},
	address      = {Oxford, UK},
	volume       = 2,
	number       = 1,
	pages        = {53--58},
	doi          = {10.1016/0893-6080(89)90014-2},
	issn         = {0893-6080},
	url          = {http://dx.doi.org/10.1016/0893-6080(89)90014-2},
	issue_date   = 1989,
	numpages     = 6,
	acmid        = 70362
}
@inproceedings{baldi2013understanding,
	title        = {Understanding dropout},
	author       = {Pierre Baldi and Peter J Sadowski},
	year         = 2013,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {2814--2822}
}
@article{baldi2014dropout,
	title        = {The dropout learning algorithm},
	author       = {Pierre Baldi and Peter Sadowski},
	year         = 2014,
	journal      = {Artificial intelligence},
	volume       = 210,
	pages        = {78--122}
}
@inproceedings{baldridge02ccg,
	title        = {Coupling {CCG} with Hybrid Logic Dependency Semantics},
	author       = {Jason Baldridge and Geert-Jan M. Kruijff},
	year         = 2002,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {319--326}
}
@article{balle2014spectral,
	title        = {Spectral learning of weighted automata - a forward-backward perspective},
	author       = {Borja Balle and Xavier Carreras and Franco M. Luque and Ariadna Quattoni},
	year         = 2014,
	journal      = {Machine Learning},
	volume       = 96,
	number       = 1,
	pages        = {33--63}
}
@article{balog2016deepcoder,
	title        = {Deepcoder: Learning to write programs},
	author       = {Matej Balog and Alexander L Gaunt and Marc Brockschmidt and Sebastian Nowozin and Daniel Tarlow},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1611.01989}
}
@inproceedings{Balsubramani2013-incrementalPCA,
	title        = {The fast convergence of incremental {PCA}},
	author       = {Balsubramani, Akshay and Dasgupta, Sanjoy and Freund, Yoav},
	year         = 2013,
	booktitle    = {NIPS},
	pages        = {3174--3182}
}
@inproceedings{balsubramani2015scalable,
	title        = {Scalable semi-supervised aggregation of classifiers},
	author       = {Akshay Balsubramani and Yoav Freund},
	year         = 2015,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {1351--1359}
}
@article{balsubramani2016learning,
	title        = {Learning to Abstain from Binary Prediction},
	author       = {Akshay Balsubramani},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.08151}
}
@article{baltruvsaitis2017multimodal,
	title        = {Multimodal Machine Learning: A Survey and Taxonomy},
	author       = {Tadas Baltru{\v{s}}aitis and Chaitanya Ahuja and Louis-Philippe Morency},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1705.09406}
}
@inproceedings{balzano2010column,
	title        = {Column subset selection with missing data},
	author       = {Balzano, Laura and Nowak, Robert and Bajwa, Waheed},
	year         = 2010,
	booktitle    = {NIPS Workshop on Low-Rank Methods for Large-Scale Machine Learning},
	volume       = 1,
	organization = {Citeseer}
}
@inproceedings{banarescu2013amr,
	title        = {Abstract Meaning Representation for Sembanking},
	author       = {Laura Banarescu and Claire Bonial and Shu Cai and Madalina Georgescu and Kira Griffitt and Ulf Hermjakob and Kevin Knight and Philipp Koehn and Martha Palmer and Nathan Schneider},
	year         = 2013,
	booktitle    = {7th Linguistic Annotation Workshop and Interoperability with Discourse}
}
@inproceedings{bandeira2014multireference,
	title        = {Multireference alignment using semidefinite programming},
	author       = {Bandeira, Afonso S and Charikar, Moses and Singer, Amit and Zhu, Andy},
	year         = 2014,
	booktitle    = {Proceedings of the 5th conference on Innovations in theoretical computer science},
	pages        = {459--470},
	organization = {ACM}
}
@article{bandeira2014sharp,
	title        = {Sharp nonasymptotic bounds on the norm of random matrices with independent entries},
	author       = {Afonso S. Bandeira and Ramon van Handel},
	year         = 2014,
	journal      = {arXiv}
}
@inproceedings{bandeira2016low,
	title        = {On the low-rank approach for semidefinite programs arising in synchronization and community detection},
	author       = {Bandeira, Afonso S and Boumal, Nicolas and Voroninski, Vladislav},
	year         = 2016,
	journal      = {arXiv preprint arXiv:1602.04426},
	booktitle    = {Conference on learning theory},
	pages        = {361--382},
	organization = {PMLR}
}
@article{bandi2018detection,
	title        = {From detection of individual metastases to classification of lymph node status at the patient level: the {CAMELYON17} challenge},
	author       = {Peter Bandi and Oscar Geessink and Quirine Manson and Marcory Van Dijk and Maschenka Balkenhol and Meyke Hermsen and Babak Ehteshami Bejnordi and Byungjae Lee and Kyunghyun Paeng and Aoxiao Zhong and others},
	year         = 2018,
	journal      = {IEEE Transactions on Medical Imaging},
	volume       = 38,
	number       = 2,
	pages        = {550--560}
}
@article{banerjee2005clustering,
	title        = {Clustering with Bregman divergences},
	author       = {Banerjee, Arindam and Merugu, Srujana and Dhillon, Inderjit S. and Ghosh, Joydeep},
	year         = 2005,
	journal      = {The Journal of Machine Learning Research},
	publisher    = {JMLR. org},
	volume       = 6,
	pages        = {1705--1749}
}
@inproceedings{banerjee2005meteor,
	title        = {METEOR: An automatic metric for mt evaluation with improved correlation with human judgments},
	author       = {Satanjeev Banerjee and Alon Lavie},
	year         = 2005,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@inproceedings{banko2007open,
	title        = {Open Information Extraction from the Web},
	author       = {Michele Banko and Michael J Cafarella and Stephen Soderland and Matthew Broadhead and Oren Etzioni},
	year         = 2007,
	booktitle    = {International Joint Conference on Artificial Intelligence (IJCAI)},
	pages        = {2670--2676}
}
@article{banks2016information,
	title        = {Information-theoretic thresholds for community detection in sparse networks},
	author       = {Jess Banks and Christopher Moore},
	year         = 2016,
	journal      = {arXiv}
}
@inproceedings{bannard2005paraphrasing,
	title        = {Paraphrasing with bilingual parallel corpora},
	author       = {Colin Bannard and Chris Callison-Burch},
	year         = 2005,
	booktitle    = {Association for Computational Linguistics (ACL)},
	pages        = {597--604}
}
@inproceedings{bansal2006automatic,
	title        = {Automatic Generation of Peephole Superoptimizers},
	author       = {Sorav Bansal and Alex Aiken},
	year         = 2006,
	booktitle    = {Architectural Support for Programming Languages and Operating Systems (ASPLOS)}
}
@inproceedings{Bansal2011,
	title        = {{Min-max Graph Partitioning and Small Set Expansion}},
	author       = {Bansal, Nikhil and Feige, Uriel and Krauthgamer, Robert and Makarychev, Konstantin and Nagarajan, Viswanath and Naor, Joseph (Seffi) and Schwartz, Roy},
	year         = 2011,
	month        = oct,
	journal      = {SIAM Journal on Computing},
	booktitle    = {2011 IEEE 52nd Annual Symposium on Foundations of Computer Science},
	publisher    = {IEEE},
	volume       = 43,
	number       = 2,
	pages        = {17--26},
	doi          = {10.1109/FOCS.2011.79},
	isbn         = {978-0-7695-4571-4},
	abstract     = {We study graph partitioning problems from a min-max perspective, in which an input graph on n vertices should be partitioned into k parts, and the objective is to minimize the maximum number of edges leaving a single part. The two main versions we consider are where the k parts need to be of equal-size, and where they must separate a set of k given terminals. We consider a common generalization of these two problems, and design for it an $O(\sqrt{\log n\log k})$-approximation algorithm. This improves over an $O(\log^2 n)$ approximation for the second version, and roughly $O(k\log n)$ approximation for the first version that follows from other previous work. We also give an improved O(1)-approximation algorithm for graphs that exclude any fixed minor. Our algorithm uses a new procedure for solving the Small-Set Expansion problem. In this problem, we are given a graph G and the goal is to find a non-empty set $S\subseteq V$ of size $|S| \leq \rho n$ with minimum edge-expansion. We give an $O(\sqrt{\log{n}\log{(1/\rho)}})$ bicriteria approximation algorithm for the general case of Small-Set Expansion, and O(1) approximation algorithm for graphs that exclude any fixed minor.},
	archiveprefix = {arXiv},
	arxivid      = {1110.4319},
	eprint       = {1110.4319},
	file         = {:C$\backslash$:/Users/Zeyuan/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bansal et al. - 2011 - Min-max Graph Partitioning and Small Set Expansion.pdf:pdf},
	mendeley-groups = {Algorithms/Sparsest Cut,Algorithms/Small Set Expansion,Algorithms/Sparsest Cut/SSE}
}
@inproceedings{bansal2014provable,
	title        = {A provable {SVD}-based algorithm for learning topics in dominant admixture corpus},
	author       = {Trapit Bansal and Chiranjib Bhattacharyya and Ravindran Kannan},
	year         = 2014,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)}
}
@inproceedings{bansal2017hamilton,
	title        = {Hamilton-Jacobi reachability: A brief overview and recent advances},
	author       = {Bansal, Somil and Chen, Mo and Herbert, Sylvia and Tomlin, Claire J},
	year         = 2017,
	booktitle    = {2017 IEEE 56th Annual Conference on Decision and Control (CDC)},
	pages        = {2242--2253},
	organization = {IEEE}
}
@article{bansal2020self,
	title        = {For self-supervised learning, Rationality implies generalization, provably},
	author       = {Bansal, Yamini and Kaplun, Gal and Barak, Boaz},
	year         = 2020,
	journal      = {arXiv preprint arXiv:2010.08508}
}
@article{bao2014approximation,
	title        = {Approximation analysis of convolutional neural networks},
	author       = {Bao, Chenglong and Li, Qianxiao and Shen, Zuowei and Tai, Cheng and Wu, Lei and Xiang, Xueshuang},
	year         = 2014,
	journal      = {work},
	volume       = 65
}
@inproceedings{bao2014qa,
	title        = {Knowledge-based Question Answering as Machine Translation},
	author       = {J. Bao and Nan Duan and Ming Zhou and Tiejun Zhao},
	year         = 2014,
	booktitle    = {Association for Computational Linguistics (ACL)}
}
@inproceedings{bao2016constraint,
	title        = {Constraint-Based Question Answering with Knowledge Graph},
	author       = {Junwei Bao and Nan Duan and Zhao Yan and Ming Zhou and Tiejun Zhao},
	year         = 2016,
	booktitle    = {International Conference on Computational Linguistics (COLING)}
}
@book{bar1964language,
	title        = {Language and Information: Selected Essays on Their Theory and Application},
	author       = {Y Bar-Hillel},
	year         = 1964,
	publisher    = {Addison-Wesley/The Jerusalem Academic Press}
}
@inproceedings{barak2012hypercontractivity,
	title        = {Hypercontractivity, sum-of-squares proofs, and their applications},
	author       = {Boaz Barak and Fernando Brand{\~a}o and Aram Harrow and Jonathan Kelner and David Steurer and Yuan Zhou},
	year         = 2012,
	booktitle    = {Symposium on Theory of Computing (STOC)},
	pages        = {307--326}
}
@article{barak2014,
	title        = {Dictionary Learning and Tensor Decomposition via the Sum-of-Squares Method},
	author       = {Barak, Boaz and Kelner, Jonathan and Steurer, David},
	year         = 2014,
	journal      = {arXiv preprint arXiv:1407.1543}
}
@article{barak2014sum,
	title        = {Sum-of-squares proofs and the quest toward optimal algorithms},
	author       = {Barak, Boaz and Steurer, David},
	year         = 2014,
	journal      = {arXiv preprint arXiv:1404.5236}
}
@inproceedings{barak2016nearly,
	title        = {A nearly tight sum-of-squares lower bound for the planted clique problem},
	author       = {Boaz Barak and Samuel B. Hopkins and Jonathan Kelner and Pravesh Kothari and Ankur Moitra and Aaron Potechin},
	year         = 2016,
	booktitle    = {Foundations of Computer Science (FOCS)},
	pages        = {428--437}
}
@misc{barak2016tutorial,
	title        = {Proofs, beliefs, and algorithms through the lens of sum-of-squares},
	author       = {Boaz Barak and David Steurer},
	year         = 2016,
	howpublished = {\url{https://www.sumofsquares.org/public/index.html}}
}
@inproceedings{baram2017end,
	title        = {End-to-end differentiable adversarial imitation learning},
	author       = {N. Baram and O. Anschel and I. Caspi and S. Mannor},
	year         = 2017,
	booktitle    = {International Conference on Machine Learning (ICML)},
	pages        = {390--399}
}
@article{baraniuk2008simple,
	title        = {A simple proof of the restricted isometry property for random matrices},
	author       = {Baraniuk, Richard and Davenport, Mark and DeVore, Ronald and Wakin, Michael},
	year         = 2008,
	journal      = {Constructive Approximation},
	publisher    = {Springer},
	volume       = 28,
	number       = 3,
	pages        = {253--263}
}
@article{barany2012notes,
	title        = {Notes about the {C}arath{\'e}odory number},
	author       = {Imre B{\'a}r{\'a}ny and Roman Karasev},
	year         = 2012,
	journal      = {Discrete \& Computational Geometry},
	volume       = 48,
	number       = 3,
	pages        = {783--792}
}
@inproceedings{barber2003algorithm,
	title        = {The {IM} algorithm: a variational approach to information maximization},
	author       = {David Barber and Felix V Agakov},
	year         = 2003,
	booktitle    = {Advances in neural information processing systems}
}
@article{barber2019conformal,
	title        = {Conformal prediction under covariate shift},
	author       = {Barber, Rina Foygel and Candes, Emmanuel J and Ramdas, Aaditya and Tibshirani, Ryan J},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1904.06019}
}
@article{barber2019limits,
	title        = {The limits of distribution-free conditional predictive inference},
	author       = {Barber, Rina Foygel and Candes, Emmanuel J and Ramdas, Aaditya and Tibshirani, Ryan J},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1903.04684}
}
@article{barber2019predictive,
	title        = {Predictive inference with the jackknife+},
	author       = {Barber, Rina Foygel and Candes, Emmanuel J and Ramdas, Aaditya and Tibshirani, Ryan J},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1905.02928}
}
@article{barbu05swendsen,
	title        = {Generalizing {S}wendsen-{W}ang to sampling arbitrary posterior probabilities},
	author       = {A. Barbu and S. C. Zhu},
	year         = 2005,
	journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)},
	volume       = 27,
	pages        = {1239--1253}
}
@article{barbu2009training,
	title        = {Training an active random field for real-time image denoising},
	author       = {Adrian Barbu},
	year         = 2009,
	journal      = {IEEE Transactions on Image Processing},
	volume       = 18,
	number       = 11,
	pages        = {2451--2462}
}
@inproceedings{barbu2019objectnet,
	title        = {Objectnet: A large-scale bias-controlled dataset for pushing the limits of object recognition models},
	author       = {Andrei Barbu and David Mayo and Julian Alverio and William Luo and Christopher Wang and Dan Gutfreund and Josh Tenenbaum and Boris Katz},
	year         = 2019,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {9453--9463}
}
@inproceedings{barcelo11grpah,
	title        = {Querying Graph Patterns},
	author       = {Pablo Barcelo and Leonid Libkin and Juan Reutter},
	year         = 2011,
	booktitle    = {Symposium on Principles of Database Systems}
}
@article{bard1991some,
	title        = {Some properties of the bilevel programming problem},
	author       = {Jonathan F Bard},
	year         = 1991,
	journal      = {Journal of optimization theory and applications},
	volume       = 68,
	number       = 2,
	pages        = {371--378}
}
@book{bard1999,
	title        = {Practical Bilevel Optimization: Algorithms and Applications},
	author       = {Jonathan F. Bard},
	year         = 1999,
	publisher    = {Springer}
}
@article{bardes2021vicreg,
	title        = {VICReg: Variance-Invariance-Covariance Regularization for Self-Supervised Learning},
	author       = {Bardes, Adrien and Ponce, Jean and LeCun, Yann},
	year         = 2021,
	journal      = {arXiv preprint arXiv:2105.04906}
}
@article{bardet2018functional,
	title        = {Functional inequalities for {G}aussian convolutions of compactly supported measures: explicit bounds and dimension dependence},
	author       = {Jean-Baptiste Bardet and Natha{\"e}l Gozlan and Florent Malrieu and Pierre-Andr{\'e} Zitt},
	year         = 2018,
	journal      = {Bernoulli},
	volume       = 24,
	pages        = {333--353}
}
@inproceedings{barhaim2008,
	title        = {Efficient Semantic Deduction and Approximate Matching over Compact Parse Forests},
	author       = {Roy Bar-Haim and Jonathan Berant and Ido Dagan and Iddo Greental and Shachar Mirkin and Eyal Shnarch and Idan Szpektor},
	year         = 2008,
	booktitle    = {Text Analysis Conference}
}
@inproceedings{barhaim2009forest,
	title        = {A Compact Forest for Scalable Inference over Entailment and Paraphrase Rules},
	author       = {Roy Bar-Haim and Jonathan Berant and Ido Dagan},
	year         = 2009,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)}
}
@article{barker02continuations,
	title        = {Continuations and the nature of quantification},
	author       = {Chris Barker},
	year         = 2002,
	journal      = {Natural Language Semantics},
	volume       = 10,
	pages        = {211--242}
}
@article{barocas2016,
	title        = {Big Data's Disparate Impact},
	author       = {Solon Barocas and Andrew D. Selbst},
	year         = 2016,
	journal      = {California Law Review},
	volume       = 104,
	number       = 3,
	pages        = {671--732}
}
@inproceedings{baroni2010nouns,
	title        = {Nouns are vectors, adjectives are matrices: Representing adjective-noun constructions in semantic space},
	author       = {Marco Baroni and Roberto Zamparelli},
	year         = 2010,
	booktitle    = {Empirical Methods in Natural Language Processing (EMNLP)},
	pages        = {1183--1193}
}
@article{barreno2010security,
	title        = {The security of machine learning},
	author       = {Marco Barreno and Blaine Nelson and Anthony D. Joseph and J. D. Tygar},
	year         = 2010,
	journal      = {Machine Learning},
	volume       = 81,
	number       = 2,
	pages        = {121--148}
}
@article{barreto2011computing,
	title        = {Computing the stationary distribution of a finite Markov chain through stochastic factorization},
	author       = {Barreto, Andr{\'e} MS and Fragoso, Marcelo D},
	year         = 2011,
	journal      = {SIAM Journal on Matrix Analysis and Applications},
	publisher    = {SIAM}
}
@inproceedings{barreto2011reinforcement,
	title        = {Reinforcement learning using kernel-based stochastic factorization},
	author       = {Barreto, Andre and Precup, Doina and Pineau, Joelle},
	year         = 2011,
	booktitle    = {Advances in Neural Information Processing Systems}
}
@article{barreto2014policy,
	title        = {Policy iteration based on stochastic factorization},
	author       = {Barreto, Andr\'e M. S. and Pineau, Joelle and Precup, Doina},
	year         = 2014,
	journal      = {J. Artificial Intelligence Res.},
	volume       = 50,
	pages        = {763--803},
	issn         = {1076-9757},
	fjournal     = {Journal of Artificial Intelligence Research},
	mrclass      = {90C40 (68T20 90C39)},
	mrnumber     = 3254852,
	mrreviewer   = {Masayuki Horiguchi}
}
@inproceedings{barrio2016comprehension,
	title        = {Improving the Comprehension of Numbers in the News},
	author       = {Pablo J. Barrio and Daniel G. Goldstein and Jake M. Hofman},
	year         = 2016,
	booktitle    = {Conference on Human Factors in Computing Systems (CHI)}
}
@article{barron1993universal,
	title        = {Universal approximation bounds for superpositions of a sigmoidal function},
	author       = {Barron, Andrew R},
	year         = 1993,
	journal      = {IEEE Transactions on Information theory},
	publisher    = {IEEE},
	volume       = 39,
	number       = 3,
	pages        = {930--945}
}
@book{barroso2009datacenter,
	title        = {The Datacenter as a Computer: An Introduction to the Design of Warehouse-Scale Machines},
	author       = {Barroso, Luiz A. and H\"{o}lzle, Urs},
	year         = 2009,
	publisher    = {Morgan and Claypool Publishers},
	isbn         = {159829556X, 9781598295566},
	edition      = {1st},
	abstract     = {
		As computation continues to move into the cloud, the computing platform

		of interest no longer resembles a pizza box or a refrigerator,

		but a warehouse full of computers. These new large datacenters are

		quite different from traditional hosting facilities of earlier times

		and cannot be viewed simply as a collection of co-located servers.

		Large portions of the hardware and software resources in these facilities

		must work in concert to efficiently deliver good levels of Internet

		service performance, something that can only be achieved by a holistic

		approach to their design and deployment. In other words, we must

		treat the datacenter itself as one massive warehouse-scale computer

		(WSC). We describe the architecture of WSCs, the main factors influencing

		their design, operation, and cost structure, and the characteristics

		of their software base. We hope it will be useful to architects and

		programmers of today's WSCs, as well as those of future many-core

		platforms which may one day implement the equivalent of today's WSCs

		on a single board.
	},
	comment      = {
		Pretty extensive description of the reasons behind scaling out vs.

		scaling up with commodity hardware and the resulting implications.
	},
	keywords     = {datacenter, google},
	myurl        = {http://www.morganclaypool.com/doi/abs/10.2200/S00193ED1V01Y200905CAC006}
}
@inproceedings{barry2013manipulation,
	title        = {Manipulation with multiple action types},
	author       = {J. Barry and K. Hsiao and L. P. Kaelbling and T. Lozano-P{\'e}rez},
	year         = 2013,
	booktitle    = {Experimental Robotics},
	pages        = {531--545}
}
@inproceedings{BartalByersRaz1997,
	title        = {{Global optimization using local information with applications to flow control}},
	author       = {Bartal, Yair and Byers, John W. and Raz, Danny},
	year         = 1997,
	booktitle    = {Proceedings 38th Annual Symposium on Foundations of Computer Science},
	publisher    = {IEEE Comput. Soc},
	pages        = {303--312},
	doi          = {10.1109/SFCS.1997.646119},
	isbn         = {0-8186-8197-7},
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Bartal, Byers, Raz - 1997 - Global optimization using local information with applications to flow control.pdf:pdf},
	mendeley-groups = {Algorithms/Multiplicative Weight/LP}
}
@article{BartalByersRaz2004,
	title        = {{Fast, Distributed Approximation Algorithms for Positive Linear Programming with Applications to Flow Control}},
	author       = {Bartal, Yair and Byers, John W. and Raz, Danny},
	year         = 2004,
	month        = jan,
	journal      = {SIAM Journal on Computing},
	volume       = 33,
	number       = 6,
	pages        = {1261--1279},
	doi          = {10.1137/S0097539700379383},
	issn         = {0097-5397},
	file         = {:C$\backslash$:/Users/Zeyuan/Documents/Mendeley Desktop/Bartal, Byers, Raz - 2004 - Fast, Distributed Approximation Algorithms for Positive Linear Programming with Applications to Flow Control.pdf:pdf},
	keywords     = {1,10,1137,68w15,68w25,ams subject classifications,approximation algorithm,doi,environment must make decisions,flow control,introduction,linear programming,primal-dual,processors in a distributed,s0097539700379383},
	mendeley-groups = {Algorithms/Multiplicative Weight/LP}
}
@inproceedings{bartlett01rademacher,
	title        = {{R}ademacher and {G}aussian complexities: Risk bounds and structural results},
	author       = {P. L. Bartlett and S. Mendelson},
	year         = 2001,
	booktitle    = {Conference on Learning Theory (COLT)},
	pages        = {224--240}
}
@article{bartlett05local,
	title        = {Local {R}ademacher complexities},
	author       = {Peter L. Bartlett and Olivier Bousquet and Shahar Mendelson},
	year         = 2005,
	journal      = {Annals of Statistics},
	volume       = 33,
	number       = 4,
	pages        = {1497--1537}
}
@inproceedings{bartlett1992learning,
	title        = {Learning with a slowly changing distribution},
	author       = {Peter L. Bartlett},
	year         = 1992,
	booktitle    = {Conference on Learning Theory (COLT)}
}
@article{bartlett1996learning,
	title        = {Learning Changing Concepts by Exploiting the Structure of Change},
	author       = {Peter L. Bartlett and Shai Ben-David and Sanjeev R. Kulkarni},
	year         = 1996,
	journal      = {Machine Learning},
	volume       = 41
}
@article{bartlett2002rademacher,
	title        = {Rademacher and Gaussian complexities: Risk bounds and structural results},
	author       = {Bartlett, Peter L and Mendelson, Shahar},
	year         = 2002,
	journal      = {Journal of Machine Learning Research},
	volume       = 3,
	number       = {Nov},
	pages        = {463--482}
}
@article{bartlett2008classification,
	title        = {Classification with a reject option using a hinge loss},
	author       = {Peter L Bartlett and Marten H Wegkamp},
	year         = 2008,
	journal      = {Journal of Machine Learning Research (JMLR)},
	volume       = 9,
	number       = {0},
	pages        = {1823--1840}
}
@inproceedings{bartlett2008high,
	title        = {High-probability regret bounds for bandit online linear optimization},
	author       = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas and Kakade, Sham and Rakhlin, Alexander and Tewari, Ambuj},
	year         = 2008,
	booktitle    = {COLT 2008},
	file         = {:D$\backslash$:/Mendeley Desktop/Bartlett et al. - 2008 - High-probability regret bounds for bandit online linear optimization.pdf:pdf},
	mendeley-groups = {Optimization/Bandit}
}
@inproceedings{bartlett2009regal,
	title        = {REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs},
	author       = {Bartlett, Peter L and Tewari, Ambuj},
	year         = 2009,
	journal      = {arXiv preprint arXiv:1205.2661},
	booktitle    = {Proceedings of the 25th Conference on Uncertainty in Artificial Intelligence (UAI 2009)}
}
@inproceedings{bartlett2017spectral,
	title        = {Spectrally-normalized margin bounds for neural networks},
	author       = {Peter Bartlett and Dylan J. Foster and Matus Telgarsky},
	year         = 2017,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)}
}
@article{bartlett2017spectrally,
	title        = {Spectrally-normalized margin bounds for neural networks},
	author       = {Bartlett, Peter and Foster, Dylan J and Telgarsky, Matus},
	year         = 2017,
	journal      = {arXiv preprint arXiv:1706.08498}
}
@article{bartlett2019benign,
	title        = {Benign Overfitting in Linear Regression},
	author       = {Peter L. Bartlett and Philip M. Long and G{\'a}bor Lugosi and Alexander Tsigler},
	year         = 2019,
	journal      = {arXiv preprint arXiv:1906.11300}
}
@article{bartlett2019nearly,
	title        = {Nearly-tight VC-dimension and pseudodimension bounds for piecewise linear neural networks},
	author       = {Bartlett, Peter L and Harvey, Nick and Liaw, Christopher and Mehrabian, Abbas},
	year         = 2019,
	journal      = {The Journal of Machine Learning Research},
	publisher    = {JMLR.org},
	volume       = 20,
	number       = 1,
	pages        = {2285--2301}
}
@article{bartlett53approximate,
	title        = {Approximate confidence intervals. {II}. {M}ore than one unknown parameter},
	author       = {M. S. Bartlett},
	year         = 1953,
	journal      = {Biometrika},
	volume       = 40,
	pages        = {306--317}
}
@book{barto1998reinforcement,
	title        = {Reinforcement learning: An introduction},
	author       = {Sutton, Richard S and Barto, Andrew G},
	year         = 1998,
	publisher    = {MIT press}
}
@article{barvinok95problems,
	title        = {Problems of Distance Geometry and Convex Properties of Quadratic Maps},
	author       = {A. I. Barvinok},
	year         = 1995,
	journal      = {Discrete \& Computational Geometry},
	volume       = 13,
	pages        = {189--202}
}
@inproceedings{barzilay04content,
	title        = {Catching the Drift: Probabilistic Content Models, with Applications to Generation and Summarization},
	author       = {Regina Barzilay and Lillian Lee},
	year         = 2004,
	booktitle    = {Human Language Technology and North American Association for Computational Linguistics (HLT/NAACL)}
}
@inproceedings{barzilay05content,
	title        = {Collective Content Selection for Concept-To-Text Generation},
	author       = {Regina Barzilay and Mirella Lapata},
	year         = 2005,
	booktitle    = {Human Language Technology and Empirical Methods in Natural Language Processing (HLT/EMNLP)},
	pages        = {331--338}
}
@inproceedings{barzilay06aggregation,
	title        = {Aggregation via Set Partitioning for Natural Language Generation},
	author       = {Regina Barzilay and Mirella Lapata},
	year         = 2006,
	booktitle    = {North American Association for Computational Linguistics (NAACL)}
}
@article{barzilay08coherence,
	title        = {Modeling Local Coherence: An Entity-based Approach},
	author       = {Regina Barzilay and Mirella Lapata},
	year         = 2008,
	journal      = {Computational Linguistics},
	volume       = 34,
	pages        = {1--34}
}
@inproceedings{barzilay2003learning,
	title        = {Learning to paraphrase: An unsupervised approach using multiple-sequence alignment},
	author       = {Regina Barzilay and Lillian Lee},
	year         = 2003,
	booktitle    = {Human Language Technology and North American Association for Computational Linguistics (HLT/NAACL)},
	pages        = {16--23}
}
@inproceedings{bash2007cool,
	title        = {Cool job allocation: measuring the power savings of placing jobs at cooling-efficient locations in the data center},
	author       = {Bash, Cullen and Forman, George},
	year         = 2007,
	booktitle    = {Proceedings of the 2007 USENIX Annual Technical Conference},
	location     = {Santa Clara, CA},
	publisher    = {USENIX Association},
	address      = {Berkeley, CA, USA},
	pages        = {29:1--29:6},
	isbn         = {999-8888-77-6},
	acmid        = 1364414,
	articleno    = 29,
	myurl        = {http://dl.acm.org/citation.cfm?id=1364385.1364414},
	numpages     = 6
}
@article{basseville1988detecting,
	title        = {Detecting changes in signals and systems---A survey},
	author       = {Mich{\`e}le Basseville},
	year         = 1988,
	journal      = {Automatica},
	volume       = 24,
	number       = 3,
	pages        = {309--326}
}
@article{bassiri2011interactional,
	title        = {Interactional feedback and the impact of attitude and motivation on noticing {L2} form},
	author       = {Mohammad Amin Bassiri},
	year         = 2011,
	journal      = {English Language and Literature Studies},
	volume       = 1,
	number       = 2,
	pages        = {61--73}
}
@inproceedings{bastani2016measuring,
	title        = {Measuring neural net robustness with constraints},
	author       = {Osbert Bastani and Yani Ioannou and Leonidas Lampropoulos and Dimitrios Vytiniotis and Aditya Nori and Antonio Criminisi},
	year         = 2016,
	booktitle    = {Advances in Neural Information Processing Systems (NeurIPS)},
	pages        = {2613--2621}
}
@inproceedings{bastani2017synthesizing,
	title        = {Synthesizing Program Input Grammars},
	author       = {Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang},
	year         = 2017,
	booktitle    = {Programming Language Design and Implementation (PLDI)}
}
@inproceedings{bastani2018active,
	title        = {Active Learning of Points-To Specifications},
	author       = {Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang},
	year         = 2018,
	booktitle    = {Programming Language Design and Implementation (PLDI)}
}
@article{batson2012twice,
	title        = {Twice-{R}amanujan sparsifiers},
	author       = {Batson, Joshua and Spielman, Daniel A and Srivastava, Nikhil},
	year         = 2012,
	month        = may,
	journal      = {SIAM Journal on Computing},
	publisher    = {SIAM},
	address      = {New York, New York, USA},
	volume       = 41,
	number       = 6,
	pages        = {1704--1721},
	doi          = {10.1137/090772873},
	isbn         = 9781605585062,
	issn         = {0097-5397},
	abstract     = {We prove that every graph has a spectral sparsifier with a number of edges linear in its number of vertices. As linear-sized spectral sparsifiers of complete graphs are expanders, our sparsifiers of arbitrary graphs can be viewed as generalizations of expander graphs. In particular, we prove that for every $d>1$ and every undirected, weighted graph $G=(V,E,w)$ on $n$ vertices, there exists a weighted graph $H=(V,F,\tilde{w})$ with at most $\lceil d(n-1) \rceil$ edges such that for every $x \in \mathbb{R}^{V}$, \[ x^{T}L_{G}x \leq x^{T}L_{H}x \leq \left(\frac{d+1+2\sqrt{d}}{d+1-2\sqrt{d}}\right)\cdot x^{T}L_{G}x \] where $L_{G}$ and $L_{H}$ are the Laplacian matrices of $G$ and $H$, respectively. Thus, $H$ approximates $G$ spectrally at least as well as a Ramanujan expander with $dn/2$ edges approximates the complete graph. We give an elementary deterministic polynomial time algorithm for constructing $H$.},
	archiveprefix = {arXiv},
	arxivid      = {0808.0163},
	eprint       = {0808.0163},
	file         = {:C$\backslash$:/Users/Zeyuan/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Batson, Spielman, Srivastava - 2009 - Twice-\{R\}amanujan Sparsifiers.pdf:pdf},
	mendeley-groups = {Algorithms/Sparsification}
}
@inproceedings{bau2017network,
	title        = {Network dissection: Quantifying interpretability of deep visual representations},
	author       = {David Bau and Bolei Zhou and Aditya Khosla and Aude Oliva and Antonio Torralba},
	year         = 2017,
	booktitle    = {Computer Vision and Pattern Recognition (CVPR)},
	pages        = {6541--6549}
}
@phdthesis{Bau96,
	title        = {Projection Algorithms and Monotone Operators},
	author       = {Bauschke, Heinz H.},
	year         = 1996,
	school       = {Simon Fraser University},
	isbn         = {0-612-16789-5},
	advisor      = {Borwein, Jonathan M.}
}
@article{baum1970maximization,
	title        = {A Maximization Technique Occurring in the Statistical Analysis of Probabilistic Functions of {M}arkov Chains},
	author       = {Baum, Leonard E. and Petrie, Ted and Soules, George and Weiss, Norman},
	year         = 1970,
	journal      = {The Annals of Mathematical Statistics},
	publisher    = {Institute of Mathematical Statistics},
	volume       = 41,
	number       = 1,
	pages        = {164--171},
	issn         = {00034851},
	copyright    = {Copyright {\copyright} 1970 Institute of Mathematical Statistics},
	jstor_formatteddate = {Feb., 1970},
	language     = {English},
	myurl        = {http://www.jstor.org/stable/2239727}
}
@article{baum1990polynomial,
	title        = {A polynomial time algorithm that learns two hidden unit nets},
	author       =
Download .txt
gitextract_7v14hukr/

├── .gitignore
├── Templates/
│   ├── macros.tex
│   ├── master.tex
│   ├── template.tex
│   ├── yoursunetID.tex
│   └── yoursunetID2.tex
└── tex/
    ├── all.bib
    ├── bibliography.bib
    ├── collection/
    │   ├── 01supervised.tex
    │   ├── 02-01-2021.tex
    │   ├── 02asymptotics.tex
    │   ├── 03concentration.tex
    │   ├── 04-01-uniform.tex
    │   ├── 04-02-uniform.tex
    │   ├── 04-03-uniform.tex
    │   ├── 05-01-concrete-models.tex
    │   ├── 05-02-concrete-models.tex
    │   ├── 05-03-deep-nets.tex
    │   ├── 06-dltheory.tex
    │   ├── 07-01-nonconvex.tex
    │   ├── 07-02-nonconvex.tex
    │   ├── 07-03-nonconvex.tex
    │   ├── 07-03-ntk.tex
    │   ├── 07-05-ntk-limitation.tex
    │   ├── 08-01-algorithmic.tex
    │   ├── 08-02-algorithmic.tex
    │   ├── 08-03-algorithmic-new.tex
    │   ├── 08-03-algorithmic.tex
    │   ├── 09-01-data-dependent.tex
    │   ├── 09-01-unsupervised.tex
    │   ├── 10-01-online.tex
    │   └── 10-02-online.tex
    ├── figures/
    │   ├── chaining_figures.pptx
    │   ├── figures.pptx
    │   ├── multilayer_covering.pptx
    │   └── ntk-1d.py
    ├── macros.tex
    └── master.tex
Download .txt
SYMBOL INDEX (1 symbols across 1 files)

FILE: tex/figures/ntk-1d.py
  function fun (line 11) | def fun(alpha, i):
Condensed preview — 38 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,016K chars).
[
  {
    "path": ".gitignore",
    "chars": 2722,
    "preview": "## Core latex/pdflatex auxiliary files:\n*.aux\n*.lof\n*.log\n*.lot\n*.fls\n*.out\n*.toc\n*.fmt\n*.fot\n*.cb\n*.cb2\n.*.lb\n\n## Inter"
  },
  {
    "path": "Templates/macros.tex",
    "chars": 2174,
    "preview": "\\usepackage{color}\n\\usepackage{lipsum}\n\n\n\n\\ifnum\\lectureformat=1\n\\newcommand{\\metadata}[3]\n{\n\t\\newpage\n\t\n\t\\def\\lectureID"
  },
  {
    "path": "Templates/master.tex",
    "chars": 3317,
    "preview": "%% filename: amsbook-template.tex\n%% version: 1.1\n%% date: 2014/07/24\n%%\n%% American Mathematical Society\n%% Technical S"
  },
  {
    "path": "Templates/template.tex",
    "chars": 1768,
    "preview": "\t\\documentclass[11pt]{book}\n\t\n\t\\usepackage{amsfonts,amsthm, bm,amsmath, bbm,amssymb,mathtools}\n\t\\usepackage{fullpage}\n\t\n"
  },
  {
    "path": "Templates/yoursunetID.tex",
    "chars": 1191,
    "preview": "%\\newcommand{\\Exp}{\\mathbb{E}}\n\n% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date"
  },
  {
    "path": "Templates/yoursunetID2.tex",
    "chars": 986,
    "preview": "%\\newcommand{\\Exp}{\\mathbb{E}}\n\n% reset section counter\n\\setcounter{section}{0}\n\n\\metadata{2}{Mary and Alex}{Jan 3rd, 20"
  },
  {
    "path": "tex/all.bib",
    "chars": 2315248,
    "preview": "@inproceedings{chung2007four,\n\ttitle={Four proofs for the Cheeger inequality and graph partition algorithms},\n\tauthor={C"
  },
  {
    "path": "tex/bibliography.bib",
    "chars": 6324,
    "preview": "@inproceedings{arora2005fast,\n  title={Fast algorithms for approximate semidefinite programming using the multiplicative"
  },
  {
    "path": "tex/collection/01supervised.tex",
    "chars": 7175,
    "preview": "% reset section counter\n\n\\setcounter{section}{0}\n\n\\metadata{1}{Anusri Pampari and Gabriel Poesia}{Jan 11th, 2021}\n\n\nIn t"
  },
  {
    "path": "tex/collection/02-01-2021.tex",
    "chars": 8928,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{6}{Daniel Do}{Febru"
  },
  {
    "path": "tex/collection/02asymptotics.tex",
    "chars": 18597,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{2}{Alexander Ke and "
  },
  {
    "path": "tex/collection/03concentration.tex",
    "chars": 25802,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{3}{Brad Ross and Rob"
  },
  {
    "path": "tex/collection/04-01-uniform.tex",
    "chars": 24406,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{4}{Yizhou Qian}{Jan "
  },
  {
    "path": "tex/collection/04-02-uniform.tex",
    "chars": 21633,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{5}{Will Song}{Jan 2"
  },
  {
    "path": "tex/collection/04-03-uniform.tex",
    "chars": 31563,
    "preview": " % reset section counter\n%\\setcounter{section}{0}\n\\metadata{8}{David Lin and Jinhui Wang}{Feb.~8th, 2021}\n\n\\sec{Covering"
  },
  {
    "path": "tex/collection/05-01-concrete-models.tex",
    "chars": 9738,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{6}{Daniel Do}{Febru"
  },
  {
    "path": "tex/collection/05-02-concrete-models.tex",
    "chars": 37296,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{7}{Spencer M.~Richa"
  },
  {
    "path": "tex/collection/05-03-deep-nets.tex",
    "chars": 44747,
    "preview": "\\sec{Deep neural nets (via covering number)}\\label{sec:deep_nets}\nIn Section~\\ref{lec9:sec:cover_to_radem}, we discuss h"
  },
  {
    "path": "tex/collection/06-dltheory.tex",
    "chars": 12573,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{9}{Rafael Rafailov a"
  },
  {
    "path": "tex/collection/07-01-nonconvex.tex",
    "chars": 12871,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{10}{Kevin Han and Ha"
  },
  {
    "path": "tex/collection/07-02-nonconvex.tex",
    "chars": 14643,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{11}{Andrew Wang}{Fe"
  },
  {
    "path": "tex/collection/07-03-nonconvex.tex",
    "chars": 13089,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{12}{Rohan Taori and"
  },
  {
    "path": "tex/collection/07-03-ntk.tex",
    "chars": 42427,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{13}{Justin Young an"
  },
  {
    "path": "tex/collection/07-05-ntk-limitation.tex",
    "chars": 3055,
    "preview": "\\subsec{Limitations of NTK}\n\nThe NTK approach has its limitations.\n\\begin{itemize}\n    \\item Empirically, optimizing $g_"
  },
  {
    "path": "tex/collection/08-01-algorithmic.tex",
    "chars": 22025,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{13}{Rohith Kuditipud"
  },
  {
    "path": "tex/collection/08-02-algorithmic.tex",
    "chars": 14195,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{14}{Roshni Sahoo an"
  },
  {
    "path": "tex/collection/08-03-algorithmic-new.tex",
    "chars": 45219,
    "preview": "\\metadata{16}{Leah Reeder and Trevor Maxfield}{Nov 10th, 2021}\n\n\\sec{From small to large initialization: a precise chara"
  },
  {
    "path": "tex/collection/08-03-algorithmic.tex",
    "chars": 6073,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{17}{Jeff Z. HaoChen"
  },
  {
    "path": "tex/collection/09-01-data-dependent.tex",
    "chars": 13608,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{18}{Kaidi Cao, Ruoch"
  },
  {
    "path": "tex/collection/09-01-unsupervised.tex",
    "chars": 62235,
    "preview": "\\newcommand{\\jnote}[1]{{\\color{red}\\authnoteimp{JH}{#1}}}\n\n\\metadata{18}{Haoran Xu and Lewis Liu}{Nov 17th, 2021}\n\nWe ve"
  },
  {
    "path": "tex/collection/10-01-online.tex",
    "chars": 25247,
    "preview": "% reset section counter\n\\setcounter{section}{0}\n\n\\metadata{15}{Tianyu Du, Xin Lu and Soham Sinha}{Mar 8th, 2021}\n\nIn thi"
  },
  {
    "path": "tex/collection/10-02-online.tex",
    "chars": 23889,
    "preview": "% reset section counter\n%\\setcounter{section}{0}\n\n%\\metadata{lecture ID}{Your names}{date}\n\\metadata{16}{Kevin Guo}{Mar "
  },
  {
    "path": "tex/figures/ntk-1d.py",
    "chars": 882,
    "preview": "import matplotlib.pyplot as plt\nimport numpy as np\n\n# Data for plotting\nt = np.arange(0.0, 1.3, 0.01)\n\nfig, ax = plt.sub"
  },
  {
    "path": "tex/macros.tex",
    "chars": 4303,
    "preview": "\\usepackage{color}\n\\usepackage{lipsum}\n\\usepackage{enumitem}\n\n% for potential improvements\n\\def\\shownotes{0}  %set 1 to "
  },
  {
    "path": "tex/master.tex",
    "chars": 6478,
    "preview": "%% filename: amsbook-template.tex\n%% version: 1.1\n%% date: 2014/07/24\n%%\n%% American Mathematical Society\n%% Technical S"
  }
]

// ... and 3 more files (download for full content)

About this extraction

This page contains the full source code of the tengyuma/cs229m_notes GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 38 files (2.8 MB), approximately 723.4k tokens, and a symbol index with 1 extracted symbol (functions, classes, methods, constants, or types). Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!