|
-
- @InProceedings{ Ahmed.Aly.Gonzalez.ea.2012,
- title = {Scalable inference in latent variable models},
- author = {Ahmed, Amr and Aly, Mohamed and Gonzalez, Joseph and Narayanamurthy, Shravan and Smola, Alexander J},
- booktitle = {Proceedings of the fifth ACM international conference on Web search and data mining},
- pages = {123--132},
- year = {2012},
- organization = {ACM}
- }
-
- @Article{ Aji.McEliece.2000,
- title = {The generalized distributive law},
- author = {Aji, Srinivas M and McEliece, Robert J},
- journal = {IEEE Transactions on Information Theory},
- volume = {46},
- number = {2},
- pages = {325--343},
- year = {2000},
- publisher = {IEEE}
- }
-
- @Article{ Ba.Kiros.Hinton.2016,
- title = {Layer normalization},
- author = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
- journal = {arXiv preprint arXiv:1607.06450},
- eprint = {1607.06450},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @Article{ Bahdanau.Cho.Bengio.2014,
- title = {Neural machine translation by jointly learning to align and translate},
- author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
- journal = {arXiv preprint arXiv:1409.0473},
- eprint = {1409.0473},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @InProceedings{ Bay.Tuytelaars.Van-Gool.2006,
- title = {{SURF}: Speeded up robust features},
- author = {Bay, Herbert and Tuytelaars, Tinne and Van Gool, Luc},
- booktitle = {European conference on computer vision},
- pages = {404--417},
- year = {2006},
- organization = {Springer}
- }
-
- @Article{ Bengio.Ducharme.Vincent.ea.2003,
- title = {A neural probabilistic language model},
- author = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Jauvin, Christian},
- journal = {Journal of Machine Learning Research},
- volume = {3},
- month = feb,
- pages = {1137--1155},
- year = {2003}
- }
-
- @Article{ Bishop.1995,
- title = {Training with noise is equivalent to {Tikhonov} regularization},
- author = {Bishop, Chris M},
- journal = {Neural computation},
- volume = {7},
- number = {1},
- pages = {108--116},
- year = {1995},
- publisher = {MIT Press}
- }
-
- @Book{ Bishop.2006,
- title = {Pattern recognition and machine learning},
- author = {Bishop, Christopher M},
- year = {2006},
- publisher = {Springer}
- }
-
- @InProceedings{ Bodla.Singh.Chellappa.ea.2017,
- title = {{Soft-NMS} -- improving object detection with one line of code},
- author = {Bodla, Navaneeth and Singh, Bharat and Chellappa, Rama and Davis, Larry S},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {5561--5569},
- year = {2017}
- }
-
- @Article{ Bojanowski.Grave.Joulin.ea.2017,
- author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
- title = {Enriching word vectors with subword information},
- journal = {Transactions of the Association for Computational Linguistics},
- volume = {5},
- pages = {135--146},
- publisher = {MIT Press},
- year = {2017}
- }
-
- @Book{ Bollobas.1999,
- title = {Linear analysis},
- author = {Bollob{\'a}s, B{\'e}la},
- year = {1999},
- publisher = {Cambridge University Press},
- address = {Cambridge}
- }
-
- @Article{ Bowman.Angeli.Potts.ea.2015,
- title = {A large annotated corpus for learning natural language inference},
- author = {Bowman, Samuel R and Angeli, Gabor and Potts, Christopher and Manning, Christopher D},
- journal = {arXiv preprint arXiv:1508.05326},
- eprint = {1508.05326},
- archiveprefix = {arXiv},
- year = {2015}
- }
-
- @Book{ Boyd.Vandenberghe.2004,
- author = {Stephen Boyd and Lieven Vandenberghe},
- title = {Convex Optimization},
- publisher = {Cambridge University Press},
- address = {Cambridge, England},
- year = {2004}
- }
-
- @InProceedings{ Brown.Cocke.Della-Pietra.ea.1988,
- author = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A and Della Pietra, Vincent J and Jelinek, Frederick and Mercer, Robert L and Roossin, Paul},
- title = {A statistical approach to language translation},
- booktitle = {Coling Budapest 1988 Volume 1: International Conference on Computational Linguistics},
- year = {1988}
- }
-
- @Article{ Brown.Cocke.Della-Pietra.ea.1990,
- author = {Brown, Peter F and Cocke, John and Della Pietra, Stephen A and Della Pietra, Vincent J and Jelinek, Frederick and Lafferty, John and Mercer, Robert L and Roossin, Paul S},
- title = {A statistical approach to machine translation},
- journal = {Computational linguistics},
- volume = {16},
- number = {2},
- pages = {79--85},
- year = {1990}
- }
-
- @InProceedings{ Brown.Sandholm.2017,
- title = {{Libratus}: The Superhuman {AI} for No-Limit Poker},
- author = {Brown, Noam and Sandholm, Tuomas},
- booktitle = {IJCAI},
- pages = {5226--5228},
- year = {2017}
- }
-
- @Article{ Campbell.Hoane-Jr.Hsu.2002,
- title = {Deep {Blue}},
- author = {Campbell, Murray and Hoane, Jr., A. Joseph and Hsu, Feng-hsiung},
- journal = {Artificial intelligence},
- volume = {134},
- number = {1--2},
- pages = {57--83},
- year = {2002},
- publisher = {Elsevier}
- }
-
- @InCollection{ Canny.1987,
- author = {Canny, John},
- title = {A computational approach to edge detection},
- booktitle = {Readings in computer vision},
- pages = {184--203},
- publisher = {Elsevier},
- year = {1987}
- }
-
- @InProceedings{ Cer.Diab.Agirre.ea.2017,
- author = {Cer, Daniel and Diab, Mona and Agirre, Eneko and Lopez-Gazpio, I{\~n}igo and Specia, Lucia},
- title = {SemEval-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation},
- booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)},
- pages = {1--14},
- year = {2017}
- }
-
- @InProceedings{ Cheng.Dong.Lapata.2016,
- author = {Cheng, Jianpeng and Dong, Li and Lapata, Mirella},
- title = {Long Short-Term Memory-Networks for Machine Reading},
- booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
- pages = {551--561},
- year = {2016}
- }
-
- @Article{ Cho.Van-Merrienboer.Bahdanau.ea.2014,
- title = {On the properties of neural machine translation: Encoder-decoder approaches},
- author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
- journal = {arXiv preprint arXiv:1409.1259},
- eprint = {1409.1259},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @Article{ Cho.Van-Merrienboer.Gulcehre.ea.2014,
- title = {Learning phrase representations using {RNN} encoder-decoder for statistical machine translation},
- author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
- journal = {arXiv preprint arXiv:1406.1078},
- eprint = {1406.1078},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @Book{ Chowdhury.2010,
- author = {Chowdhury, Gobinda G},
- title = {Introduction to modern information retrieval},
- publisher = {Facet publishing},
- year = {2010}
- }
-
- @Article{ Chung.Gulcehre.Cho.ea.2014,
- title = {Empirical evaluation of gated recurrent neural networks on sequence modeling},
- author = {Chung, Junyoung and Gulcehre, Caglar and Cho, Kyunghyun and Bengio, Yoshua},
- journal = {arXiv preprint arXiv:1412.3555},
- eprint = {1412.3555},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @Article{ Collobert.Weston.Bottou.ea.2011,
- title = {Natural language processing (almost) from scratch},
- author = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
- journal = {Journal of Machine Learning Research},
- volume = {12},
- pages = {2493--2537},
- year = {2011}
- }
-
- @Article{ Csiszar.2008,
- author = {Csisz{\'a}r, Imre},
- title = {Axiomatic characterizations of information measures},
- journal = {Entropy},
- volume = {10},
- number = {3},
- pages = {261--273},
- publisher = {Molecular Diversity Preservation International},
- year = {2008}
- }
-
- @InProceedings{ Dalal.Triggs.2005,
- author = {Dalal, Navneet and Triggs, Bill},
- title = {Histograms of oriented gradients for human detection},
- booktitle = {2005 IEEE computer society conference on computer vision and pattern recognition (CVPR'05)},
- volume = {1},
- pages = {886--893},
- organization = {IEEE},
- year = {2005}
- }
-
- @Article{ De-Cock.2011,
- author = {De Cock, Dean},
- title = {Ames, Iowa: Alternative to the Boston housing data as an end of semester regression project},
- journal = {Journal of Statistics Education},
- volume = {19},
- number = {3},
- publisher = {Taylor \& Francis},
- year = {2011}
- }
-
- @Article{ DeCandia.Hastorun.Jampani.ea.2007,
- title = {Dynamo: Amazon's highly available key-value store},
- author = {DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
- journal = {ACM SIGOPS Operating Systems Review},
- volume = {41},
- number = {6},
- pages = {205--220},
- year = {2007},
- publisher = {ACM}
- }
-
- @Article{ Devlin.Chang.Lee.ea.2018,
- title = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
- author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
- journal = {arXiv preprint arXiv:1810.04805},
- eprint = {1810.04805},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @InProceedings{ Doersch.Gupta.Efros.2015,
- author = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A},
- title = {Unsupervised visual representation learning by context prediction},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {1422--1430},
- year = {2015}
- }
-
- @InProceedings{ Dosovitskiy.Beyer.Kolesnikov.ea.2021,
- author = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
- title = {An image is worth 16x16 words: Transformers for image recognition at scale},
- booktitle = {International Conference on Learning Representations},
- year = {2021}
- }
-
- @InCollection{ Doucet.De-Freitas.Gordon.2001,
- author = {Doucet, Arnaud and De Freitas, Nando and Gordon, Neil},
- title = {An introduction to sequential Monte Carlo methods},
- booktitle = {Sequential Monte Carlo methods in practice},
- pages = {3--14},
- publisher = {Springer},
- year = {2001}
- }
-
- @Article{ Duchi.Hazan.Singer.2011,
- title = {Adaptive subgradient methods for online learning and stochastic optimization},
- author = {Duchi, John and Hazan, Elad and Singer, Yoram},
- journal = {Journal of Machine Learning Research},
- volume = {12},
- month = jul,
- pages = {2121--2159},
- year = {2011}
- }
-
- @Article{ Dumoulin.Visin.2016,
- title = {A guide to convolution arithmetic for deep learning},
- author = {Dumoulin, Vincent and Visin, Francesco},
- journal = {arXiv preprint arXiv:1603.07285},
- eprint = {1603.07285},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @Article{ Edelman.Ostrovsky.Schwarz.2007,
- author = {Edelman, Benjamin and Ostrovsky, Michael and Schwarz, Michael},
- title = {Internet advertising and the generalized second-price auction: Selling billions of dollars worth of keywords},
- journal = {American economic review},
- volume = {97},
- number = {1},
- pages = {242--259},
- year = {2007}
- }
-
- @InProceedings{ Flammarion.Bach.2015,
- author = {Flammarion, Nicolas and Bach, Francis},
- title = {From averaging to acceleration, there is only a step-size},
- booktitle = {Conference on Learning Theory},
- pages = {658--695},
- year = {2015}
- }
-
- @InProceedings{ Gatys.Ecker.Bethge.2016,
- author = {Gatys, Leon A and Ecker, Alexander S and Bethge, Matthias},
- title = {Image style transfer using convolutional neural networks},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {2414--2423},
- year = {2016}
- }
-
- @Article{ Ginibre.1965,
- author = {Ginibre, Jean},
- title = {Statistical ensembles of complex, quaternion, and real matrices},
- journal = {Journal of Mathematical Physics},
- volume = {6},
- number = {3},
- pages = {440--449},
- publisher = {AIP},
- year = {1965}
- }
-
- @InProceedings{ Girshick.2015,
- title = {Fast {R-CNN}},
- author = {Girshick, Ross},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {1440--1448},
- year = {2015}
- }
-
- @InProceedings{ Girshick.Donahue.Darrell.ea.2014,
- author = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
- title = {Rich feature hierarchies for accurate object detection and semantic segmentation},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {580--587},
- year = {2014}
- }
-
- @InProceedings{ Glorot.Bengio.2010,
- author = {Glorot, Xavier and Bengio, Yoshua},
- title = {Understanding the difficulty of training deep feedforward neural networks},
- booktitle = {Proceedings of the thirteenth international conference on artificial intelligence and statistics},
- pages = {249--256},
- year = {2010}
- }
-
- @Article{ Goh.2017,
- title = {Why Momentum Really Works},
- author = {Goh, Gabriel},
- journal = {Distill},
- year = {2017},
- doi = {10.23915/distill.00006},
- url = {http://distill.pub/2017/momentum}
- }
-
- @Article{ Goldberg.Nichols.Oki.ea.1992,
- author = {Goldberg, David and Nichols, David and Oki, Brian M and Terry, Douglas},
- title = {Using collaborative filtering to weave an information tapestry},
- journal = {Communications of the ACM},
- volume = {35},
- number = {12},
- pages = {61--71},
- publisher = {Association for Computing Machinery, Inc.},
- year = {1992}
- }
-
- @Book{ Goodfellow.Bengio.Courville.2016,
- author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
- title = {Deep Learning},
- publisher = {MIT Press},
- year = {2016},
- note = {\url{http://www.deeplearningbook.org}}
- }
-
- @InProceedings{ Goodfellow.Pouget-Abadie.Mirza.ea.2014,
- title = {Generative adversarial nets},
- author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {2672--2680},
- year = {2014}
- }
-
- @Article{ Gotmare.Keskar.Xiong.ea.2018,
- title = {A Closer Look at Deep Learning Heuristics: Learning rate restarts, Warmup and Distillation},
- author = {Gotmare, Akhilesh and Keskar, Nitish Shirish and Xiong, Caiming and Socher, Richard},
- journal = {arXiv preprint arXiv:1810.13243},
- eprint = {1810.13243},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @Article{ Graves.2013,
- title = {Generating sequences with recurrent neural networks},
- author = {Graves, Alex},
- journal = {arXiv preprint arXiv:1308.0850},
- eprint = {1308.0850},
- archiveprefix = {arXiv},
- year = {2013}
- }
-
- @Article{ Graves.Schmidhuber.2005,
- title = {Framewise phoneme classification with bidirectional {LSTM} and other neural network architectures},
- author = {Graves, Alex and Schmidhuber, J{\"u}rgen},
- journal = {Neural networks},
- volume = {18},
- number = {5--6},
- pages = {602--610},
- year = {2005},
- publisher = {Elsevier}
- }
-
- @InCollection{ Gunawardana.Shani.2015,
- author = {Gunawardana, Asela and Shani, Guy},
- title = {Evaluating recommender systems},
- booktitle = {Recommender systems handbook},
- pages = {265--308},
- publisher = {Springer},
- year = {2015}
- }
-
- @InProceedings{ Guo.Tang.Ye.ea.2017,
- title = {{DeepFM}: a factorization-machine based neural network for {CTR} prediction},
- author = {Guo, Huifeng and Tang, Ruiming and Ye, Yunming and Li, Zhenguo and He, Xiuqiang},
- booktitle = {Proceedings of the 26th International Joint Conference on Artificial Intelligence},
- pages = {1725--1731},
- year = {2017},
- organization = {AAAI Press}
- }
-
- @Article{ Hadjis.Zhang.Mitliagkas.ea.2016,
- title = {Omnivore: An optimizer for multi-device deep learning on {CPUs} and {GPUs}},
- author = {Hadjis, Stefan and Zhang, Ce and Mitliagkas, Ioannis and Iter, Dan and R{\'e}, Christopher},
- journal = {arXiv preprint arXiv:1606.04487},
- eprint = {1606.04487},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @InProceedings{ Hazan.Rakhlin.Bartlett.2008,
- author = {Hazan, Elad and Rakhlin, Alexander and Bartlett, Peter L},
- title = {Adaptive online gradient descent},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {65--72},
- year = {2008}
- }
-
- @InProceedings{ He.Chua.2017,
- author = {He, Xiangnan and Chua, Tat-Seng},
- title = {Neural factorization machines for sparse predictive analytics},
- booktitle = {Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval},
- pages = {355--364},
- organization = {ACM},
- year = {2017}
- }
-
- @InProceedings{ He.Gkioxari.Dollar.ea.2017,
- title = {Mask {R-CNN}},
- author = {He, Kaiming and Gkioxari, Georgia and Doll{\'a}r, Piotr and Girshick, Ross},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {2961--2969},
- year = {2017}
- }
-
- @InProceedings{ He.Liao.Zhang.ea.2017,
- author = {He, Xiangnan and Liao, Lizi and Zhang, Hanwang and Nie, Liqiang and Hu, Xia and Chua, Tat-Seng},
- title = {Neural collaborative filtering},
- booktitle = {Proceedings of the 26th international conference on world wide web},
- pages = {173--182},
- organization = {International World Wide Web Conferences Steering Committee},
- year = {2017}
- }
-
- @InProceedings{ He.Zhang.Ren.ea.2015,
- title = {Delving deep into rectifiers: Surpassing human-level performance on {ImageNet} classification},
- author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {1026--1034},
- year = {2015}
- }
-
- @InProceedings{ He.Zhang.Ren.ea.2016,
- author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
- title = {Deep residual learning for image recognition},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {770--778},
- year = {2016}
- }
-
- @InProceedings{ He.Zhang.Ren.ea.2016*1,
- author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
- title = {Identity mappings in deep residual networks},
- booktitle = {European conference on computer vision},
- pages = {630--645},
- organization = {Springer},
- year = {2016}
- }
-
- @Book{ Hebb.Hebb.1949,
- title = {The organization of behavior},
- author = {Hebb, Donald Olding},
- year = {1949},
- publisher = {Wiley},
- address = {New York}
- }
-
- @Article{ Hendrycks.Gimpel.2016,
- title = {Gaussian error linear units ({GELUs})},
- author = {Hendrycks, Dan and Gimpel, Kevin},
- journal = {arXiv preprint arXiv:1606.08415},
- eprint = {1606.08415},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @Book{ Hennessy.Patterson.2011,
- author = {Hennessy, John L and Patterson, David A},
- title = {Computer architecture: a quantitative approach},
- publisher = {Elsevier},
- year = {2011}
- }
-
- @InProceedings{ Herlocker.Konstan.Borchers.ea.1999,
- author = {Herlocker, Jonathan L and Konstan, Joseph A and Borchers, Al and Riedl, John},
- title = {An algorithmic framework for performing collaborative filtering},
- booktitle = {22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 1999},
- pages = {230--237},
- organization = {Association for Computing Machinery, Inc},
- year = {1999}
- }
-
- @Article{ Hidasi.Karatzoglou.Baltrunas.ea.2015,
- title = {Session-based recommendations with recurrent neural networks},
- author = {Hidasi, Bal{\'a}zs and Karatzoglou, Alexandros and Baltrunas, Linas and Tikk, Domonkos},
- journal = {arXiv preprint arXiv:1511.06939},
- eprint = {1511.06939},
- archiveprefix = {arXiv},
- year = {2015}
- }
-
- @InCollection{ Hochreiter.Bengio.Frasconi.ea.2001,
- title = {Gradient flow in recurrent nets: the difficulty of learning long-term dependencies},
- author = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo and Schmidhuber, J{\"u}rgen},
- booktitle = {A field guide to dynamical recurrent neural networks},
- year = {2001},
- publisher = {IEEE Press}
- }
-
- @Article{ Hochreiter.Schmidhuber.1997,
- author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
- title = {Long short-term memory},
- journal = {Neural computation},
- volume = {9},
- number = {8},
- pages = {1735--1780},
- publisher = {MIT Press},
- year = {1997}
- }
-
- @InProceedings{ Hoyer.Janzing.Mooij.ea.2009,
- title = {Nonlinear causal discovery with additive noise models},
- author = {Hoyer, Patrik O and Janzing, Dominik and Mooij, Joris M and Peters, Jonas and Sch{\"o}lkopf, Bernhard},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {689--696},
- year = {2009}
- }
-
- @InProceedings{ Hu.Koren.Volinsky.2008,
- title = {Collaborative filtering for implicit feedback datasets},
- author = {Hu, Yifan and Koren, Yehuda and Volinsky, Chris},
- booktitle = {2008 Eighth IEEE International Conference on Data Mining},
- pages = {263--272},
- year = {2008},
- organization = {IEEE}
- }
-
- @Article{ Hu.Lee.Aggarwal.ea.2020,
- title = {Text Style Transfer: A Review and Experimental Evaluation},
- author = {Hu, Zhiqiang and Lee, Roy Ka-Wei and Aggarwal, Charu C and Zhang, Aston},
- journal = {arXiv preprint arXiv:2010.12742},
- eprint = {2010.12742},
- archiveprefix = {arXiv},
- year = {2020}
- }
-
- @InProceedings{ Hu.Shen.Sun.2018,
- author = {Hu, Jie and Shen, Li and Sun, Gang},
- title = {Squeeze-and-excitation networks},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {7132--7141},
- year = {2018}
- }
-
- @InProceedings{ Huang.Liu.Van-Der-Maaten.ea.2017,
- author = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and Weinberger, Kilian Q},
- title = {Densely connected convolutional networks},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {4700--4708},
- year = {2017}
- }
-
- @InProceedings{ Ioffe.2017,
- title = {Batch renormalization: Towards reducing minibatch dependence in batch-normalized models},
- author = {Ioffe, Sergey},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {1945--1953},
- year = {2017}
- }
-
- @Article{ Ioffe.Szegedy.2015,
- title = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
- author = {Ioffe, Sergey and Szegedy, Christian},
- journal = {arXiv preprint arXiv:1502.03167},
- eprint = {1502.03167},
- archiveprefix = {arXiv},
- year = {2015}
- }
-
- @Article{ Izmailov.Podoprikhin.Garipov.ea.2018,
- title = {Averaging weights leads to wider optima and better generalization},
- author = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov, Timur and Vetrov, Dmitry and Wilson, Andrew Gordon},
- journal = {arXiv preprint arXiv:1803.05407},
- eprint = {1803.05407},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @Book{ Jaeger.2002,
- title = {Tutorial on training recurrent neural networks, covering {BPPT}, {RTRL}, {EKF} and the ``echo state network'' approach},
- author = {Jaeger, Herbert},
- volume = {5},
- year = {2002},
- publisher = {GMD-Forschungszentrum Informationstechnik Bonn}
- }
-
- @Book{ James.2007,
- author = {James, William},
- title = {The principles of psychology},
- volume = {1},
- publisher = {Cosimo, Inc.},
- year = {2007}
- }
-
- @Article{ Jia.Song.He.ea.2018,
- title = {Highly scalable deep learning training system with mixed-precision: Training {ImageNet} in four minutes},
- author = {Jia, Xianyan and Song, Shutao and He, Wei and Wang, Yangzihao and Rong, Haidong and Zhou, Feihu and Xie, Liqiang and Guo, Zhenyu and Yang, Yuanzhou and Yu, Liwei and others},
- journal = {arXiv preprint arXiv:1807.11205},
- eprint = {1807.11205},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @InProceedings{ Jouppi.Young.Patil.ea.2017,
- author = {Jouppi, Norman P and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and others},
- title = {In-datacenter performance analysis of a tensor processing unit},
- booktitle = {2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)},
- pages = {1--12},
- organization = {IEEE},
- year = {2017}
- }
-
- @Article{ Karras.Aila.Laine.ea.2017,
- title = {Progressive growing of {GANs} for improved quality, stability, and variation},
- author = {Karras, Tero and Aila, Timo and Laine, Samuli and Lehtinen, Jaakko},
- journal = {arXiv preprint arXiv:1710.10196},
- eprint = {1710.10196},
- archiveprefix = {arXiv},
- year = {2017}
- }
-
- @Article{ Kim.2014,
- title = {Convolutional neural networks for sentence classification},
- author = {Kim, Yoon},
- journal = {arXiv preprint arXiv:1408.5882},
- eprint = {1408.5882},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @Article{ Kingma.Ba.2014,
- title = {{Adam}: A method for stochastic optimization},
- author = {Kingma, Diederik P and Ba, Jimmy},
- journal = {arXiv preprint arXiv:1412.6980},
- eprint = {1412.6980},
- archiveprefix = {arXiv},
- year = {2014}
- }
-
- @Book{ Koller.Friedman.2009,
- title = {Probabilistic graphical models: principles and techniques},
- author = {Koller, Daphne and Friedman, Nir},
- year = {2009},
- publisher = {MIT Press}
- }
-
- @Misc{ Kolter.2008,
- title = {Linear Algebra Review and Reference},
- author = {Kolter, Zico},
- howpublished = {Available online: \url{http://cs229.stanford.edu/section/cs229-linalg.pdf}},
- year = {2008}
- }
-
- @InProceedings{ Koren.2009,
- author = {Koren, Yehuda},
- title = {Collaborative filtering with temporal dynamics},
- booktitle = {Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining},
- pages = {447--456},
- organization = {ACM},
- year = {2009}
- }
-
- @Article{ Koren.Bell.Volinsky.2009,
- title = {Matrix factorization techniques for recommender systems},
- author = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
- journal = {Computer},
- volume = {42},
- number = {8},
- pages = {30--37},
- year = {2009},
- publisher = {IEEE}
- }
-
- @InProceedings{ Krizhevsky.Sutskever.Hinton.2012,
- title = {{ImageNet} classification with deep convolutional neural networks},
- author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {1097--1105},
- year = {2012}
- }
-
- @Book{ Kung.1988,
- title = {{VLSI} array processors},
- author = {Kung, Sun Yuan},
- publisher = {Prentice Hall},
- address = {Englewood Cliffs, NJ},
- year = {1988},
- annote = {685 p. Research supported by the Semiconductor Research Corp., SDIO, NSF, and US Navy.}
- }
-
- @Article{ LeCun.Bottou.Bengio.ea.1998,
- title = {Gradient-based learning applied to document recognition},
- author = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
- journal = {Proceedings of the IEEE},
- volume = {86},
- number = {11},
- pages = {2278--2324},
- year = {1998},
- publisher = {IEEE}
- }
-
- @PhDThesis{ Li.2017,
- title = {Scaling Distributed Machine Learning with System and Algorithm Co-design},
- author = {Li, Mu},
- year = {2017},
- school = {Carnegie Mellon University}
- }
-
- @InProceedings{ Li.Andersen.Park.ea.2014,
- title = {Scaling distributed machine learning with the parameter server},
- author = {Li, Mu and Andersen, David G and Park, Jun Woo and Smola, Alexander J and Ahmed, Amr and Josifovski, Vanja and Long, James and Shekita, Eugene J and Su, Bor-Yiing},
- booktitle = {11th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 14)},
- pages = {583--598},
- year = {2014}
- }
-
- @Article{ Lin.Chen.Yan.2013,
- title = {Network in network},
- author = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
- journal = {arXiv preprint arXiv:1312.4400},
- eprint = {1312.4400},
- archiveprefix = {arXiv},
- year = {2013}
- }
-
- @Article{ Lin.Feng.Santos.ea.2017,
- title = {A structured self-attentive sentence embedding},
- author = {Lin, Zhouhan and Feng, Minwei and dos Santos, Cicero Nogueira and Yu, Mo and Xiang, Bing and Zhou, Bowen and Bengio, Yoshua},
- journal = {arXiv preprint arXiv:1703.03130},
- eprint = {1703.03130},
- archiveprefix = {arXiv},
- year = {2017}
- }
-
- @InProceedings{ Lin.Goyal.Girshick.ea.2017,
- author = {Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr},
- title = {Focal loss for dense object detection},
- booktitle = {Proceedings of the IEEE international conference on computer vision},
- pages = {2980--2988},
- year = {2017}
- }
-
- @Article{ Lin.Lv.Zhu.ea.2010,
- title = {{ImageNet} classification: fast descriptor coding and large-scale {SVM} training},
- author = {Lin, Yuanqing and Lv, F and Zhu, S and Yang, M and Cour, T and Yu, K and Cao, L and Li, Z and Tsai, MH and Zhou, X and others},
- journal = {Large scale visual recognition challenge},
- year = {2010}
- }
-
- @Article{ Lipton.Steinhardt.2018,
- title = {Troubling trends in machine learning scholarship},
- author = {Lipton, Zachary C and Steinhardt, Jacob},
- journal = {arXiv preprint arXiv:1807.03341},
- eprint = {1807.03341},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @InProceedings{ Liu.Anguelov.Erhan.ea.2016,
- title = {{SSD}: Single shot multibox detector},
- author = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C},
- booktitle = {European conference on computer vision},
- pages = {21--37},
- year = {2016},
- organization = {Springer}
- }
-
- @Article{ Liu.Ott.Goyal.ea.2019,
- title = {{RoBERTa}: A robustly optimized {BERT} pretraining approach},
- author = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
- journal = {arXiv preprint arXiv:1907.11692},
- eprint = {1907.11692},
- archiveprefix = {arXiv},
- year = {2019}
- }
-
- @InProceedings{ Long.Shelhamer.Darrell.2015,
- author = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
- title = {Fully convolutional networks for semantic segmentation},
- booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
- pages = {3431--3440},
- year = {2015}
- }
-
- @Article{ Loshchilov.Hutter.2016,
- title = {{SGDR}: Stochastic gradient descent with warm restarts},
- author = {Loshchilov, Ilya and Hutter, Frank},
- journal = {arXiv preprint arXiv:1608.03983},
- eprint = {1608.03983},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @Article{ Lowe.2004,
- author = {Lowe, David G},
- title = {Distinctive image features from scale-invariant keypoints},
- journal = {International journal of computer vision},
- volume = {60},
- number = {2},
- pages = {91--110},
- publisher = {Springer},
- year = {2004}
- }
-
- @Article{ Luo.Wang.Shao.ea.2018,
- title = {Towards understanding regularization in batch normalization},
- author = {Luo, Ping and Wang, Xinjiang and Shao, Wenqi and Peng, Zhanglin},
- journal = {arXiv preprint arXiv:1809.00846},
- eprint = {1809.00846},
- archiveprefix = {arXiv},
- year = {2018}
- }
-
- @InProceedings{ Maas.Daly.Pham.ea.2011,
- title = {Learning word vectors for sentiment analysis},
- author = {Maas, Andrew L and Daly, Raymond E and Pham, Peter T and Huang, Dan and Ng, Andrew Y and Potts, Christopher},
- booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
- volume = {1},
- pages = {142--150},
- year = {2011},
- organization = {Association for Computational Linguistics}
- }
-
- @InProceedings{ McCann.Bradbury.Xiong.ea.2017,
- author = {McCann, Bryan and Bradbury, James and Xiong, Caiming and Socher, Richard},
- title = {Learned in translation: Contextualized word vectors},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {6294--6305},
- year = {2017}
- }
-
- @Article{ McCulloch.Pitts.1943,
- author = {McCulloch, Warren S and Pitts, Walter},
- title = {A logical calculus of the ideas immanent in nervous activity},
- journal = {The bulletin of mathematical biophysics},
- volume = {5},
- number = {4},
- pages = {115--133},
- publisher = {Springer},
- year = {1943}
- }
-
- @InProceedings{ McMahan.Holt.Sculley.ea.2013,
- author = {McMahan, H Brendan and Holt, Gary and Sculley, David and Young, Michael and Ebner, Dietmar and Grady, Julian and Nie, Lan and Phillips, Todd and Davydov, Eugene and Golovin, Daniel and others},
- title = {Ad click prediction: a view from the trenches},
- booktitle = {Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining},
- pages = {1222--1230},
- organization = {ACM},
- year = {2013}
- }
-
- @Article{ Merity.Xiong.Bradbury.ea.2016,
- title = {Pointer sentinel mixture models},
- author = {Merity, Stephen and Xiong, Caiming and Bradbury, James and Socher, Richard},
- journal = {arXiv preprint arXiv:1609.07843},
- eprint = {1609.07843},
- archiveprefix = {arXiv},
- year = {2016}
- }
-
- @Article{ Mikolov.Chen.Corrado.ea.2013,
- title = {Efficient estimation of word representations in vector
- space},
- author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean,
- Jeffrey},
- journal = {arXiv preprint arXiv:1301.3781},
- year = {2013}
- }
-
- @InProceedings{ Mikolov.Sutskever.Chen.ea.2013,
- title = {Distributed representations of words and phrases and their
- compositionality},
- author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and
- Corrado, Greg S and Dean, Jeff},
- booktitle = {Advances in neural information processing systems},
- pages = {3111--3119},
- year = {2013}
- }
-
- @InProceedings{ Mirhoseini.Pham.Le.ea.2017,
- title = {Device placement optimization with reinforcement
- learning},
- author = {Mirhoseini, Azalia and Pham, Hieu and Le, Quoc V and
- Steiner, Benoit and Larsen, Rasmus and Zhou, Yuefeng and
- Kumar, Naveen and Norouzi, Mohammad and Bengio, Samy and
- Dean, Jeff},
- booktitle = {Proceedings of the 34th International Conference on
- Machine Learning-Volume 70},
- pages = {2430--2439},
- year = {2017},
- organization = {JMLR.org}
- }
-
- @InProceedings{ Mnih.Heess.Graves.ea.2014,
- title = {Recurrent models of visual attention},
- author = {Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and
- others},
- booktitle = {Advances in neural information processing systems},
- pages = {2204--2212},
- year = {2014}
- }
-
- @Article{ Morey.Hoekstra.Rouder.ea.2016,
- title = {The fallacy of placing confidence in confidence
- intervals},
- author = {Morey, Richard D and Hoekstra, Rink and Rouder, Jeffrey N
- and Lee, Michael D and Wagenmakers, Eric-Jan},
- journal = {Psychonomic bulletin \& review},
- volume = {23},
- number = {1},
- pages = {103--123},
- year = {2016},
- publisher = {Springer}
- }
-
- @Article{ Nadaraya.1964,
- title = {On estimating regression},
- author = {Nadaraya, Elizbar A},
- journal = {Theory of Probability \& Its Applications},
- volume = {9},
- number = {1},
- pages = {141--142},
- year = {1964},
- publisher = {SIAM}
- }
-
- @Book{ Nesterov.2018,
- title = {Lectures on convex optimization},
- author = {Nesterov, Yurii},
- volume = {137},
- year = {2018},
- publisher = {Springer}
- }
-
- @Misc{ Nesterov.Vial.2000,
- title = {Confidence level solutions for stochastic programming},
- author = {Nesterov, Yu and Vial, J-Ph},
- howpublished = {Stochastic Programming E-Print Series},
- year = {2000}
- }
-
- @Article{ Neyman.1937,
- title = {Outline of a theory of statistical estimation based on the
- classical theory of probability},
- author = {Neyman, Jerzy},
- journal = {Philosophical Transactions of the Royal Society of London.
- Series A, Mathematical and Physical Sciences},
- volume = {236},
- number = {767},
- pages = {333--380},
- year = {1937},
- publisher = {The Royal Society London}
- }
-
- @InProceedings{ Papineni.Roukos.Ward.ea.2002,
- title = {{BLEU}: a method for automatic evaluation of machine
- translation},
- author = {Papineni, Kishore and Roukos, Salim and Ward, Todd and
- Zhu, Wei-Jing},
- booktitle = {Proceedings of the 40th annual meeting of the Association
- for Computational Linguistics},
- pages = {311--318},
- year = {2002}
- }
-
- @Article{ Parikh.Tackstrom.Das.ea.2016,
- title = {A decomposable attention model for natural language
- inference},
- author = {Parikh, Ankur P and T{\"a}ckstr{\"o}m, Oscar and Das,
- Dipanjan and Uszkoreit, Jakob},
- journal = {arXiv preprint arXiv:1606.01933},
- year = {2016}
- }
-
- @InProceedings{ Park.Liu.Wang.ea.2019,
- title = {Semantic image synthesis with spatially-adaptive
- normalization},
- author = {Park, Taesung and Liu, Ming-Yu and Wang, Ting-Chun and
- Zhu, Jun-Yan},
- booktitle = {Proceedings of the IEEE Conference on Computer Vision and
- Pattern Recognition},
- pages = {2337--2346},
- year = {2019}
- }
-
- @Article{ Paulus.Xiong.Socher.2017,
- title = {A deep reinforced model for abstractive summarization},
- author = {Paulus, Romain and Xiong, Caiming and Socher, Richard},
- journal = {arXiv preprint arXiv:1705.04304},
- year = {2017}
- }
-
- @InProceedings{ Pennington.Schoenholz.Ganguli.2017,
- title = {Resurrecting the sigmoid in deep learning through
- dynamical isometry: theory and practice},
- author = {Pennington, Jeffrey and Schoenholz, Samuel and Ganguli,
- Surya},
- booktitle = {Advances in neural information processing systems},
- pages = {4785--4795},
- year = {2017}
- }
-
- @InProceedings{ Pennington.Socher.Manning.2014,
- title = {{GloVe}: Global vectors for word representation},
- author = {Pennington, Jeffrey and Socher, Richard and Manning,
- Christopher},
- booktitle = {Proceedings of the 2014 conference on empirical methods in
- natural language processing (EMNLP)},
- pages = {1532--1543},
- year = {2014}
- }
-
- @InProceedings{ Peters.Ammar.Bhagavatula.ea.2017,
- title = {Semi-supervised sequence tagging with bidirectional
- language models},
- author = {Peters, Matthew and Ammar, Waleed and Bhagavatula, Chandra
- and Power, Russell},
- booktitle = {Proceedings of the 55th Annual Meeting of the Association
- for Computational Linguistics (Volume 1: Long Papers)},
- pages = {1756--1765},
- year = {2017}
- }
-
- @Book{ Peters.Janzing.Scholkopf.2017,
- title = {Elements of causal inference: foundations and learning
- algorithms},
- author = {Peters, Jonas and Janzing, Dominik and Sch{\"o}lkopf,
- Bernhard},
- year = {2017},
- publisher = {MIT press}
- }
-
- @InProceedings{ Peters.Neumann.Iyyer.ea.2018,
- title = {Deep Contextualized Word Representations},
- author = {Peters, Matthew and Neumann, Mark and Iyyer, Mohit and
- Gardner, Matt and Clark, Christopher and Lee, Kenton and
- Zettlemoyer, Luke},
- booktitle = {Proceedings of the 2018 Conference of the North American
- Chapter of the Association for Computational Linguistics:
- Human Language Technologies, Volume 1 (Long Papers)},
- pages = {2227--2237},
- year = {2018}
- }
-
- @Article{ Petersen.Pedersen.ea.2008,
- title = {The matrix cookbook},
- author = {Petersen, Kaare Brandt and Pedersen, Michael Syskind and
- others},
- journal = {Technical University of Denmark},
- volume = {7},
- number = {15},
- pages = {510},
- year = {2008}
- }
-
- @Article{ Polyak.1964,
- title = {Some methods of speeding up the convergence of iteration
- methods},
- author = {Polyak, Boris T},
- journal = {USSR Computational Mathematics and Mathematical Physics},
- volume = {4},
- number = {5},
- pages = {1--17},
- year = {1964},
- publisher = {Elsevier}
- }
-
- @Article{ Quadrana.Cremonesi.Jannach.2018,
- title = {Sequence-aware recommender systems},
- author = {Quadrana, Massimo and Cremonesi, Paolo and Jannach,
- Dietmar},
- journal = {ACM Computing Surveys (CSUR)},
- volume = {51},
- number = {4},
- pages = {66},
- year = {2018},
- publisher = {ACM}
- }
-
- @Article{ Radford.Metz.Chintala.2015,
- title = {Unsupervised representation learning with deep
- convolutional generative adversarial networks},
- author = {Radford, Alec and Metz, Luke and Chintala, Soumith},
- journal = {arXiv preprint arXiv:1511.06434},
- year = {2015}
- }
-
- @Article{ Radford.Narasimhan.Salimans.ea.2018,
- title = {Improving language understanding by generative
- pre-training},
- author = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim
- and Sutskever, Ilya},
- journal = {OpenAI},
- year = {2018}
- }
-
- @Article{ Radford.Wu.Child.ea.2019,
- title = {Language models are unsupervised multitask learners},
- author = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan,
- David and Amodei, Dario and Sutskever, Ilya},
- journal = {OpenAI Blog},
- volume = {1},
- number = {8},
- pages = {9},
- year = {2019}
- }
-
- @Article{ Rajpurkar.Zhang.Lopyrev.ea.2016,
- title = {{SQuAD}: 100,000+ questions for machine comprehension of
- text},
- author = {Rajpurkar, Pranav and Zhang, Jian and Lopyrev, Konstantin
- and Liang, Percy},
- journal = {arXiv preprint arXiv:1606.05250},
- year = {2016}
- }
-
- @Article{ Reddi.Kale.Kumar.2019,
- title = {On the convergence of {Adam} and beyond},
- author = {Reddi, Sashank J and Kale, Satyen and Kumar, Sanjiv},
- journal = {arXiv preprint arXiv:1904.09237},
- year = {2019}
- }
-
- @InProceedings{ Redmon.Divvala.Girshick.ea.2016,
- title = {You only look once: Unified, real-time object detection},
- author = {Redmon, Joseph and Divvala, Santosh and Girshick, Ross and
- Farhadi, Ali},
- booktitle = {Proceedings of the IEEE conference on computer vision and
- pattern recognition},
- pages = {779--788},
- year = {2016}
- }
-
- @Article{ Reed.De-Freitas.2015,
- title = {Neural programmer-interpreters},
- author = {Reed, Scott and De Freitas, Nando},
- journal = {arXiv preprint arXiv:1511.06279},
- year = {2015}
- }
-
- @InProceedings{ Ren.He.Girshick.ea.2015,
- title = {{Faster R-CNN}: Towards real-time object detection with
- region proposal networks},
- author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun,
- Jian},
- booktitle = {Advances in neural information processing systems},
- pages = {91--99},
- year = {2015}
- }
-
- @InProceedings{ Rendle.2010,
- title = {Factorization machines},
- author = {Rendle, Steffen},
- booktitle = {2010 IEEE International Conference on Data Mining},
- pages = {995--1000},
- year = {2010},
- organization = {IEEE}
- }
-
- @InProceedings{ Rendle.Freudenthaler.Gantner.ea.2009,
- title = {{BPR}: {Bayesian} personalized ranking from implicit
- feedback},
- author = {Rendle, Steffen and Freudenthaler, Christoph and Gantner,
- Zeno and Schmidt-Thieme, Lars},
- booktitle = {Proceedings of the twenty-fifth conference on uncertainty
- in artificial intelligence},
- pages = {452--461},
- year = {2009},
- organization = {AUAI Press}
- }
-
- @Article{ Rumelhart.Hinton.Williams.ea.1988,
- title = {Learning representations by back-propagating errors},
- author = {Rumelhart, David E and Hinton, Geoffrey E and Williams,
- Ronald J},
- journal = {Cognitive modeling},
- volume = {5},
- number = {3},
- pages = {1},
- year = {1988}
- }
-
- @Book{ Russell.Norvig.2016,
- title = {Artificial intelligence: a modern approach},
- author = {Russell, Stuart J and Norvig, Peter},
- year = {2016},
- publisher = {Pearson Education Limited},
- address = {Malaysia}
- }
-
- @Article{ Salton.Wong.Yang.1975,
- title = {A vector space model for automatic indexing},
- author = {Salton, Gerard and Wong, Anita and Yang, Chung-Shu},
- journal = {Communications of the ACM},
- volume = {18},
- number = {11},
- pages = {613--620},
- year = {1975},
- publisher = {ACM}
- }
-
- @InProceedings{ Santurkar.Tsipras.Ilyas.ea.2018,
- title = {How does batch normalization help optimization?},
- author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew
- and Madry, Aleksander},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {2483--2493},
- year = {2018}
- }
-
- @InProceedings{ Sarwar.Karypis.Konstan.ea.2001,
- title = {Item-based collaborative filtering recommendation
- algorithms},
- author = {Sarwar, Badrul Munir and Karypis, George and Konstan,
- Joseph A and Riedl, John},
- booktitle = {Proceedings of the 10th International Conference on World
- Wide Web},
- pages = {285--295},
- year = {2001},
- organization = {ACM}
- }
-
- @InProceedings{ Schein.Popescul.Ungar.ea.2002,
- title = {Methods and metrics for cold-start recommendations},
- author = {Schein, Andrew I and Popescul, Alexandrin and Ungar, Lyle
- H and Pennock, David M},
- booktitle = {Proceedings of the 25th annual international ACM SIGIR
- conference on Research and development in information
- retrieval},
- pages = {253--260},
- year = {2002},
- organization = {ACM}
- }
-
- @Article{ Schuster.Paliwal.1997,
- title = {Bidirectional recurrent neural networks},
- author = {Schuster, Mike and Paliwal, Kuldip K},
- journal = {IEEE Transactions on Signal Processing},
- volume = {45},
- number = {11},
- pages = {2673--2681},
- year = {1997},
- publisher = {IEEE}
- }
-
- @InProceedings{ Sedhain.Menon.Sanner.ea.2015,
- title = {Autorec: Autoencoders meet collaborative filtering},
- author = {Sedhain, Suvash and Menon, Aditya Krishna and Sanner,
- Scott and Xie, Lexing},
- booktitle = {Proceedings of the 24th International Conference on World
- Wide Web},
- pages = {111--112},
- year = {2015},
- organization = {ACM}
- }
-
- @Article{ Sennrich.Haddow.Birch.2015,
- title = {Neural machine translation of rare words with subword
- units},
- author = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
- journal = {arXiv preprint arXiv:1508.07909},
- year = {2015}
- }
-
- @Article{ Sergeev.Del-Balso.2018,
- title = {Horovod: fast and easy distributed deep learning in
- TensorFlow},
- author = {Sergeev, Alexander and Del Balso, Mike},
- journal = {arXiv preprint arXiv:1802.05799},
- year = {2018}
- }
-
- @Article{ Shannon.1948,
- author = {Shannon, Claude Elwood},
- journal = {The Bell System Technical Journal},
- month = jul,
- number = {3},
- pages = {379--423},
- publisher = {Nokia Bell Labs},
- title = {A Mathematical Theory of Communication},
- volume = {27},
- year = {1948}
- }
-
- @InProceedings{ Shao.Yao.Sun.ea.2020,
- title = {{ControlVAE}: Controllable Variational Autoencoder},
- author = {Shao, Huajie and Yao, Shuochao and Sun, Dachun and Zhang,
- Aston and Liu, Shengzhong and Liu, Dongxin and Wang, Jun
- and Abdelzaher, Tarek},
- booktitle = {Proceedings of the 37th International Conference on
- Machine Learning},
- year = {2020},
- organization = {JMLR.org}
- }
-
- @Article{ Silver.Huang.Maddison.ea.2016,
- title = {Mastering the game of {Go} with deep neural networks and
- tree search},
- author = {Silver, David and Huang, Aja and Maddison, Chris J and
- Guez, Arthur and Sifre, Laurent and Van Den Driessche,
- George and Schrittwieser, Julian and Antonoglou, Ioannis
- and Panneershelvam, Veda and Lanctot, Marc and others},
- journal = {Nature},
- volume = {529},
- number = {7587},
- pages = {484--489},
- year = {2016},
- publisher = {Nature Publishing Group}
- }
-
- @Article{ Simonyan.Zisserman.2014,
- title = {Very deep convolutional networks for large-scale image
- recognition},
- author = {Simonyan, Karen and Zisserman, Andrew},
- journal = {arXiv preprint arXiv:1409.1556},
- year = {2014}
- }
-
- @Article{ Smola.Narayanamurthy.2010,
- title = {An architecture for parallel topic models},
- author = {Smola, Alexander and Narayanamurthy, Shravan},
- journal = {Proceedings of the VLDB Endowment},
- volume = {3},
- number = {1--2},
- pages = {703--710},
- year = {2010},
- publisher = {VLDB Endowment}
- }
-
- @Article{ Srivastava.Hinton.Krizhevsky.ea.2014,
- title = {Dropout: a simple way to prevent neural networks from
- overfitting},
- author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky,
- Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
- journal = {The Journal of Machine Learning Research},
- volume = {15},
- number = {1},
- pages = {1929--1958},
- year = {2014},
- publisher = {JMLR.org}
- }
-
- @Book{ Strang.1993,
- title = {Introduction to linear algebra},
- author = {Strang, Gilbert},
- volume = {3},
- year = {1993},
- publisher = {Wellesley-Cambridge Press},
- address = {Wellesley, MA}
- }
-
- @Article{ Su.Khoshgoftaar.2009,
- title = {A survey of collaborative filtering techniques},
- author = {Su, Xiaoyuan and Khoshgoftaar, Taghi M},
- journal = {Advances in artificial intelligence},
- volume = {2009},
- year = {2009},
- publisher = {Hindawi}
- }
-
- @InProceedings{ Sukhbaatar.Weston.Fergus.ea.2015,
- title = {End-to-end memory networks},
- author = {Sukhbaatar, Sainbayar and Weston, Jason and Fergus, Rob
- and others},
- booktitle = {Advances in neural information processing systems},
- pages = {2440--2448},
- year = {2015}
- }
-
- @InProceedings{ Sutskever.Martens.Dahl.ea.2013,
- title = {On the importance of initialization and momentum in deep
- learning},
- author = {Sutskever, Ilya and Martens, James and Dahl, George and
- Hinton, Geoffrey},
- booktitle = {International conference on machine learning},
- pages = {1139--1147},
- year = {2013}
- }
-
- @InProceedings{ Sutskever.Vinyals.Le.2014,
- title = {Sequence to sequence learning with neural networks},
- author = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
- booktitle = {Advances in neural information processing systems},
- pages = {3104--3112},
- year = {2014}
- }
-
- @InProceedings{ Szegedy.Ioffe.Vanhoucke.ea.2017,
- title = {Inception-v4, inception-resnet and the impact of residual
- connections on learning},
- author = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke,
- Vincent and Alemi, Alexander A},
- booktitle = {Thirty-First AAAI Conference on Artificial Intelligence},
- year = {2017}
- }
-
- @InProceedings{ Szegedy.Liu.Jia.ea.2015,
- title = {Going deeper with convolutions},
- author = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and
- Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and
- Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich,
- Andrew},
- booktitle = {Proceedings of the IEEE conference on computer vision and
- pattern recognition},
- pages = {1--9},
- year = {2015}
- }
-
- @InProceedings{ Szegedy.Vanhoucke.Ioffe.ea.2016,
- title = {Rethinking the inception architecture for computer
- vision},
- author = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe,
- Sergey and Shlens, Jon and Wojna, Zbigniew},
- booktitle = {Proceedings of the IEEE conference on computer vision and
- pattern recognition},
- pages = {2818--2826},
- year = {2016}
- }
-
- @Article{ Tallec.Ollivier.2017,
- title = {Unbiasing truncated backpropagation through time},
- author = {Tallec, Corentin and Ollivier, Yann},
- journal = {arXiv preprint arXiv:1705.08209},
- year = {2017}
- }
-
- @InProceedings{ Tang.Wang.2018,
- title = {Personalized top-n sequential recommendation via
- convolutional sequence embedding},
- author = {Tang, Jiaxi and Wang, Ke},
- booktitle = {Proceedings of the Eleventh ACM International Conference
- on Web Search and Data Mining},
- pages = {565--573},
- year = {2018},
- organization = {ACM}
- }
-
- @Article{ Tay.Dehghani.Bahri.ea.2020,
- title = {Efficient transformers: A survey},
- author = {Tay, Yi and Dehghani, Mostafa and Bahri, Dara and Metzler,
- Donald},
- journal = {arXiv preprint arXiv:2009.06732},
- year = {2020}
- }
-
- @Article{ Teye.Azizpour.Smith.2018,
- title = {Bayesian uncertainty estimation for batch normalized deep
- networks},
- author = {Teye, Mattias and Azizpour, Hossein and Smith, Kevin},
- journal = {arXiv preprint arXiv:1802.06455},
- year = {2018}
- }
-
- @Article{ Tieleman.Hinton.2012,
- title = {Lecture 6.5-rmsprop: Divide the gradient by a running
- average of its recent magnitude},
- author = {Tieleman, Tijmen and Hinton, Geoffrey},
- journal = {COURSERA: Neural networks for machine learning},
- volume = {4},
- number = {2},
- pages = {26--31},
- year = {2012}
- }
-
- @Article{ Toscher.Jahrer.Bell.2009,
- title = {The bigchaos solution to the netflix grand prize},
- author = {T{\"o}scher, Andreas and Jahrer, Michael and Bell, Robert
- M},
- journal = {Netflix prize documentation},
- pages = {1--52},
- year = {2009}
- }
-
- @Article{ Treisman.Gelade.1980,
- title = {A feature-integration theory of attention},
- author = {Treisman, Anne M and Gelade, Garry},
- journal = {Cognitive psychology},
- volume = {12},
- number = {1},
- pages = {97--136},
- year = {1980},
- publisher = {Elsevier}
- }
-
- @Article{ Turing.1950,
- title = {Computing machinery and intelligence},
- author = {Turing, Alan},
- journal = {Mind},
- volume = {59},
- number = {236},
- pages = {433--460},
- year = {1950}
- }
-
- @Article{ Uijlings.Van-De-Sande.Gevers.ea.2013,
- title = {Selective search for object recognition},
- author = {Uijlings, Jasper RR and Van De Sande, Koen EA and Gevers,
- Theo and Smeulders, Arnold WM},
- journal = {International journal of computer vision},
- volume = {104},
- number = {2},
- pages = {154--171},
- year = {2013},
- publisher = {Springer}
- }
-
- @Book{ Van-Loan.Golub.1983,
- title = {Matrix computations},
- author = {Van Loan, Charles F and Golub, Gene H},
- year = {1983},
- publisher = {Johns Hopkins University Press}
- }
-
- @InProceedings{ Vaswani.Shazeer.Parmar.ea.2017,
- title = {Attention is all you need},
- author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
- Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and
- Kaiser, {\L}ukasz and Polosukhin, Illia},
- booktitle = {Advances in neural information processing systems},
- pages = {5998--6008},
- year = {2017}
- }
-
- @InProceedings{ Wang.Davidson.Pan.ea.2016,
- title = {Gunrock: A high-performance graph processing library on
- the GPU},
- author = {Wang, Yangzihao and Davidson, Andrew and Pan, Yuechao and
- Wu, Yuduo and Riffel, Andy and Owens, John D},
- booktitle = {ACM SIGPLAN Notices},
- volume = {51},
- number = {8},
- pages = {11},
- year = {2016},
- organization = {ACM}
- }
-
- @Article{ Wang.Li.Liberty.ea.2018,
- title = {Optimal Message Scheduling for Aggregation},
- author = {Wang, Leyuan and Li, Mu and Liberty, Edo and Smola, Alex
- J},
- journal = {NETWORKS},
- volume = {2},
- number = {3},
- pages = {2--3},
- year = {2018}
- }
-
- @Article{ Warstadt.Singh.Bowman.2019,
- title = {Neural network acceptability judgments},
- author = {Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R},
- journal = {Transactions of the Association for Computational
- Linguistics},
- volume = {7},
- pages = {625--641},
- year = {2019},
- publisher = {MIT Press}
- }
-
- @Book{ Wasserman.2013,
- title = {All of statistics: a concise course in statistical
- inference},
- author = {Wasserman, Larry},
- year = {2013},
- publisher = {Springer Science \& Business Media}
- }
-
- @Article{ Watkins.Dayan.1992,
- title = {Q-learning},
- author = {Watkins, Christopher JCH and Dayan, Peter},
- journal = {Machine learning},
- volume = {8},
- number = {3--4},
- pages = {279--292},
- year = {1992},
- publisher = {Springer}
- }
-
- @Article{ Watson.1964,
- title = {Smooth regression analysis},
- author = {Watson, Geoffrey S},
- journal = {Sankhy{\=a}: The Indian Journal of Statistics, Series A},
- pages = {359--372},
- year = {1964},
- publisher = {JSTOR}
- }
-
- @InProceedings{ Welling.Teh.2011,
- title = {{Bayesian} learning via stochastic gradient {Langevin}
- dynamics},
- author = {Welling, Max and Teh, Yee W},
- booktitle = {Proceedings of the 28th international conference on
- machine learning (ICML-11)},
- pages = {681--688},
- year = {2011}
- }
-
- @Article{ Werbos.1990,
- title = {Backpropagation through time: what it does and how to do
- it},
- author = {Werbos, Paul J},
- journal = {Proceedings of the IEEE},
- volume = {78},
- number = {10},
- pages = {1550--1560},
- year = {1990},
- publisher = {IEEE}
- }
-
- @Article{ Wigner.1958,
- title = {On the distribution of the roots of certain symmetric
- matrices},
- author = {Wigner, Eugene P.},
- journal = {Annals of Mathematics},
- volume = {67},
- number = {2},
- pages = {325--327},
- year = {1958}
- }
-
- @TechReport{ Williams.Waterman.Patterson.2009,
- title = {Roofline: An insightful visual performance model for
- floating-point programs and multicore architectures},
- author = {Williams, Samuel and Waterman, Andrew and Patterson,
- David},
- year = {2009},
- institution = {Lawrence Berkeley National Lab.(LBNL), Berkeley, CA
- (United States)}
- }
-
- @Article{ Wood.Gasthaus.Archambeau.ea.2011,
- title = {The sequence memoizer},
- author = {Wood, Frank and Gasthaus, Jan and Archambeau, C{\'e}dric
- and James, Lancelot and Teh, Yee Whye},
- journal = {Communications of the ACM},
- volume = {54},
- number = {2},
- pages = {91--98},
- year = {2011},
- publisher = {ACM}
- }
-
- @InProceedings{ Wu.Ahmed.Beutel.ea.2017,
- title = {Recurrent recommender networks},
- author = {Wu, Chao-Yuan and Ahmed, Amr and Beutel, Alex and Smola,
- Alexander J and Jing, How},
- booktitle = {Proceedings of the tenth ACM international conference on
- web search and data mining},
- pages = {495--503},
- year = {2017},
- organization = {ACM}
- }
-
- @Article{ Wu.Schuster.Chen.ea.2016,
- title = {Google's neural machine translation system: Bridging the
- gap between human and machine translation},
- author = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le,
- Quoc V and Norouzi, Mohammad and Macherey, Wolfgang and
- Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey,
- Klaus and others},
- journal = {arXiv preprint arXiv:1609.08144},
- year = {2016}
- }
-
- @InProceedings{ Xiao.Bahri.Sohl-Dickstein.ea.2018,
- title = {Dynamical Isometry and a Mean Field Theory of CNNs: How to
- Train 10,000-Layer Vanilla Convolutional Neural Networks},
- author = {Xiao, Lechao and Bahri, Yasaman and Sohl-Dickstein, Jascha
- and Schoenholz, Samuel and Pennington, Jeffrey},
- booktitle = {International Conference on Machine Learning},
- pages = {5393--5402},
- year = {2018}
- }
-
- @Article{ Xiao.Rasul.Vollgraf.2017,
- title = {Fashion-mnist: a novel image dataset for benchmarking
- machine learning algorithms},
- author = {Xiao, Han and Rasul, Kashif and Vollgraf, Roland},
- journal = {arXiv preprint arXiv:1708.07747},
- year = {2017}
- }
-
- @InProceedings{ Xiong.Wu.Alleva.ea.2018,
- title = {The Microsoft 2017 conversational speech recognition
- system},
- author = {Xiong, Wayne and Wu, Lingfeng and Alleva, Fil and Droppo,
- Jasha and Huang, Xuedong and Stolcke, Andreas},
- booktitle = {2018 IEEE International Conference on Acoustics, Speech
- and Signal Processing (ICASSP)},
- pages = {5934--5938},
- year = {2018},
- organization = {IEEE}
- }
-
- @InProceedings{ Ye.Yin.Lee.ea.2011,
- title = {Exploiting geographical influence for collaborative
- point-of-interest recommendation},
- author = {Ye, Mao and Yin, Peifeng and Lee, Wang-Chien and Lee,
- Dik-Lun},
- booktitle = {Proceedings of the 34th international ACM SIGIR conference
- on Research and development in Information Retrieval},
- pages = {325--334},
- year = {2011},
- organization = {ACM}
- }
-
- @Article{ You.Gitman.Ginsburg.2017,
- title = {Large batch training of convolutional networks},
- author = {You, Yang and Gitman, Igor and Ginsburg, Boris},
- journal = {arXiv preprint arXiv:1708.03888},
- year = {2017}
- }
-
- @InProceedings{ Zaheer.Reddi.Sachan.ea.2018,
- title = {Adaptive methods for nonconvex optimization},
- author = {Zaheer, Manzil and Reddi, Sashank and Sachan, Devendra and
- Kale, Satyen and Kumar, Sanjiv},
- booktitle = {Advances in Neural Information Processing Systems},
- pages = {9793--9803},
- year = {2018}
- }
-
- @Article{ Zeiler.2012,
- title = {{ADADELTA}: an adaptive learning rate method},
- author = {Zeiler, Matthew D},
- journal = {arXiv preprint arXiv:1212.5701},
- year = {2012}
- }
-
- @InProceedings{ Zhang.Tay.Zhang.ea.2021,
- title = {Beyond Fully-Connected Layers with Quaternions:
- Parameterization of Hypercomplex Multiplications with 1/n
- Parameters},
- author = {Zhang, Aston and Tay, Yi and Zhang, Shuai and Chan, Alvin
- and Luu, Anh Tuan and Hui, Siu Cheung and Fu, Jie},
- booktitle = {International Conference on Learning Representations},
- year = {2021}
- }
-
- @Article{ Zhang.Yao.Sun.ea.2019,
- title = {Deep learning based recommender system: A survey and new
- perspectives},
- author = {Zhang, Shuai and Yao, Lina and Sun, Aixin and Tay, Yi},
- journal = {ACM Computing Surveys (CSUR)},
- volume = {52},
- number = {1},
- pages = {5},
- year = {2019},
- publisher = {ACM}
- }
-
- @Article{ Zhao.Zheng.Xu.ea.2019,
- title = {Object detection with deep learning: A review},
- author = {Zhao, Zhong-Qiu and Zheng, Peng and Xu, Shou-tao and Wu,
- Xindong},
- journal = {IEEE transactions on neural networks and learning
- systems},
- volume = {30},
- number = {11},
- pages = {3212--3232},
- year = {2019},
- publisher = {IEEE}
- }
-
- @InProceedings{ Zhu.Kiros.Zemel.ea.2015,
- title = {Aligning books and movies: Towards story-like visual
- explanations by watching movies and reading books},
- author = {Zhu, Yukun and Kiros, Ryan and Zemel, Rich and
- Salakhutdinov, Ruslan and Urtasun, Raquel and Torralba,
- Antonio and Fidler, Sanja},
- booktitle = {Proceedings of the IEEE international conference on
- computer vision},
- pages = {19--27},
- year = {2015}
- }
-
- @InProceedings{ Zhu.Park.Isola.ea.2017,
- title = {Unpaired image-to-image translation using cycle-consistent
- adversarial networks},
- author = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and
- Efros, Alexei A},
- booktitle = {Proceedings of the IEEE international conference on
- computer vision},
- pages = {2223--2232},
- year = {2017}
- }
|