% Exported from the dblp publication search (default search action).
% Export results for the query "stream:conf/rlc:".
@article{DBLP:conf/rlc/0001S024,
  author    = {Alexander Levine and Peter Stone and Amy Zhang},
  title     = {Multistep Inverse Is Not All You Need},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {884--925},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0001S024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0002024,
  author    = {Rafael Rodr{\'{\i}}guez{-}S{\'{a}}nchez and George Konidaris},
  title     = {Learning Abstract World Models for Value-preserving Planning with Options},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1733--1758},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0002024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0003LLM24,
  author    = {Mohamed Elsayed and Qingfeng Lan and Clare Lyle and A. Rupam Mahmood},
  title     = {Weight Clipping for Deep Continual and Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2198--2217},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0003LLM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0012LX24,
  author    = {Nan Jiang and Jinzhao Li and Yexiang Xue},
  title     = {A Tighter Convergence Proof of Reverse Experience Replay},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {470--480},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0012LX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AbelHH24,
  author    = {David Abel and Mark K. Ho and Anna Harutyunyan},
  title     = {Three Dogmas of Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {629--644},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AbelHH24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AckermannOS24,
  author    = {Johannes Ackermann and Takayuki Osa and Masashi Sugiyama},
  title     = {Offline Reinforcement Learning from Datasets with Structured Non-Stationarity},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2140--2161},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AckermannOS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Q keep it capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/AdamczykMTK24,
  author    = {Jacob Adamczyk and Volodymyr Makarenko and Stas Tiomkin and Rahul V. Kulkarni},
  title     = {Boosting Soft {Q}-Learning by Bounding},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2373--2399},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AdamczykMTK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AghajohariCDAC24,
  author    = {Milad Aghajohari and Tim Cooijmans and Juan Agustin Duque and Shunichi Akatsuka and Aaron C. Courville},
  title     = {Best Response Shaping},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {798--818},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AghajohariCDAC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AgostinelliS24,
  author    = {Forest Agostinelli and Misagh Soltani},
  title     = {Learning Discrete World Models for Heuristic Search},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1781--1792},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AgostinelliS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Almuzairee0C24,
  author    = {Abdulaziz Almuzairee and Nicklas Hansen and Henrik I. Christensen},
  title     = {A Recipe for Unbounded Data Augmentation in Visual Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {130--157},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Almuzairee0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AmirNL24,
  author    = {Nadav Amir and Yael Niv and Angela Langdon},
  title     = {States as goal-directed concepts: an epistemic approach to state-representation learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2096--2106},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AmirNL24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AntonovD24,
  author    = {Georgy Antonov and Peter Dayan},
  title     = {Exploring Uncertainty in Distributional Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {961--978},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AntonovD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Author names use the "Last, First" form so the compound surname "De Asis"
% is not split into a given name "Kris De" and a family name "Asis".
% NOTE(review): assumes "De Asis" is the family name -- confirm against the paper.
@article{DBLP:conf/rlc/AsisS24,
  author    = {De Asis, Kris and Sutton, Richard S.},
  title     = {An Idiosyncrasy of Time-discretization in Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1306--1316},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AsisS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Author names use the "Last, First" form so the compound surname "El Asri"
% is not split into a given name "Zakariae El" and a family name "Asri".
% NOTE(review): assumes "El Asri" is the family name -- confirm against the paper.
% The braces around Dyna keep it capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/AsriST24,
  author    = {El Asri, Zakariae and Sigaud, Olivier and Thome, Nicolas},
  title     = {Physics-Informed Model and Hybrid Planning for Efficient {Dyna}-Style Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {693--713},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AsriST24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around POMDPs keep the acronym from being lowercased by sentence-casing styles.
@article{DBLP:conf/rlc/AvalosBNRO24,
  author    = {Rapha{\"{e}}l Avalos and Eugenio Bargiacchi and Ann Now{\'{e}} and Diederik M. Roijers and Frans A. Oliehoek},
  title     = {Online Planning in {POMDPs} with State-Requests},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {108--129},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AvalosBNRO24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Monte-Carlo keep the proper noun capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/AyoubSZCGSS24,
  author    = {Alex Ayoub and David Szepesvari and Francesco Zanini and Bryan Chan and Dhawal Gupta and Bruno Castro da Silva and Dale Schuurmans},
  title     = {Mitigating the Curse of Horizon in {Monte-Carlo} Returns},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {563--572},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AyoubSZCGSS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AziziDA0VG24,
  author    = {Mohammad Javad Azizi and Thang Duong and Yasin Abbasi{-}Yadkori and Andr{\'{a}}s Gy{\"{o}}rgy and Claire Vernade and Mohammad Ghavamzadeh},
  title     = {Non-stationary Bandits and Meta-Learning with a Small Set of Optimal Arms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2461--2491},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AziziDA0VG24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around SplAgger preserve its internal capital under sentence-casing styles.
@article{DBLP:conf/rlc/BeckJVXW24,
  author    = {Jacob Beck and Matthew Thomas Jackson and Risto Vuorio and Zheng Xiong and Shimon Whiteson},
  title     = {{SplAgger}: Split Aggregation for Meta-Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {450--469},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BeckJVXW24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/BeckerMON24,
  author    = {Philipp Becker and Sebastian Mossburger and Fabian Otto and Gerhard Neumann},
  title     = {Combining Reconstruction and Contrastive Methods for Multimodal Representations in {RL}},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1619--1655},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BeckerMON24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/BedaywiRF24,
  author    = {Mark Bedaywi and Amin Rakhsha and Amir{-}massoud Farahmand},
  title     = {{PID} Accelerated Temporal Difference Algorithms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2071--2095},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BedaywiRF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/BoigeFQFRP24,
  author    = {Rapha{\"{e}}l Boige and Yannis Flet{-}Berliac and Lars C. P. M. Quaedvlieg and Arthur Flajolet and Guillaume Richard and Thomas Pierrot},
  title     = {{PASTA:} Pretrained Action-State Transformer Agents},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1511--1532},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BoigeFQFRP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChangFMD24,
  author    = {Wei{-}Di Chang and Scott Fujimoto and David Meger and Gregory Dudek},
  title     = {Imitation Learning from Observation through Optimal Transport},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1911--1923},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChangFMD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChenTTLLTZ24,
  author    = {Yuxin Chen and Chen Tang and Thomas Tian and Chenran Li and Jinning Li and Masayoshi Tomizuka and Wei Zhan},
  title     = {Quantifying Interaction Level Between Agents Helps Cost-efficient Generalization in Multi-agent Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1950--1964},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChenTTLLTZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChitnisYG24,
  author    = {Rohan Chitnis and Shentao Yang and Alborz Geramifard},
  title     = {Sequential Decision-Making for Inline Text Autocomplete},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {946--960},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChitnisYG24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around ICU-Sepsis keep the benchmark name's capitals under sentence-casing styles.
@article{DBLP:conf/rlc/ChoudharyGT24,
  author    = {Kartik Choudhary and Dhawal Gupta and Philip S. Thomas},
  title     = {{ICU-Sepsis}: {A} Benchmark {MDP} Built from Real Medical Data},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1546--1566},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChoudharyGT24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CornelisseV24,
  author    = {Daphne Cornelisse and Eugene Vinitsky},
  title     = {Human-compatible driving agents through data-regularized self-play reinforcement learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2320--2344},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CornelisseV24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorradoQBLH24,
  author    = {Nicholas E. Corrado and Yuxiao Qu and John U. Balis and Adam Labiosa and Josiah P. Hanna},
  title     = {Guided Data Augmentation for Offline Reinforcement Learning and Imitation Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {198--215},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorradoQBLH24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorsiARK0F24,
  author    = {Davide Corsi and Guy Amir and Andoni Rodr{\'{\i}}guez and Guy Katz and C{\'{e}}sar S{\'{a}}nchez and Roy Fox},
  title     = {Verification-Guided Shielding for Deep Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1759--1780},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorsiARK0F24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorsiCF24,
  author    = {Davide Corsi and Davide Camponogara and Alessandro Farinelli},
  title     = {Aquatic Navigation: {A} Challenging Benchmark for Deep Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1106--1123},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorsiCF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CousinsAL024,
  author    = {Cyrus Cousins and Kavosh Asadi and Elita A. Lobo and Michael Littman},
  title     = {On Welfare-Centric Fair Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1124--1137},
  year      = {2024},
  timestamp = {Tue, 17 Dec 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CousinsAL024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Q keep it capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/CramerFST24,
  author    = {Emma Cramer and Bernd Frauenknecht and Ramil Sabirov and Sebastian Trimpe},
  title     = {Contextualized Hybrid Ensemble {Q}-learning: Learning Fast with Control Priors},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {926--945},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CramerFST24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DaleyMW24,
  author    = {Brett Daley and Marlos C. Machado and Martha White},
  title     = {Demystifying the Recency Heuristic in Temporal-Difference Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1019--1036},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DaleyMW24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around OCAtari and Atari keep the product names' capitals under sentence-casing styles.
@article{DBLP:conf/rlc/DelfosseBGSK24,
  author    = {Quentin Delfosse and Jannis Bl{\"{u}}ml and Bjarne Gregori and Sebastian Sztwiertnia and Kristian Kersting},
  title     = {{OCAtari}: Object-Centric {Atari} 2600 Reinforcement Learning Environments},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {400--449},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DelfosseBGSK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DierkesCHT24,
  author    = {Julian Dierkes and Emma Cramer and Holger H. Hoos and Sebastian Trimpe},
  title     = {Combining Automated Optimisation of Hyperparameters and Reward Shape},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1441--1466},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DierkesCHT24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Dizon-ParadisWC24,
  author    = {Olivia P. Dizon{-}Paradis and Stephen E. Wormald and Daniel E. Capecci and Avanti Bhandarkar and Damon L. Woodard},
  title     = {Resource Usage Evaluation of Discrete Model-Free Deep Reinforcement Learning Algorithms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2162--2177},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Dizon-ParadisWC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DokoYBP24,
  author    = {Gersi Doko and Guang Yang and Daniel S. Brown and Marek Petrik},
  title     = {{ROIL:} Robust Offline Imitation Learning without Trajectories},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {593--605},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DokoYBP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DrappoMR24,
  author    = {Gianluca Drappo and Alberto Maria Metelli and Marcello Restelli},
  title     = {A Provably Efficient Option-Based Algorithm for both High-Level and Low-Level Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {819--839},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DrappoMR24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DunionA24,
  author    = {Mhairi Dunion and Stefano V. Albrecht},
  title     = {Multi-view Disentanglement for Reinforcement Learning with Multiple Cameras},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {498--515},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DunionA24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/FreedWC0C24,
  author    = {Benjamin Freed and Thomas Wei and Roberto Calandra and Jeff Schneider and Howie Choset},
  title     = {Unifying Model-Based and Model-Free Reinforcement Learning with Equivalent Policy Sets},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {283--301},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/FreedWC0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/FungDHM24,
  author    = {Ho Long Fung and Victor{-}Alexandru Darvariu and Stephen Hailes and Mirco Musolesi},
  title     = {Trust-based Consensus in Multi-Agent Reinforcement Learning Systems},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {714--732},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/FungDHM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Bellman keep the proper noun capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/GolowichM24,
  author    = {Noah Golowich and Ankur Moitra},
  title     = {The Role of Inherent {Bellman} Error in Offline Reinforcement Learning with Linear Function Approximation},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {302--341},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/GolowichM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HuZZZYX024,
  author    = {Bin Hu and Chenyang Zhao and Pu Zhang and Zihao Zhou and Yuanhang Yang and Zenglin Xu and Bin Liu},
  title     = {Enabling Intelligent Interactions between an Agent and an {LLM:} {A} Reinforcement Learning Approach},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1289--1305},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HuZZZYX024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HuangGJP24,
  author    = {Audrey Huang and Mohammad Ghavamzadeh and Nan Jiang and Marek Petrik},
  title     = {Non-adaptive Online Finetuning for Offline Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {182--197},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HuangGJP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HugessenCMB24,
  author    = {Adriana Hugessen and Roger Creus Castanyer and Faisal Mohamed and Glen Berseth},
  title     = {Surprise-Adaptive Intrinsic Motivation for Unsupervised Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {547--562},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HugessenCMB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HussingMSKE24,
  author    = {Marcel Hussing and Jorge A. Mendez and Anisha Singrodia and Cassandra Kent and Eric Eaton},
  title     = {Robotic Manipulation Datasets for Offline Compositional Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {979--994},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HussingMSKE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HussingVGFE24,
  author    = {Marcel Hussing and Claas Voelcker and Igor Gilitschenski and Amir{-}massoud Farahmand and Eric Eaton},
  title     = {Dissecting Deep {RL} with High Update Ratios: Combatting Value Divergence},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {995--1018},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HussingVGFE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/IshfaqT0LLMP024,
  author    = {Haque Ishfaq and Yixin Tan and Yu Yang and Qingfeng Lan and Jianfeng Lu and A. Rupam Mahmood and Doina Precup and Pan Xu},
  title     = {More Efficient Randomized Exploration for Reinforcement Learning via Approximate Sampling},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1211--1235},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/IshfaqT0LLMP024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/JacksonMLEWF24,
  author    = {Matthew Thomas Jackson and Michael T. Matthews and Cong Lu and Benjamin Ellis and Shimon Whiteson and Jakob Nicolaus Foerster},
  title     = {Policy-Guided Diffusion},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1855--1872},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JacksonMLEWF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/JarnefeltKD24,
  author    = {Oliver J{\"{a}}rnefelt and Mahdi Kallel and Carlo D'Eramo},
  title     = {Cyclicity-Regularized Coordination Graphs},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {366--379},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JarnefeltKD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around SwiftTD preserve its internal capitals under sentence-casing styles.
@article{DBLP:conf/rlc/JavedSS24,
  author    = {Khurram Javed and Arsalan Sharifnassab and Richard S. Sutton},
  title     = {{SwiftTD}: {A} Fast and Robust Algorithm for Temporal Difference Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {840--863},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JavedSS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Thompson keep the proper noun capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/JeongM24,
  author    = {Woojin Jeong and Seungki Min},
  title     = {Improving {Thompson} Sampling via Information Relaxation for Budgeted Multi-armed Bandits},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {16--28},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JeongM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/JordanNK0T24,
  author    = {Scott M. Jordan and Samuel Neumann and James E. Kostas and Adam White and Philip S. Thomas},
  title     = {The Cliff of Overcommitment with Policy Gradient Step Sizes},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {864--883},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JordanNK0T24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KalraB24,
  author    = {Akansha Kalra and Daniel S. Brown},
  title     = {Can Differentiable Decision Trees Enable Interpretable Reward Learning from Human Feedback?},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1887--1910},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KalraB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KanariosZ024,
  author    = {Kellen Kanarios and Qining Zhang and Lei Ying},
  title     = {Cost Aware Best Arm Identification},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1533--1545},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KanariosZ024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KapoorF0C24,
  author    = {Aditya Kapoor and Benjamin Freed and Jeff Schneider and Howie Choset},
  title     = {Assigning Credit with Partial Reward Decoupling in Multi-Agent Proximal Policy Optimization},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {380--399},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KapoorF0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Karamzade0KF24,
  author    = {Armin Karamzade and Kyungmin Kim and Montek Kalsi and Roy Fox},
  title     = {Reinforcement Learning from Delayed Observations via World Models},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2123--2139},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Karamzade0KF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KoyamadaNI24,
  author    = {Sotetsu Koyamada and Soichiro Nishimori and Shin Ishii},
  title     = {A Batch Sequential Halving Algorithm without Performance Degradation},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2218--2232},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KoyamadaNI24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/LambrechtsBE24,
  author    = {Gaspard Lambrechts and Adrien Bolland and Damien Ernst},
  title     = {Informed {POMDP:} Leveraging Additional Information in Model-Based {RL}},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {763--784},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LambrechtsBE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/LanMYX24,
  author    = {Qingfeng Lan and A. Rupam Mahmood and Shuicheng Yan and Zhongwen Xu},
  title     = {Learning to Optimize for Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {481--497},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LanMYX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/LiH0GP24,
  author    = {Changling Li and Zhang{-}Wei Hong and Pulkit Agrawal and Divyansh Garg and Joni Pajarinen},
  title     = {{ROER:} Regularized Optimal Experience Replay},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1598--1618},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LiH0GP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/LobelP24,
  author    = {Sam Lobel and Ronald Parr},
  title     = {An Optimal Tightness Bound for the Simulation Lemma},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {785--797},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LobelP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around MDPs keep the acronym from being lowercased by sentence-casing styles.
@article{DBLP:conf/rlc/LuARV24,
  author    = {Michael Lu and Matin Aghaei and Anant Raj and Sharan Vaswani},
  title     = {Towards Principled, Practical Policy Gradient for Bandits and Tabular {MDPs}},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {216--282},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LuARV24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Luo0JZ24,
  author    = {Yu Luo and Fuchun Sun and Tianying Ji and Xianyuan Zhan},
  title     = {Bidirectional-Reachable Hierarchical Reinforcement Learning with Mutually Responsive Policies},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {733--762},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Luo0JZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around CVaR preserve the mixed-case acronym under sentence-casing styles.
@article{DBLP:conf/rlc/LuoPW0P24,
  author    = {Yudong Luo and Yangchen Pan and Han Wang and Philip Torr and Pascal Poupart},
  title     = {A Simple Mixture Policy Parameterization for Improving Sample Efficiency of {CVaR} Optimization},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {573--592},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/LuoPW0P24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/MadhavanM0B24,
  author    = {Rahul Madhavan and Aurghya Maiti and Gaurav Sinha and Siddharth Barman},
  title     = {Causal Contextual Bandits with Adaptive Context},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2233--2263},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/MadhavanM0B24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/MattsonAB24,
  author    = {Connor Mattson and Anurag Aribandi and Daniel S. Brown},
  title     = {Representation Alignment from Human Feedback for Cross-Embodiment Reward Learning from Mixed-Quality Demonstrations},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1822--1840},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/MattsonAB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/MaytieDAV24,
  author    = {L{\'{e}}opold Mayti{\'{e}} and Benjamin Devillers and Alexandre Arnold and Rufin VanRullen},
  title     = {Zero-shot cross-modal transfer of Reinforcement Learning policies through a Global Workspace},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1410--1426},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/MaytieDAV24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/McInroeJAS24,
  author    = {Trevor McInroe and Adam Jelley and Stefano V. Albrecht and Amos J. Storkey},
  title     = {Planning to Go Out-of-Distribution in Offline-to-Online Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {516--546},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/McInroeJAS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Markov keep the proper noun capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/McMahanW0ZX24,
  author    = {Jeremy McMahan and Young Wu and Yudong Chen and Jerry Zhu and Qiaomin Xie},
  title     = {Inception: Efficiently Computable Misinformation Attacks on {Markov} Games},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2345--2358},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/McMahanW0ZX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/MelcerAT24,
  author    = {Daniel Melcer and Christopher Amato and Stavros Tripakis},
  title     = {Shield Decomposition for Safe Reinforcement Learning in General Partially Observable Multi-Agent Environments},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1965--1994},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/MelcerAT24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Meyer0M24,
  author    = {Edan Meyer and Adam White and Marlos C. Machado},
  title     = {Harnessing Discrete Representations for Continual Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {606--628},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Meyer0M24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The braces around Monte Carlo and Markov keep the proper nouns capitalised under sentence-casing styles.
@article{DBLP:conf/rlc/MorimuraOAZ24,
  author    = {Tetsuro Morimura and Kazuhiro Ota and Kenshi Abe and Peinan Zhang},
  title     = {Policy Gradient Algorithms with {Monte Carlo} Tree Learning for Non-{Markov} Decision Processes},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1351--1376},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/MorimuraOAZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Moss0CK24,
  author    = {Robert J. Moss and Anthony Corso and Jef Caers and Mykel J. Kochenderfer},
  title     = {{BetaZero}: Belief-State Planning for Long-Horizon {POMDPs} using Learned Approximations},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {158--181},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Moss0CK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Naik0TS24,
  author    = {Abhishek Naik and Yi Wan and Manan Tomar and Richard S. Sutton},
  title     = {Reward Centering},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1995--2016},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Naik0TS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Obando-CeronACC24,
  author    = {Johan Samir Obando{-}Ceron and Jo{\~{a}}o Guilherme Madeira Ara{\'{u}}jo and Aaron C. Courville and Pablo Samuel Castro},
  title     = {On the consistency of hyper-parameter selection in value-based deep reinforcement learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1037--1059},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Obando-CeronACC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/OertellCZB024,
  author    = {Owen Oertell and Jonathan D. Chang and Yiyi Zhang and Kiant{\'{e}} Brantley and Wen Sun},
  title     = {{RL} for Consistency Models: Reward Guided Text-to-Image Generation with Fast Inference},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1656--1673},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/OertellCZB024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/OmuraOMH24,
  author    = {Motoki Omura and Takayuki Osa and Yusuke Mukuta and Tatsuya Harada},
  title     = {Stabilizing Extreme {Q-learning} by {Maclaurin} Expansion},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1427--1440},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/OmuraOMH24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PanahiPW024,
  author    = {Parham Mohammad Panahi and Andrew Patterson and Martha White and Adam White},
  title     = {Investigating the Interplay of Prioritized Replay and Generalization},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2041--2058},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/PanahiPW024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PapiniMMR24,
  author    = {Matteo Papini and Giorgio Manganini and Alberto Maria Metelli and Marcello Restelli},
  title     = {Policy Gradient with Active Importance Sampling},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {645--675},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/PapiniMMR24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PattersonNKW024,
  author    = {Andrew Patterson and Samuel Neumann and Raksha Kumaraswamy and Martha White and Adam White},
  title     = {Cross-environment Hyperparameter Tuning for Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2298--2319},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/PattersonNKW024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PrasannaFRB24,
  author    = {Sai Prasanna and Karim Farid and Raghu Rajan and Andr{\'{e}} Biedenkapp},
  title     = {Dreaming of Many Worlds: Learning Contextual World Models aids Zero-Shot Generalization},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1317--1350},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/PrasannaFRB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PriesterBGS24,
  author    = {Jan de Priester and Zachary I. Bell and Prashant Ganesh and Ricardo G. Sanfelice},
  title     = {{MultiHyRL}: Robust Hybrid {RL} for Obstacle Avoidance against Adversarial Attacks on the Observation Space},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {2017--2040},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/PriesterBGS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RafailovHSKSKHK24,
  author    = {Rafael Rafailov and Kyle Beltran Hatch and Anikait Singh and Aviral Kumar and Laura M. Smith and Ilya Kostrikov and Philippe Hansen{-}Estruch and Victor Kolev and Philip J. Ball and Jiajun Wu and Sergey Levine and Chelsea Finn},
  title     = {{D5RL:} Diverse Datasets for Data-Driven Deep Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2178--2197},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/RafailovHSKSKHK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RaffinSKASS24,
  author    = {Antonin Raffin and Olivier Sigaud and Jens Kober and Alin Albu{-}Sch{\"{a}}ffer and Jo{\~{a}}o Silv{\'{e}}rio and Freek Stulp},
  title     = {An Open-Loop Baseline for Reinforcement Learning Locomotion Tasks},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {92--107},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/RaffinSKASS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RentingMHJ24,
  author    = {Bram M. Renting and Thomas M. Moerland and Holger H. Hoos and Catholijn M. Jonker},
  title     = {Towards General Negotiation Strategies with End-to-End Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2059--2070},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/RentingMHJ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RongK24,
  author    = {Frieda Rong and Max Kleiman{-}Weiner},
  title     = {Value Internalization: Learning and Generalizing from Social Reward},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1060--1071},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/RongK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RudolphCBLN024,
  author    = {Max Rudolph and Caleb Chuck and Kevin Black and Misha Lvovsky and Scott Niekum and Amy Zhang},
  title     = {Learning Action-based Representations Using Invariance},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {342--365},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/RudolphCBLN024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/SaberM24,
  author    = {Hassan Saber and Odalric{-}Ambrym Maillard},
  title     = {Bandits with Multimodal Structure},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2400--2439},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/SaberM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/StaleySGS24,
  author    = {James Staley and Elaine Short and Shivam Goel and Yash Shukla},
  title     = {Agent-Centric Human Demonstrations Train World Models},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1873--1886},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/StaleySGS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/SuauSO24,
  author    = {Miguel Suau and Matthijs T. J. Spaan and Frans A. Oliehoek},
  title     = {Bad Habits: Policy Confounding and Out-of-Trajectory Generalization in {RL}},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1711--1732},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/SuauSO24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/TalvitieSLHBZW24,
  author    = {Erin J. Talvitie and Zilei Shao and Huiying Li and Jinghan Hu and Jacob Boerma and Rory Zhao and Xintong Wang},
  title     = {Bounding-Box Inference for Error-Aware Model-Based Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2440--2460},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/TalvitieSLHBZW24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/TanX24,
  author    = {Kevin Tan and Ziping Xu},
  title     = {A Natural Extension To Online Algorithms For Hybrid {RL} With Limited Coverage},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1252--1264},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/TanX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VasanWSBJM24,
  author    = {Gautham Vasan and Yan Wang and Fahim Shahriar and James Bergstra and Martin J{\"{a}}gersand and A. Rupam Mahmood},
  title     = {Revisiting Sparse Rewards for Goal-Reaching Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1841--1854},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/VasanWSBJM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VascoSKSWS24,
  author    = {Miguel Vasco and Takuma Seno and Kenta Kawamoto and Kaushik Subramanian and Peter R. Wurman and Peter Stone},
  title     = {A Super-human Vision-based Reinforcement Learning Agent for Autonomous Racing in {Gran Turismo}},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1674--1710},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/VascoSKSWS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Vlastelica0MK24,
  author    = {Marin Vlastelica and Jin Cheng and Georg Martius and Pavel Kolev},
  title     = {Offline Diversity Maximization under Imitation Constraints},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1377--1409},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Vlastelica0MK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VoelckerKGF24,
  author    = {Claas Voelcker and Tyler Kastner and Igor Gilitschenski and Amir{-}massoud Farahmand},
  title     = {When does Self-Prediction help? Understanding Auxiliary Tasks in Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1567--1597},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/VoelckerKGF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WangSC24,
  author    = {He Wang and Laixi Shi and Yuejie Chi},
  title     = {Sample Complexity of Offline Distributionally Robust Linear {Markov} Decision Processes},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1467--1510},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/WangSC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WangWLKT024,
  author    = {Junxiong Wang and Kaiwen Wang and Yueying Li and Nathan Kallus and Immanuel Trummer and Wen Sun},
  title     = {{JoinGym}: An Efficient Join Order Selection Environment},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {64--91},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/WangWLKT024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WilliOFDC24,
  author    = {Timon Willi and Johan Samir Obando{-}Ceron and Jakob Nicolaus Foerster and Gintare Karolina Dziugaite and Pablo Samuel Castro},
  title     = {Mixture of Experts in a Mixture of {RL} settings},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1072--1105},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/WilliOFDC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WuA24,
  author    = {Shuang Wu and Arash A. Amini},
  title     = {Graph Neural {Thompson} Sampling},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {29--63},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/WuA24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WuHDMRA24,
  author    = {Philipp Wu and Kourosh Hakhamaneshi and Yuqing Du and Igor Mordatch and Aravind Rajeswaran and Pieter Abbeel},
  title     = {Semi-Supervised One Shot Imitation Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2284--2297},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/WuHDMRA24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Xu0R24,
  author    = {Wanqiao Xu and Shi Dong and Van Roy, Benjamin},
  title     = {Posterior Sampling for Continuing Environments},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2107--2122},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Xu0R24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/YaoPDE24,
  author    = {Jiayu Yao and Weiwei Pan and Finale Doshi{-}Velez and Barbara E. Engelhardt},
  title     = {Inverse Reinforcement Learning with Multiple Planning Horizons},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1138--1167},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/YaoPDE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZamboniCRM24,
  author    = {Riccardo Zamboni and Duilio Cirino and Marcello Restelli and Mirco Mutti},
  title     = {The Limits of Pure Exploration in {POMDPs}: When the Observation Entropy is Enough},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {676--692},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZamboniCRM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZeitlerMMS024,
  author    = {Christopher K. Zeitler and Kristina Miller and Sayan Mitra and John Schierman and Mahesh Viswanathan},
  title     = {Optimizing Rewards while meeting {$\omega$}-regular Constraints},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2492--2514},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZeitlerMMS024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangGSLC24,
  author    = {Wancong Zhang and Anthony GX{-}Chen and Vlad Sobal and Yann LeCun and Nicolas Carion},
  title     = {Light-weight Probing of Unsupervised Representations for Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1924--1949},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhangGSLC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangPSSWY24,
  author    = {Zhengfei Zhang and Kishan Panaganti and Laixi Shi and Yanan Sui and Adam Wierman and Yisong Yue},
  title     = {Distributionally Robust Constrained Reinforcement Learning under Strong Duality},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1793--1821},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhangPSSWY24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangW024,
  author    = {Qining Zhang and Honghao Wei and Lei Ying},
  title     = {Reinforcement Learning from Human Feedback without Reward Inference: Model-Free Algorithm and Instance-Dependent Analysis},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1236--1251},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhangW024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangX24,
  author    = {Yixuan Zhang and Qiaomin Xie},
  title     = {Constant Stepsize {Q-learning}: Distributional Convergence, Bias and Extrapolation},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1168--1210},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhangX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhaoW24,
  author    = {Linfeng Zhao and Lawson L. S. Wong},
  title     = {Learning to Navigate in Mazes with Novel Layouts using Abstract Top-down Maps},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2359--2372},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhaoW24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhouKFR24,
  author    = {Allan Zhou and Vikash Kumar and Chelsea Finn and Aravind Rajeswaran},
  title     = {Policy Architectures for Compositional Generalization in Control},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2264--2283},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhouKFR24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhouRS024,
  author    = {Zhiyuan Zhou and Shreyas Sundara Raman and Henry Sowerby and Michael Littman},
  title     = {Tiered Reward: Designing Rewards for Specification and Fast Learning of Desired Behavior},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1265--1288},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ZhouRS024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.