Search dblp for Publications

export results for "stream:conf/rlc:"

 download as .bib file

@article{DBLP:conf/rlc/0001S024,
  author    = {Levine, Alexander and
               Stone, Peter and
               Zhang, Amy},
  title     = {Multistep Inverse Is Not All You Need},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {884--925},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0001S024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0002024,
  author    = {Rodr{\'{\i}}guez{-}S{\'{a}}nchez, Rafael and
               Konidaris, George},
  title     = {Learning Abstract World Models for Value-preserving Planning with Options},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1733--1758},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0002024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0003LLM24,
  author    = {Elsayed, Mohamed and
               Lan, Qingfeng and
               Lyle, Clare and
               Mahmood, A. Rupam},
  title     = {Weight Clipping for Deep Continual and Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2198--2217},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0003LLM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/0012LX24,
  author    = {Jiang, Nan and
               Li, Jinzhao and
               Xue, Yexiang},
  title     = {A Tighter Convergence Proof of Reverse Experience Replay},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {470--480},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/0012LX24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AbelHH24,
  author    = {Abel, David and
               Ho, Mark K. and
               Harutyunyan, Anna},
  title     = {Three Dogmas of Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {629--644},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AbelHH24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AckermannOS24,
  author    = {Ackermann, Johannes and
               Osa, Takayuki and
               Sugiyama, Masashi},
  title     = {Offline Reinforcement Learning from Datasets with Structured Non-Stationarity},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2140--2161},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AckermannOS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The "Q" of Q-Learning is braced so sentence-casing styles cannot lowercase it.
@article{DBLP:conf/rlc/AdamczykMTK24,
  author       = {Jacob Adamczyk and
                  Volodymyr Makarenko and
                  Stas Tiomkin and
                  Rahul V. Kulkarni},
  title        = {Boosting Soft {Q}-Learning by Bounding},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2373--2399},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/AdamczykMTK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/AghajohariCDAC24,
  author    = {Aghajohari, Milad and
               Cooijmans, Tim and
               Duque, Juan Agustin and
               Akatsuka, Shunichi and
               Courville, Aaron C.},
  title     = {Best Response Shaping},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {798--818},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AghajohariCDAC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AgostinelliS24,
  author    = {Agostinelli, Forest and
               Soltani, Misagh},
  title     = {Learning Discrete World Models for Heuristic Search},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1781--1792},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AgostinelliS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Almuzairee0C24,
  author    = {Almuzairee, Abdulaziz and
               Hansen, Nicklas and
               Christensen, Henrik I.},
  title     = {A Recipe for Unbounded Data Augmentation in Visual Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {130--157},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Almuzairee0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AmirNL24,
  author    = {Amir, Nadav and
               Niv, Yael and
               Langdon, Angela},
  title     = {States as goal-directed concepts: an epistemic approach to state-representation learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2096--2106},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AmirNL24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/AntonovD24,
  author    = {Antonov, Georgy and
               Dayan, Peter},
  title     = {Exploring Uncertainty in Distributional Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {961--978},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AntonovD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% First author is written in "Last, First" form so that BibTeX keeps the
% two-word surname together instead of parsing "De" as a given name.
% NOTE(review): family name taken to be "De Asis" -- confirm against the paper.
@article{DBLP:conf/rlc/AsisS24,
  author       = {De Asis, Kris and
                  Richard S. Sutton},
  title        = {An Idiosyncrasy of Time-discretization in Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1306--1316},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/AsisS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% First author is written in "Last, First" form so that BibTeX keeps the
% two-word surname together instead of parsing "El" as a given name.
% NOTE(review): family name taken to be "El Asri" -- confirm against the paper.
% "Dyna" is braced so sentence-casing styles cannot lowercase it.
@article{DBLP:conf/rlc/AsriST24,
  author       = {El Asri, Zakariae and
                  Olivier Sigaud and
                  Nicolas Thome},
  title        = {Physics-Informed Model and Hybrid Planning for Efficient {Dyna}-Style
                  Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {693--713},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/AsriST24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The acronym "POMDPs" is braced so sentence-casing styles cannot lowercase it.
@article{DBLP:conf/rlc/AvalosBNRO24,
  author       = {Rapha{\"{e}}l Avalos and
                  Eugenio Bargiacchi and
                  Ann Now{\'{e}} and
                  Diederik M. Roijers and
                  Frans A. Oliehoek},
  title        = {Online Planning in {POMDPs} with State-Requests},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {108--129},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/AvalosBNRO24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Monte-Carlo" is braced so sentence-casing styles cannot lowercase it.
@article{DBLP:conf/rlc/AyoubSZCGSS24,
  author       = {Alex Ayoub and
                  David Szepesvari and
                  Francesco Zanini and
                  Bryan Chan and
                  Dhawal Gupta and
                  Bruno Castro da Silva and
                  Dale Schuurmans},
  title        = {Mitigating the Curse of Horizon in {Monte-Carlo} Returns},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {563--572},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/AyoubSZCGSS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/AziziDA0VG24,
  author    = {Azizi, Mohammad Javad and
               Duong, Thang and
               Abbasi{-}Yadkori, Yasin and
               Gy{\"{o}}rgy, Andr{\'{a}}s and
               Vernade, Claire and
               Ghavamzadeh, Mohammad},
  title     = {Non-stationary Bandits and Meta-Learning with a Small Set of Optimal Arms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2461--2491},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/AziziDA0VG24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The method name "SplAgger" is braced so sentence-casing styles keep its
% internal capital.
@article{DBLP:conf/rlc/BeckJVXW24,
  author       = {Jacob Beck and
                  Matthew Thomas Jackson and
                  Risto Vuorio and
                  Zheng Xiong and
                  Shimon Whiteson},
  title        = {{SplAgger}: Split Aggregation for Meta-Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {450--469},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/BeckJVXW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/BeckerMON24,
  author    = {Becker, Philipp and
               Mossburger, Sebastian and
               Otto, Fabian and
               Neumann, Gerhard},
  title     = {Combining Reconstruction and Contrastive Methods for Multimodal Representations in {RL}},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1619--1655},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BeckerMON24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/BedaywiRF24,
  author    = {Bedaywi, Mark and
               Rakhsha, Amin and
               Farahmand, Amir{-}massoud},
  title     = {{PID} Accelerated Temporal Difference Algorithms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2071--2095},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BedaywiRF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/BoigeFQFRP24,
  author    = {Boige, Rapha{\"{e}}l and
               Flet{-}Berliac, Yannis and
               Quaedvlieg, Lars C. P. M. and
               Flajolet, Arthur and
               Richard, Guillaume and
               Pierrot, Thomas},
  title     = {{PASTA:} Pretrained Action-State Transformer Agents},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1511--1532},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/BoigeFQFRP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChangFMD24,
  author    = {Chang, Wei{-}Di and
               Fujimoto, Scott and
               Meger, David and
               Dudek, Gregory},
  title     = {Imitation Learning from Observation through Optimal Transport},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1911--1923},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChangFMD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChenTTLLTZ24,
  author    = {Chen, Yuxin and
               Tang, Chen and
               Tian, Thomas and
               Li, Chenran and
               Li, Jinning and
               Tomizuka, Masayoshi and
               Zhan, Wei},
  title     = {Quantifying Interaction Level Between Agents Helps Cost-efficient Generalization in Multi-agent Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1950--1964},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChenTTLLTZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/ChitnisYG24,
  author    = {Chitnis, Rohan and
               Yang, Shentao and
               Geramifard, Alborz},
  title     = {Sequential Decision-Making for Inline Text Autocomplete},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {946--960},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/ChitnisYG24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The benchmark name "ICU-Sepsis" is braced so sentence-casing styles cannot
% lowercase its capitals.
@article{DBLP:conf/rlc/ChoudharyGT24,
  author       = {Kartik Choudhary and
                  Dhawal Gupta and
                  Philip S. Thomas},
  title        = {{ICU-Sepsis}: {A} Benchmark {MDP} Built from Real Medical Data},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1546--1566},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ChoudharyGT24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/CornelisseV24,
  author    = {Cornelisse, Daphne and
               Vinitsky, Eugene},
  title     = {Human-compatible driving agents through data-regularized self-play reinforcement learning},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2320--2344},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CornelisseV24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorradoQBLH24,
  author    = {Corrado, Nicholas E. and
               Qu, Yuxiao and
               Balis, John U. and
               Labiosa, Adam and
               Hanna, Josiah P.},
  title     = {Guided Data Augmentation for Offline Reinforcement Learning and Imitation Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {198--215},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorradoQBLH24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorsiARK0F24,
  author    = {Corsi, Davide and
               Amir, Guy and
               Rodr{\'{\i}}guez, Andoni and
               Katz, Guy and
               S{\'{a}}nchez, C{\'{e}}sar and
               Fox, Roy},
  title     = {Verification-Guided Shielding for Deep Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1759--1780},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorsiARK0F24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CorsiCF24,
  author    = {Corsi, Davide and
               Camponogara, Davide and
               Farinelli, Alessandro},
  title     = {Aquatic Navigation: {A} Challenging Benchmark for Deep Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1106--1123},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CorsiCF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/CousinsAL024,
  author    = {Cousins, Cyrus and
               Asadi, Kavosh and
               Lobo, Elita A. and
               Littman, Michael},
  title     = {On Welfare-Centric Fair Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1124--1137},
  year      = {2024},
  timestamp = {Tue, 17 Dec 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/CousinsAL024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The "Q" of Q-learning is braced so sentence-casing styles cannot lowercase it.
@article{DBLP:conf/rlc/CramerFST24,
  author       = {Emma Cramer and
                  Bernd Frauenknecht and
                  Ramil Sabirov and
                  Sebastian Trimpe},
  title        = {Contextualized Hybrid Ensemble {Q}-learning: Learning Fast with Control
                  Priors},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {926--945},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/CramerFST24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/DaleyMW24,
  author    = {Daley, Brett and
               Machado, Marlos C. and
               White, Martha},
  title     = {Demystifying the Recency Heuristic in Temporal-Difference Learning},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1019--1036},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DaleyMW24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "OCAtari" and "Atari" are braced so sentence-casing styles cannot lowercase
% their capitals.
@article{DBLP:conf/rlc/DelfosseBGSK24,
  author       = {Quentin Delfosse and
                  Jannis Bl{\"{u}}ml and
                  Bjarne Gregori and
                  Sebastian Sztwiertnia and
                  Kristian Kersting},
  title        = {{OCAtari}: Object-Centric {Atari} 2600 Reinforcement Learning Environments},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {400--449},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/DelfosseBGSK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/DierkesCHT24,
  author    = {Dierkes, Julian and
               Cramer, Emma and
               Hoos, Holger H. and
               Trimpe, Sebastian},
  title     = {Combining Automated Optimisation of Hyperparameters and Reward Shape},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1441--1466},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DierkesCHT24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Dizon-ParadisWC24,
  author    = {Dizon{-}Paradis, Olivia P. and
               Wormald, Stephen E. and
               Capecci, Daniel E. and
               Bhandarkar, Avanti and
               Woodard, Damon L.},
  title     = {Resource Usage Evaluation of Discrete Model-Free Deep Reinforcement Learning Algorithms},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2162--2177},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Dizon-ParadisWC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DokoYBP24,
  author    = {Doko, Gersi and
               Yang, Guang and
               Brown, Daniel S. and
               Petrik, Marek},
  title     = {{ROIL:} Robust Offline Imitation Learning without Trajectories},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {593--605},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DokoYBP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DrappoMR24,
  author    = {Drappo, Gianluca and
               Metelli, Alberto Maria and
               Restelli, Marcello},
  title     = {A Provably Efficient Option-Based Algorithm for both High-Level and Low-Level Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {819--839},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DrappoMR24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/DunionA24,
  author    = {Dunion, Mhairi and
               Albrecht, Stefano V.},
  title     = {Multi-view Disentanglement for Reinforcement Learning with Multiple Cameras},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {498--515},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/DunionA24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/FreedWC0C24,
  author    = {Freed, Benjamin and
               Wei, Thomas and
               Calandra, Roberto and
               Schneider, Jeff and
               Choset, Howie},
  title     = {Unifying Model-Based and Model-Free Reinforcement Learning with Equivalent Policy Sets},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {283--301},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/FreedWC0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/FungDHM24,
  author    = {Fung, Ho Long and
               Darvariu, Victor{-}Alexandru and
               Hailes, Stephen and
               Musolesi, Mirco},
  title     = {Trust-based Consensus in Multi-Agent Reinforcement Learning Systems},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {714--732},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/FungDHM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The proper noun "Bellman" is braced so sentence-casing styles cannot
% lowercase it.
@article{DBLP:conf/rlc/GolowichM24,
  author       = {Noah Golowich and
                  Ankur Moitra},
  title        = {The Role of Inherent {Bellman} Error in Offline Reinforcement Learning
                  with Linear Function Approximation},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {302--341},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/GolowichM24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/HuZZZYX024,
  author    = {Hu, Bin and
               Zhao, Chenyang and
               Zhang, Pu and
               Zhou, Zihao and
               Yang, Yuanhang and
               Xu, Zenglin and
               Liu, Bin},
  title     = {Enabling Intelligent Interactions between an Agent and an {LLM:} {A} Reinforcement Learning Approach},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1289--1305},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HuZZZYX024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HuangGJP24,
  author    = {Huang, Audrey and
               Ghavamzadeh, Mohammad and
               Jiang, Nan and
               Petrik, Marek},
  title     = {Non-adaptive Online Finetuning for Offline Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {182--197},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HuangGJP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HugessenCMB24,
  author    = {Hugessen, Adriana and
               Castanyer, Roger Creus and
               Mohamed, Faisal and
               Berseth, Glen},
  title     = {Surprise-Adaptive Intrinsic Motivation for Unsupervised Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {547--562},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HugessenCMB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HussingMSKE24,
  author    = {Hussing, Marcel and
               Mendez, Jorge A. and
               Singrodia, Anisha and
               Kent, Cassandra and
               Eaton, Eric},
  title     = {Robotic Manipulation Datasets for Offline Compositional Reinforcement Learning},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {979--994},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HussingMSKE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/HussingVGFE24,
  author    = {Hussing, Marcel and
               Voelcker, Claas and
               Gilitschenski, Igor and
               Farahmand, Amir{-}massoud and
               Eaton, Eric},
  title     = {Dissecting Deep {RL} with High Update Ratios: Combatting Value Divergence},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {995--1018},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/HussingVGFE24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/IshfaqT0LLMP024,
  author    = {Ishfaq, Haque and
               Tan, Yixin and
               Yang, Yu and
               Lan, Qingfeng and
               Lu, Jianfeng and
               Mahmood, A. Rupam and
               Precup, Doina and
               Xu, Pan},
  title     = {More Efficient Randomized Exploration for Reinforcement Learning via Approximate Sampling},
  journal   = {{RLJ}},
  volume    = {3},
  pages     = {1211--1235},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/IshfaqT0LLMP024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/JacksonMLEWF24,
  author    = {Jackson, Matthew Thomas and
               Matthews, Michael T. and
               Lu, Cong and
               Ellis, Benjamin and
               Whiteson, Shimon and
               Foerster, Jakob Nicolaus},
  title     = {Policy-Guided Diffusion},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1855--1872},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JacksonMLEWF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/JarnefeltKD24,
  author    = {J{\"{a}}rnefelt, Oliver and
               Kallel, Mahdi and
               D'Eramo, Carlo},
  title     = {Cyclicity-Regularized Coordination Graphs},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {366--379},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JarnefeltKD24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% The algorithm name "SwiftTD" is braced so sentence-casing styles keep its
% internal capitals.
@article{DBLP:conf/rlc/JavedSS24,
  author       = {Khurram Javed and
                  Arsalan Sharifnassab and
                  Richard S. Sutton},
  title        = {{SwiftTD}: {A} Fast and Robust Algorithm for Temporal Difference Learning},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {840--863},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/JavedSS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun "Thompson" is braced so sentence-casing styles cannot
% lowercase it.
@article{DBLP:conf/rlc/JeongM24,
  author       = {Woojin Jeong and
                  Seungki Min},
  title        = {Improving {Thompson} Sampling via Information Relaxation for Budgeted
                  Multi-armed Bandits},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {16--28},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/JeongM24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/JordanNK0T24,
  author    = {Jordan, Scott M. and
               Neumann, Samuel and
               Kostas, James E. and
               White, Adam and
               Thomas, Philip S.},
  title     = {The Cliff of Overcommitment with Policy Gradient Step Sizes},
  journal   = {{RLJ}},
  volume    = {2},
  pages     = {864--883},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/JordanNK0T24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KalraB24,
  author    = {Kalra, Akansha and
               Brown, Daniel S.},
  title     = {Can Differentiable Decision Trees Enable Interpretable Reward Learning from Human Feedback?},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1887--1910},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KalraB24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KanariosZ024,
  author    = {Kanarios, Kellen and
               Zhang, Qining and
               Ying, Lei},
  title     = {Cost Aware Best Arm Identification},
  journal   = {{RLJ}},
  volume    = {4},
  pages     = {1533--1545},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KanariosZ024.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KapoorF0C24,
  author    = {Kapoor, Aditya and
               Freed, Benjamin and
               Schneider, Jeff and
               Choset, Howie},
  title     = {Assigning Credit with Partial Reward Decoupling in Multi-Agent Proximal Policy Optimization},
  journal   = {{RLJ}},
  volume    = {1},
  pages     = {380--399},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/KapoorF0C24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/Karamzade0KF24,
  author    = {Karamzade, Armin and
               Kim, Kyungmin and
               Kalsi, Montek and
               Fox, Roy},
  title     = {Reinforcement Learning from Delayed Observations via World Models},
  journal   = {{RLJ}},
  volume    = {5},
  pages     = {2123--2139},
  year      = {2024},
  timestamp = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/rlc/Karamzade0KF24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:conf/rlc/KoyamadaNI24,
  author       = {Sotetsu Koyamada and
                  Soichiro Nishimori and
                  Shin Ishii},
  title        = {A Batch Sequential Halving Algorithm without Performance Degradation},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2218--2232},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/KoyamadaNI24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LambrechtsBE24,
  author       = {Gaspard Lambrechts and
                  Adrien Bolland and
                  Damien Ernst},
  title        = {Informed {POMDP}: Leveraging Additional Information in Model-Based
                  {RL}},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {763--784},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LambrechtsBE24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LanMYX24,
  author       = {Qingfeng Lan and
                  A. Rupam Mahmood and
                  Shuicheng Yan and
                  Zhongwen Xu},
  title        = {Learning to Optimize for Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {481--497},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LanMYX24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LiH0GP24,
  author       = {Changling Li and
                  Zhang{-}Wei Hong and
                  Pulkit Agrawal and
                  Divyansh Garg and
                  Joni Pajarinen},
  title        = {{ROER}: Regularized Optimal Experience Replay},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1598--1618},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LiH0GP24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LobelP24,
  author       = {Sam Lobel and
                  Ronald Parr},
  title        = {An Optimal Tightness Bound for the Simulation Lemma},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {785--797},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LobelP24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LuARV24,
  author       = {Michael Lu and
                  Matin Aghaei and
                  Anant Raj and
                  Sharan Vaswani},
  title        = {Towards Principled, Practical Policy Gradient for Bandits and Tabular
                  {MDPs}},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {216--282},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LuARV24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Luo0JZ24,
  author       = {Yu Luo and
                  Fuchun Sun and
                  Tianying Ji and
                  Xianyuan Zhan},
  title        = {Bidirectional-Reachable Hierarchical Reinforcement Learning with Mutually
                  Responsive Policies},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {733--762},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Luo0JZ24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/LuoPW0P24,
  author       = {Yudong Luo and
                  Yangchen Pan and
                  Han Wang and
                  Philip Torr and
                  Pascal Poupart},
  title        = {A Simple Mixture Policy Parameterization for Improving Sample Efficiency
                  of {CVaR} Optimization},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {573--592},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/LuoPW0P24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/MadhavanM0B24,
  author       = {Rahul Madhavan and
                  Aurghya Maiti and
                  Gaurav Sinha and
                  Siddharth Barman},
  title        = {Causal Contextual Bandits with Adaptive Context},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2233--2263},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/MadhavanM0B24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/MattsonAB24,
  author       = {Connor Mattson and
                  Anurag Aribandi and
                  Daniel S. Brown},
  title        = {Representation Alignment from Human Feedback for Cross-Embodiment
                  Reward Learning from Mixed-Quality Demonstrations},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1822--1840},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/MattsonAB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/MaytieDAV24,
  author       = {L{\'{e}}opold Mayti{\'{e}} and
                  Benjamin Devillers and
                  Alexandre Arnold and
                  Rufin VanRullen},
  title        = {Zero-shot cross-modal transfer of Reinforcement Learning policies
                  through a Global Workspace},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1410--1426},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/MaytieDAV24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/McInroeJAS24,
  author       = {Trevor McInroe and
                  Adam Jelley and
                  Stefano V. Albrecht and
                  Amos J. Storkey},
  title        = {Planning to Go Out-of-Distribution in Offline-to-Online Reinforcement
                  Learning},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {516--546},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/McInroeJAS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/McMahanW0ZX24,
  author       = {Jeremy McMahan and
                  Young Wu and
                  Yudong Chen and
                  Jerry Zhu and
                  Qiaomin Xie},
  title        = {Inception: Efficiently Computable Misinformation Attacks on {Markov}
                  Games},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2345--2358},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/McMahanW0ZX24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/MelcerAT24,
  author       = {Daniel Melcer and
                  Christopher Amato and
                  Stavros Tripakis},
  title        = {Shield Decomposition for Safe Reinforcement Learning in General Partially
                  Observable Multi-Agent Environments},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1965--1994},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/MelcerAT24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Meyer0M24,
  author       = {Edan Meyer and
                  Adam White and
                  Marlos C. Machado},
  title        = {Harnessing Discrete Representations for Continual Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {606--628},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Meyer0M24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/MorimuraOAZ24,
  author       = {Tetsuro Morimura and
                  Kazuhiro Ota and
                  Kenshi Abe and
                  Peinan Zhang},
  title        = {Policy Gradient Algorithms with {Monte Carlo} Tree Learning for Non-{Markov}
                  Decision Processes},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1351--1376},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/MorimuraOAZ24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Moss0CK24,
  author       = {Robert J. Moss and
                  Anthony Corso and
                  Jef Caers and
                  Mykel J. Kochenderfer},
  title        = {{BetaZero}: Belief-State Planning for Long-Horizon {POMDPs} using Learned
                  Approximations},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {158--181},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Moss0CK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Naik0TS24,
  author       = {Abhishek Naik and
                  Yi Wan and
                  Manan Tomar and
                  Richard S. Sutton},
  title        = {Reward Centering},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1995--2016},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Naik0TS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Obando-CeronACC24,
  author       = {Johan Samir Obando{-}Ceron and
                  Jo{\~{a}}o Guilherme Madeira Ara{\'{u}}jo and
                  Aaron C. Courville and
                  Pablo Samuel Castro},
  title        = {On the consistency of hyper-parameter selection in value-based deep
                  reinforcement learning},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1037--1059},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Obando-CeronACC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/OertellCZB024,
  author       = {Owen Oertell and
                  Jonathan D. Chang and
                  Yiyi Zhang and
                  Kiant{\'{e}} Brantley and
                  Wen Sun},
  title        = {{RL} for Consistency Models: Reward Guided Text-to-Image Generation
                  with Fast Inference},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1656--1673},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/OertellCZB024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/OmuraOMH24,
  author       = {Motoki Omura and
                  Takayuki Osa and
                  Yusuke Mukuta and
                  Tatsuya Harada},
  title        = {Stabilizing Extreme {Q-learning} by {Maclaurin} Expansion},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1427--1440},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/OmuraOMH24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PanahiPW024,
  author       = {Parham Mohammad Panahi and
                  Andrew Patterson and
                  Martha White and
                  Adam White},
  title        = {Investigating the Interplay of Prioritized Replay and Generalization},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2041--2058},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/PanahiPW024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PapiniMMR24,
  author       = {Matteo Papini and
                  Giorgio Manganini and
                  Alberto Maria Metelli and
                  Marcello Restelli},
  title        = {Policy Gradient with Active Importance Sampling},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {645--675},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/PapiniMMR24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PattersonNKW024,
  author       = {Andrew Patterson and
                  Samuel Neumann and
                  Raksha Kumaraswamy and
                  Martha White and
                  Adam White},
  title        = {Cross-environment Hyperparameter Tuning for Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2298--2319},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/PattersonNKW024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PrasannaFRB24,
  author       = {Sai Prasanna and
                  Karim Farid and
                  Raghu Rajan and
                  Andr{\'{e}} Biedenkapp},
  title        = {Dreaming of Many Worlds: Learning Contextual World Models aids Zero-Shot
                  Generalization},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1317--1350},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/PrasannaFRB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/PriesterBGS24,
  author       = {Jan de Priester and
                  Zachary I. Bell and
                  Prashant Ganesh and
                  Ricardo G. Sanfelice},
  title        = {{MultiHyRL}: Robust Hybrid {RL} for Obstacle Avoidance against Adversarial
                  Attacks on the Observation Space},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {2017--2040},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/PriesterBGS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RafailovHSKSKHK24,
  author       = {Rafael Rafailov and
                  Kyle Beltran Hatch and
                  Anikait Singh and
                  Aviral Kumar and
                  Laura M. Smith and
                  Ilya Kostrikov and
                  Philippe Hansen{-}Estruch and
                  Victor Kolev and
                  Philip J. Ball and
                  Jiajun Wu and
                  Sergey Levine and
                  Chelsea Finn},
  title        = {{D5RL}: Diverse Datasets for Data-Driven Deep Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2178--2197},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/RafailovHSKSKHK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RaffinSKASS24,
  author       = {Antonin Raffin and
                  Olivier Sigaud and
                  Jens Kober and
                  Alin Albu{-}Sch{\"{a}}ffer and
                  Jo{\~{a}}o Silv{\'{e}}rio and
                  Freek Stulp},
  title        = {An Open-Loop Baseline for Reinforcement Learning Locomotion Tasks},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {92--107},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/RaffinSKASS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RentingMHJ24,
  author       = {Bram M. Renting and
                  Thomas M. Moerland and
                  Holger H. Hoos and
                  Catholijn M. Jonker},
  title        = {Towards General Negotiation Strategies with End-to-End Reinforcement
                  Learning},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2059--2070},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/RentingMHJ24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RongK24,
  author       = {Frieda Rong and
                  Max Kleiman{-}Weiner},
  title        = {Value Internalization: Learning and Generalizing from Social Reward},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1060--1071},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/RongK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/RudolphCBLN024,
  author       = {Max Rudolph and
                  Caleb Chuck and
                  Kevin Black and
                  Misha Lvovsky and
                  Scott Niekum and
                  Amy Zhang},
  title        = {Learning Action-based Representations Using Invariance},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {342--365},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/RudolphCBLN024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/SaberM24,
  author       = {Hassan Saber and
                  Odalric{-}Ambrym Maillard},
  title        = {Bandits with Multimodal Structure},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2400--2439},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/SaberM24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/StaleySGS24,
  author       = {James Staley and
                  Elaine Short and
                  Shivam Goel and
                  Yash Shukla},
  title        = {Agent-Centric Human Demonstrations Train World Models},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1873--1886},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/StaleySGS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/SuauSO24,
  author       = {Miguel Suau and
                  Matthijs T. J. Spaan and
                  Frans A. Oliehoek},
  title        = {Bad Habits: Policy Confounding and Out-of-Trajectory Generalization
                  in {RL}},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1711--1732},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/SuauSO24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/TalvitieSLHBZW24,
  author       = {Erin J. Talvitie and
                  Zilei Shao and
                  Huiying Li and
                  Jinghan Hu and
                  Jacob Boerma and
                  Rory Zhao and
                  Xintong Wang},
  title        = {Bounding-Box Inference for Error-Aware Model-Based Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2440--2460},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/TalvitieSLHBZW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/TanX24,
  author       = {Kevin Tan and
                  Ziping Xu},
  title        = {A Natural Extension To Online Algorithms For Hybrid {RL} With Limited
                  Coverage},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1252--1264},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/TanX24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VasanWSBJM24,
  author       = {Gautham Vasan and
                  Yan Wang and
                  Fahim Shahriar and
                  James Bergstra and
                  Martin J{\"{a}}gersand and
                  A. Rupam Mahmood},
  title        = {Revisiting Sparse Rewards for Goal-Reaching Reinforcement Learning},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1841--1854},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/VasanWSBJM24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VascoSKSWS24,
  author       = {Miguel Vasco and
                  Takuma Seno and
                  Kenta Kawamoto and
                  Kaushik Subramanian and
                  Peter R. Wurman and
                  Peter Stone},
  title        = {A Super-human Vision-based Reinforcement Learning Agent for Autonomous
                  Racing in {Gran Turismo}},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1674--1710},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/VascoSKSWS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Vlastelica0MK24,
  author       = {Marin Vlastelica and
                  Jin Cheng and
                  Georg Martius and
                  Pavel Kolev},
  title        = {Offline Diversity Maximization under Imitation Constraints},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1377--1409},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Vlastelica0MK24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/VoelckerKGF24,
  author       = {Claas Voelcker and
                  Tyler Kastner and
                  Igor Gilitschenski and
                  Amir{-}massoud Farahmand},
  title        = {When does Self-Prediction help? {Understanding} Auxiliary Tasks in Reinforcement
                  Learning},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1567--1597},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/VoelckerKGF24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WangSC24,
  author       = {He Wang and
                  Laixi Shi and
                  Yuejie Chi},
  title        = {Sample Complexity of Offline Distributionally Robust Linear {Markov}
                  Decision Processes},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1467--1510},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/WangSC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WangWLKT024,
  author       = {Junxiong Wang and
                  Kaiwen Wang and
                  Yueying Li and
                  Nathan Kallus and
                  Immanuel Trummer and
                  Wen Sun},
  title        = {{JoinGym}: An Efficient Join Order Selection Environment},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {64--91},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/WangWLKT024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WilliOFDC24,
  author       = {Timon Willi and
                  Johan Samir Obando{-}Ceron and
                  Jakob Nicolaus Foerster and
                  Gintare Karolina Dziugaite and
                  Pablo Samuel Castro},
  title        = {Mixture of Experts in a Mixture of {RL} settings},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1072--1105},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/WilliOFDC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WuA24,
  author       = {Shuang Wu and
                  Arash A. Amini},
  title        = {Graph Neural {Thompson} Sampling},
  journal      = {{RLJ}},
  volume       = {1},
  pages        = {29--63},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/WuA24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/WuHDMRA24,
  author       = {Philipp Wu and
                  Kourosh Hakhamaneshi and
                  Yuqing Du and
                  Igor Mordatch and
                  Aravind Rajeswaran and
                  Pieter Abbeel},
  title        = {Semi-Supervised One Shot Imitation Learning},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2284--2297},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/WuHDMRA24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/Xu0R24,
  author       = {Wanqiao Xu and
                  Shi Dong and
                  Benjamin {Van Roy}},
  title        = {Posterior Sampling for Continuing Environments},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2107--2122},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/Xu0R24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/YaoPDE24,
  author       = {Jiayu Yao and
                  Weiwei Pan and
                  Finale Doshi{-}Velez and
                  Barbara E. Engelhardt},
  title        = {Inverse Reinforcement Learning with Multiple Planning Horizons},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1138--1167},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/YaoPDE24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZamboniCRM24,
  author       = {Riccardo Zamboni and
                  Duilio Cirino and
                  Marcello Restelli and
                  Mirco Mutti},
  title        = {The Limits of Pure Exploration in {POMDPs}: When the Observation Entropy
                  is Enough},
  journal      = {{RLJ}},
  volume       = {2},
  pages        = {676--692},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZamboniCRM24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZeitlerMMS024,
  author       = {Christopher K. Zeitler and
                  Kristina Miller and
                  Sayan Mitra and
                  John Schierman and
                  Mahesh Viswanathan},
  title        = {Optimizing Rewards while meeting {$\omega$}-regular
                  Constraints},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2492--2514},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZeitlerMMS024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangGSLC24,
  author       = {Wancong Zhang and
                  Anthony GX{-}Chen and
                  Vlad Sobal and
                  Yann LeCun and
                  Nicolas Carion},
  title        = {Light-weight Probing of Unsupervised Representations for Reinforcement
                  Learning},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1924--1949},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhangGSLC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangPSSWY24,
  author       = {Zhengfei Zhang and
                  Kishan Panaganti and
                  Laixi Shi and
                  Yanan Sui and
                  Adam Wierman and
                  Yisong Yue},
  title        = {Distributionally Robust Constrained Reinforcement Learning under Strong
                  Duality},
  journal      = {{RLJ}},
  volume       = {4},
  pages        = {1793--1821},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhangPSSWY24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangW024,
  author       = {Qining Zhang and
                  Honghao Wei and
                  Lei Ying},
  title        = {Reinforcement Learning from Human Feedback without Reward Inference:
                  Model-Free Algorithm and Instance-Dependent Analysis},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1236--1251},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhangW024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhangX24,
  author       = {Yixuan Zhang and
                  Qiaomin Xie},
  title        = {Constant Stepsize {Q-learning}: Distributional Convergence, Bias and
                  Extrapolation},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1168--1210},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhangX24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhaoW24,
  author       = {Linfeng Zhao and
                  Lawson L. S. Wong},
  title        = {Learning to Navigate in Mazes with Novel Layouts using Abstract Top-down
                  Maps},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2359--2372},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhaoW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhouKFR24,
  author       = {Allan Zhou and
                  Vikash Kumar and
                  Chelsea Finn and
                  Aravind Rajeswaran},
  title        = {Policy Architectures for Compositional Generalization in Control},
  journal      = {{RLJ}},
  volume       = {5},
  pages        = {2264--2283},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhouKFR24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:conf/rlc/ZhouRS024,
  author       = {Zhiyuan Zhou and
                  Shreyas Sundara Raman and
                  Henry Sowerby and
                  Michael Littman},
  title        = {Tiered Reward: Designing Rewards for Specification and Fast Learning
                  of Desired Behavior},
  journal      = {{RLJ}},
  volume       = {3},
  pages        = {1265--1288},
  year         = {2024},
  timestamp    = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rlc/ZhouRS024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}