@inproceedings{d8747956ff694725b2173c155fcf4f21,
title = "Expectation maximization for average reward decentralized {POMDPs}",
abstract = "Planning for multiple agents under uncertainty is often based on decentralized partially observable Markov decision processes (Dec-POMDPs), but current methods must de-emphasize long-term effects of actions by a discount factor. In tasks like wireless networking, agents are evaluated by average performance over time, both short and long-term effects of actions are crucial, and discounting based solutions can perform poorly. We show that under a common set of conditions expectation maximization (EM) for average reward Dec-POMDPs is stuck in a local optimum. We introduce a new average reward EM method: it outperforms a state of the art discounted-reward Dec-POMDP method in experiments.",
keywords = "Dec-POMDP, average reward, expectation maximization, planning under uncertainty",
author = "Joni Pajarinen and Jaakko Peltonen",
year = "2013",
doi = "10.1007/978-3-642-40988-2_9",
language = "English",
isbn = "978-3-642-40987-5",
volume = "8188",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
number = "PART 1",
pages = "129--144",
booktitle = "Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2013, Proceedings",
note = "European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, ECML PKDD 2013 ; Conference date: 01-01-2013",
}