@comment{Cleaned auto-exported entry: stripped HTML markup from the abstract,
normalised month to the standard macro, fixed the page-range dash, and
restructured to one field per line. Citation key unchanged.}
@article{Louw_Labuschagne_Woodley_2022,
  author   = {Louw, Cobus and Labuschagne, Louwrens and Woodley, Tiffany},
  title    = {A Comparison of Reinforcement Learning Agents Applied to Traffic Signal Optimisation},
  journal  = {SUMO Conference Proceedings},
  volume   = {3},
  year     = {2022},
  month    = sep,
  pages    = {15--43},
  doi      = {10.52825/scp.v3i.116},
  url      = {https://www.tib-op.org/ojs/index.php/scp/article/view/116},
  abstract = {Traditional methods for traffic signal control at an urban intersection are not effective in controlling traffic flow for dynamic traffic demand, which leads to negative environmental, psychological and financial impacts for all parties involved. Urban traffic management is a complex problem with multiple factors affecting the control of traffic flow. With recent advancements in machine learning (ML), especially reinforcement learning (RL), there is potential to solve this problem. The idea is to allow an agent to learn optimal behaviour to maximise specific metrics through trial and error. In this paper we apply two RL algorithms, one policy-based, the other value-based, to solve this problem in simulation. For the simulation, we use an open-source traffic simulator, Simulation of Urban MObility (SUMO), packaged as an OpenAI Gym environment. We trained the agents on different traffic patterns on a simulated intersection. We compare the performance of the resultant policies to traditional approaches such as the Webster and vehicle actuated (VA) methods. We also examine and contrast the policies learned by the RL agents and evaluate how well they generalise to different traffic patterns.},
}