% BibTeX citation entry.
% arXiv preprint: the identifier is kept in the eprint fields, not in a journal field.
@misc{xu2026overconfident,
  author        = {Xu, Yuanda and Sang, Hejian and Zhou, Zhengze and He, Ran and Wang, Zhipeng},
  title         = {Overconfident Errors Need Stronger Correction: Asymmetric Confidence Penalties for Reinforcement Learning},
  year          = {2026},
  howpublished  = {arXiv preprint},
  eprint        = {2602.21420},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2602.21420},
}