% arXiv preprint (no journal publication recorded here). The arXiv identifier
% is stored in the eprint/archiveprefix fields rather than in a journal field,
% so eprint-aware styles can format and link it. The url is derived from the
% same identifier. Update this entry if a peer-reviewed version appears.
@misc{xu2026beyond,
  author        = {Xu, Yuanda and Sang, Hejian and Zhou, Zhengze and He, Ran and Wang, Zhipeng and Geramifard, Alborz},
  title         = {Beyond {GRPO} and On-Policy Distillation: An Empirical Sparse-to-Dense Reward Principle for Language-Model Post-Training},
  year          = {2026},
  eprint        = {2605.12483},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.12483},
}