from deploy.common import gen_index_param, gen_search_param def test_task_1(self, index_type, data_size): before reinstall: create collection and insert data, load and search after reinstall: get ...
将原始环境接入 PPO 框架,构建 Actor-Critic 网络(Categorical 策略采样);实现 GAE 优势函数估计、Clip surrogate 目标、熵正则项;通过 RolloutBuffer + 多轮更新,实现了从 -300 奖励到稳定 400 的收敛 ...