From b40f26bab162789d29305028347f06ea339ea3c7 Mon Sep 17 00:00:00 2001 From: baijinqiu <2522827873@qq.com> Date: Mon, 25 Dec 2023 14:37:13 +0800 Subject: [PATCH] 'iddpg_learner_x(#1)' --- .../documents/api/learners/marl/iddpg.rst | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/source/documents/api/learners/marl/iddpg.rst b/docs/source/documents/api/learners/marl/iddpg.rst index d574bc60..c0fdfe51 100644 --- a/docs/source/documents/api/learners/marl/iddpg.rst +++ b/docs/source/documents/api/learners/marl/iddpg.rst @@ -1,7 +1,7 @@ IDDPG_Learner ===================================== -xxxxxx. +An Independent Deep Deterministic Policy Gradient (IDDPG) learner. .. raw:: html @@ -33,10 +33,12 @@ PyTorch .. py:function:: xuance.torch.learners.multi_agent_rl.iddpg_learner.IDDPG_Learner.update(sample) - xxxxxx. + Update the IDDPG learner with a batch of samples. - :param sample: xxxxxx. - :type sample: xxxxxx + :param sample: A dictionary containing current observations, actions taken in the current state, + observations of the next state, rewards, binary indicators of episode terminations, + and a binary mask indicating which agents are active. + :type sample: dict :return: The infomation of the training. :rtype: dict @@ -70,11 +72,13 @@ TensorFlow .. py:function:: xuance.tensorflow.learners.multi_agent_rl.iddpg_learner.IDDPG_Learner.update(sample) - xxxxxx. + Update the IDDPG learner with a batch of samples. - :param sample: xxxxxx. - :type sample: xxxxxx - :return: The infomation of the training. + :param sample: A dictionary containing current observations, actions taken in the current state, + observations of the next state, rewards, binary indicators of episode terminations, + and a binary mask indicating which agents are active. + :type sample: dict + :return: The information of the training. :rtype: dict .. raw:: html @@ -105,11 +109,13 @@ MindSpore ..
py:function:: xuance.mindspore.learners.multi_agent_rl.iddpg_learner.IDDPG_Learner.update(sample) - xxxxxx. + Update the IDDPG learner with a batch of samples. - :param sample: xxxxxx. - :type sample: xxxxxx - :return: The infomation of the training. + :param sample: A dictionary containing current observations, actions taken in the current state, + observations of the next state, rewards, binary indicators of episode terminations, + and a binary mask indicating which agents are active. + :type sample: dict + :return: The information of the training. :rtype: dict .. raw:: html