You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When I use dpgen run param.json machine.json to run a job, I frequently get errors: paramiko.ssh_exception.AuthenticationException: Authentication failed.
DeePMD-kit Version
2.1.1
TensorFlow Version
tf=2.5.0
Python Version, CUDA Version, GCC Version, LAMMPS Version, etc
python=3.8.5
cudatoolkit-11.3.1
gcc=7.5
Details
Hello, dear developers,
When I use dpgen run param.json machine.json to run a job, I frequently find errors as follows:
Please cite:
Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and Weinan E,
DP-GEN: A concurrent learning platform for the generation of reliable deep learning
based potential energy models, Computer Physics Communications, 2020, 107206.
Description
Traceback (most recent call last):
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/bin/dpgen", line 8, in <module>
sys.exit(main())
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/main.py", line 185, in main
args.func(args)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3642, in gen_run
run_iter (args.PARAM, args.MACHINE)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3628, in run_iter
run_fp (ii, jdata, mdata)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3018, in run_fp
run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _vasp_check_fin,
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 2985, in run_fp_inner
submission = make_submission(
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/dispatcher/Dispatcher.py", line 359, in make_submission
machine = Machine.load_from_dict(abs_mdata_machine)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/machine.py", line 134, in load_from_dict
context = BaseContext.load_from_dict(machine_dict)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/base_context.py", line 41, in load_from_dict
context = context_class.load_from_dict(context_dict)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 350, in load_from_dict
ssh_context = cls(
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 323, in __init__
self.ssh_session = SSHSession(**remote_profile)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 44, in __init__
self._setup_ssh()
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/utils.py", line 162, in wrapper
return func(*args, **kwargs)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 166, in _setup_ssh
ts.auth_password(self.username, self.password)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/paramiko/transport.py", line 1564, in auth_password
return self.auth_handler.wait_for_response(my_event)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/paramiko/auth_handler.py", line 245, in wait_for_response
raise e
paramiko.ssh_exception.AuthenticationException: Authentication failed.
Summary
When I use dpgen run param.json machine.json to run a job, I frequently get errors: paramiko.ssh_exception.AuthenticationException: Authentication failed.
DeePMD-kit Version
2.1.1
TensorFlow Version
tf=2.5.0
Python Version, CUDA Version, GCC Version, LAMMPS Version, etc
python=3.8.5
cudatoolkit-11.3.1
gcc=7.5
Details
Hello, dear developers,
When I use dpgen run param.json machine.json to run a job, I frequently find errors as follows:
Please cite:
Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and Weinan E,
DP-GEN: A concurrent learning platform for the generation of reliable deep learning
based potential energy models, Computer Physics Communications, 2020, 107206.
Description
Traceback (most recent call last):
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/bin/dpgen", line 8, in <module>
sys.exit(main())
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/main.py", line 185, in main
args.func(args)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3642, in gen_run
run_iter (args.PARAM, args.MACHINE)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3628, in run_iter
run_fp (ii, jdata, mdata)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 3018, in run_fp
run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _vasp_check_fin,
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/generator/run.py", line 2985, in run_fp_inner
submission = make_submission(
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpgen/dispatcher/Dispatcher.py", line 359, in make_submission
machine = Machine.load_from_dict(abs_mdata_machine)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/machine.py", line 134, in load_from_dict
context = BaseContext.load_from_dict(machine_dict)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/base_context.py", line 41, in load_from_dict
context = context_class.load_from_dict(context_dict)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 350, in load_from_dict
ssh_context = cls(
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 323, in __init__
self.ssh_session = SSHSession(**remote_profile)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 44, in __init__
self._setup_ssh()
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/utils.py", line 162, in wrapper
return func(*args, **kwargs)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/dpdispatcher/ssh_context.py", line 166, in _setup_ssh
ts.auth_password(self.username, self.password)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/paramiko/transport.py", line 1564, in auth_password
return self.auth_handler.wait_for_response(my_event)
File "/HOME/zhoujy/.conda/envs/deepmd-kit2.1.1/lib/python3.9/site-packages/paramiko/auth_handler.py", line 245, in wait_for_response
raise e
paramiko.ssh_exception.AuthenticationException: Authentication failed.
Here is my machine.json. The hostname, username, and password are correct. For the fp step, I want to run it on a remote cluster:
{
"api_version": "1.0",
"train": [
{
"machine": {
"context_type": "local",
"batch_type": "Slurm",
"machine_type": "Slurm",
"local_root": "./",
"_remote_root": "/data/run01/scz5616/HHM/dpmd-project/WFpro/S2tet/work",
"remote_root": "/HOME/zhoujy/run/dp-test/work"
},
"resources": {
"module_list": [],
"_source_list": [
"/data/run01/scz5616/HHM/dpmd-project/WFpro/S2tet/train.sh"
],
"source_list": ["/HOME/zhoujy/run/dp-test/train.sh"],
"cpu_per_node": 6,
"number_node": 1,
"gpu_per_node": 1,
"queue_name": "gpu_c128",
"_exclude_list": [],
"_time_limit": "24:0:0",
"group_size": 1
},
"command": "dp"
}
],
"model_devi": [
{
"machine": {
"context_type": "local",
"batch_type": "Slurm",
"machine_type": "Slurm",
"local_root": "./",
"_remote_root": "/data/run01/scz5616/HHM/dpmd-project/WFpro/S2tet/work",
"remote_root": "/HOME/zhoujy/run/dp-test/work"
},
"resources": {
"_module_list": [],
"_source_list": [
"/data/run01/scz5616/HHM/dp-test/lammps.sh"
],
"cpu_per_node": 6,
"number_node": 1,
"gpu_per_node": 1,
"queue_name": "gpu",
"_exclude_list": [],
"_time_limit": "23:0:0",
"group_size": 1
},
"command": "lmp"
}
],
"fp": [
{
"machine": {
"context_type": "ssh",
"batch_type": "Slurm",
"_machine_type": "Slurm",
"local_root": "./",
"remote_root": "/public1/ws133/sc94566/zhou/work",
"remote_profile": {
"hostname": "36.103.203.6",
"username": "sc94566@BSCC-A3",
"port": 22,
"password": "xxxxxxxxxxxxxxxxxxxx"
}
},
"resources": {
"number_node": 1,
"cpu_per_node": 64,
"_custom_flags": [
"-p G1Part_sce"
],
"queue_name": "amd_256",
"_with_mpi": false,
"source_list": [
"/public1/ws133/sc94566/zhou/env.sh"
],
"_time_limit": "120:0:0",
"_comment": "that's all",
"group_size": 100
},
"command": "ulimit -s unlimited; srun -n 64 vasp_std"
}
]
}
The text was updated successfully, but these errors were encountered: