You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
执行 --deepspeed default-zero2 时可以正常运行,在尝试使用 --deepspeed default-zero3 时报错,具体执行命令如下:
NPROC_PER_NODE=2 CUDA_VISIBLE_DEVICES=1,2 swift sft --model_type deepseek-vl-7b-chat --custom_train_dataset_path data/train.json --custom_val_dataset_path data/val.json --sft_type full --eval_steps 100 --deepspeed default-zero3
报错信息如下:
result = llm_x(args, **kwargs)
File "/mnt/MLLM/swift/swift/llm/sft.py", line 179, in llm_sft
td0, tkwargs0 = template.encode(train_dataset[0])
File "/mnt/MLLM/swift/swift/llm/utils/template.py", line 1001, in encode
inputs_embeds = model.prepare_inputs_embeds(**batched_output)[0]
File "/mnt/MLLM/swift/swift/llm/utils/model.py", line 1815, in __prepare_inputs_embeds
images_embeds = self.aligner(self.vision_model(images))
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/clip_encoder.py", line 183, in forward
high_res = self.vision_tower_high(high_images)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/clip_encoder.py", line 121, in forward
image_forward_outs = self.vision_tower(images, **self.forward_kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/sam.py", line 169, in forward
x = self.patch_embed(x)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/sam.py", line 502, in forward
x = self.proj(x)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/conv.py", line 460, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: weight should have at least three dimensions
[2024-05-15 16:49:34,817] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 229405) of binary: /data/venv/bin/python
主要环境版本:
ms-swift==1.8.0.dev0
deepspeed==0.14.0
The text was updated successfully, but these errors were encountered:
执行 --deepspeed default-zero2 时可以正常运行,在尝试使用 --deepspeed default-zero3 时报错,具体执行命令如下:
NPROC_PER_NODE=2 CUDA_VISIBLE_DEVICES=1,2 swift sft --model_type deepseek-vl-7b-chat --custom_train_dataset_path data/train.json --custom_val_dataset_path data/val.json --sft_type full --eval_steps 100 --deepspeed default-zero3
报错信息如下:
result = llm_x(args, **kwargs)
File "/mnt/MLLM/swift/swift/llm/sft.py", line 179, in llm_sft
td0, tkwargs0 = template.encode(train_dataset[0])
File "/mnt/MLLM/swift/swift/llm/utils/template.py", line 1001, in encode
inputs_embeds = model.prepare_inputs_embeds(**batched_output)[0]
File "/mnt/MLLM/swift/swift/llm/utils/model.py", line 1815, in __prepare_inputs_embeds
images_embeds = self.aligner(self.vision_model(images))
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/clip_encoder.py", line 183, in forward
high_res = self.vision_tower_high(high_images)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/clip_encoder.py", line 121, in forward
image_forward_outs = self.vision_tower(images, **self.forward_kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/sam.py", line 169, in forward
x = self.patch_embed(x)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/MLLM/deepseek/DeepSeek-VL-main/deepseek_vl/models/sam.py", line 502, in forward
x = self.proj(x)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/conv.py", line 460, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/data/venv/lib64/python3.9/site-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: weight should have at least three dimensions
[2024-05-15 16:49:34,817] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 229405) of binary: /data/venv/bin/python
主要环境版本:
ms-swift==1.8.0.dev0
deepspeed==0.14.0
The text was updated successfully, but these errors were encountered: