Skip to content

Commit

Permalink
[Feature] support auto saving tokenizer (#696)
Browse files (browse the repository at this point in the history)
Support automatically saving the tokenizer.
  • Loading branch information
HIT-cwh committed May 17, 2024
1 parent 3b14f48 commit ed844be
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions xtuner/engine/hooks/hf_checkpoint_hook.py
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,14 @@
from typing import Optional, Union

import torch.distributed as dist
from mmengine import print_log
from mmengine._strategy import DeepSpeedStrategy
from mmengine.hooks import Hook
from mmengine.model import is_model_wrapper
from mmengine.runner import FlexibleRunner

from xtuner.registry import BUILDER

DATA_BATCH = Optional[Union[dict, tuple, list]]


@@ -50,4 +53,10 @@ def after_run(self, runner) -> None:
for k in keys:
val = state_dict.pop(k)
state_dict[k[4:]] = val

print_log(f'Saving LLM to {self.out_dir}')
llm.save_pretrained(self.out_dir, state_dict=state_dict)

print_log(f'Saving LLM tokenizer to {self.out_dir}')
tokenizer = BUILDER.build(runner.cfg.tokenizer)
tokenizer.save_pretrained(self.out_dir)

0 comments on commit ed844be

Please sign in to comment.