# run_convert_checkpoint.py script
#
# Usage:
#     python3 run_convert_checkpoint.py
import argparse
import os
import subprocess
import yaml
def _build_command(config):
    """Build the argv list used to launch convert_checkpoint.py.

    Args:
        config: Mapping loaded from the YAML configuration file. It must
            contain a ``model`` section (``model_dir``, ``output_dir``,
            ``dtype``) and a ``checkpoint`` section (``tp_size``,
            ``pp_size``, ``vocab_size``, ``n_positions``, ``n_layer``,
            ``n_head``, ``n_embd``, ``inter_size``). Any other non-null
            entry of ``checkpoint`` is forwarded as ``--<key> <value>``.

    Returns:
        The command as a list of arguments suitable for ``subprocess.run``.

    Raises:
        TypeError: If ``config`` or one of its two sections is not a
            mapping (for example, when the YAML file is empty).
        KeyError: If a required section or key is missing.
    """
    # Local import: ``sys`` is not among this file's top-level imports.
    import sys

    model_keys = ('model_dir', 'output_dir', 'dtype')
    checkpoint_keys = (
        'tp_size', 'pp_size', 'vocab_size', 'n_positions',
        'n_layer', 'n_head', 'n_embd', 'inter_size',
    )

    if not isinstance(config, dict):
        raise TypeError(
            'configuration must be a mapping with "model" and "checkpoint" '
            f'sections, got {type(config).__name__}'
        )

    # Validate both sections up front so every problem is reported at once
    # instead of one KeyError per run.
    missing = []
    for section, keys in (('model', model_keys), ('checkpoint', checkpoint_keys)):
        if section not in config:
            missing.append(section)
            continue
        if not isinstance(config[section], dict):
            raise TypeError(
                f'configuration section "{section}" must be a mapping, '
                f'got {type(config[section]).__name__}'
            )
        missing.extend(
            f'{section}.{key}' for key in keys if key not in config[section]
        )
    if missing:
        raise KeyError('missing configuration entries: ' + ', '.join(missing))

    model = config['model']
    checkpoint = config['checkpoint']

    # Use the interpreter that is running this script rather than whatever
    # "python" happens to resolve to on PATH (it may be absent or belong to
    # another environment). Fall back to "python" if it cannot be determined.
    cmd_args = [sys.executable or 'python', 'convert_checkpoint.py']

    # The model values are passed through unchanged, as in the original code.
    for key in model_keys:
        cmd_args.extend([f'--{key}', model[key]])

    for key in checkpoint_keys:
        cmd_args.extend([f'--{key}', str(checkpoint[key])])

    # Forward any additional checkpoint options present in the YAML file;
    # entries set to null are skipped.
    # NOTE(review): booleans are forwarded as "--key True"/"--key False".
    # If convert_checkpoint.py declares them as store_true flags this will
    # be rejected -- confirm against that script's argument parser.
    for key, value in checkpoint.items():
        if key not in checkpoint_keys and value is not None:
            cmd_args.extend([f'--{key}', str(value)])

    return cmd_args


def main():
    """Run convert_checkpoint.py with options read from a YAML file.

    The configuration path is taken from ``--config`` (default
    ``config.yaml``). The file is parsed with ``yaml.safe_load`` and turned
    into a command line, which is then executed.

    Raises:
        TypeError, KeyError: If the configuration is malformed (see
            ``_build_command``).
        subprocess.CalledProcessError: If convert_checkpoint.py exits with a
            non-zero status.
    """
    parser = argparse.ArgumentParser(description='Run convert_checkpoint.py with configurations from config.yaml')
    parser.add_argument('--config', default='config.yaml', help='Path to the YAML configuration file')
    args = parser.parse_args()

    # Load configurations from the YAML file. The encoding is explicit so
    # parsing does not depend on the platform's locale default.
    with open(args.config, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # 'convert_checkpoint.py' is a relative path, so it is resolved against
    # the current working directory of this process.
    subprocess.run(_build_command(config), check=True)
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Last updated
# Was this helpful?

