Tutorial 2 - get inference going
pip install -r requirements.txt

python convert_checkpoint.py --model_dir ./llama-2-7b-chat-hf \
    --output_dir ./tllm_checkpoint_1gpu_bf16 \
    --dtype bfloat16

trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_bf16 \
    --output_dir ./tmp/llama/7B-chat/trt_engines/bf16/1-gpu \
    --gpt_attention_plugin bfloat16 \
    --gemm_plugin bfloat16

python ../run.py --max_output_len 50 \
    --engine_dir ./tmp/llama/7B-chat/trt_engines/bf16/1-gpu \
    --tokenizer_dir ./llama-2-7b-chat-hf \
    --input_text "Hello, how are you?"

Last updated
Was this helpful?

