Refer to the code below.
.
# https://github.com/huggingface/optimum
# python -m pip install optimum[onnxruntime]
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForTokenClassification
from layoutlmv3_model import layoutlmv3_ner_model
import pickle
def layoutlmv3_vvv3(
    base_path='layoutLM_research/tb_logs/model/M_microsoft-layoutlmv3-large_T_stride_V_v8/checkpoints/',
    ckpt_name='vvv0.99061.ckpt',
):
    """Export a LayoutLMv3 NER checkpoint to ONNX and dynamically quantize it.

    Steps, all written underneath ``base_path``:

    1. Load the checkpoint ``ckpt_name`` via
       ``layoutlmv3_ner_model.load_from_checkpoint``.
    2. Pickle the loaded model's ``cfg`` attribute to ``model_cfg.pkl``.
    3. Save the wrapped transformer (``base_model.model``) to ``bin/``.
    4. Export that saved model to ONNX and save it to ``onnx/``.
    5. Apply dynamic quantization (arm64 config) and save to ``onnx_q/``.

    Args:
        base_path: Directory holding the checkpoint; also the output root.
            A trailing slash is optional.
        ckpt_name: File name of the checkpoint inside ``base_path``.

    Returns:
        None. The results are the files written to disk.
    """
    # Function-scope imports: `os` is only needed here, and the quantization
    # helpers were imported locally in the original snippet as well.
    import os
    from optimum.onnxruntime import ORTQuantizer
    from optimum.onnxruntime.configuration import AutoQuantizationConfig

    # Input checkpoint and output locations.
    ckpt_path = os.path.join(base_path, ckpt_name)
    bin_path = os.path.join(base_path, "bin")
    onnx_path = os.path.join(base_path, "onnx")
    quantizer_onnx_directory = os.path.join(base_path, "onnx_q")

    # Load the trained model from the checkpoint.
    # NOTE(review): presumably a PyTorch Lightning module - confirm.
    base_model = layoutlmv3_ner_model.load_from_checkpoint(ckpt_path)

    # Save the model parameters (cfg) so they can be restored without the ckpt.
    with open(os.path.join(base_path, 'model_cfg.pkl'), 'wb') as fout:
        pickle.dump(base_model.cfg, fout, protocol=2)

    # Save the underlying transformer model in the Hugging Face format.
    base_model.model.save_pretrained(bin_path)

    # Load the saved transformers model and export it to ONNX.
    # NOTE(review): newer Optimum releases deprecate `from_transformers=True`
    # in favor of `export=True` - confirm against the installed version.
    ort_model = ORTModelForTokenClassification.from_pretrained(bin_path, from_transformers=True)

    # Save the exported ONNX model (the tokenizer is not saved by this function).
    ort_model.save_pretrained(onnx_path)

    # Define the quantization methodology: dynamic (not static), per-tensor.
    qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
    quantizer = ORTQuantizer.from_pretrained(ort_model)

    # Apply dynamic quantization and write the quantized model to disk.
    quantizer.quantize(save_dir=quantizer_onnx_directory, quantization_config=qconfig)
# Run the export + quantization pipeline only when executed as a script.
if __name__ == "__main__":
    layoutlmv3_vvv3()
..
Some specific parts of the code may not suit your case,
but the general concept will be the same.
Thank you.
🙇🏻‍♂️
www.marearts.com
No comments:
Post a Comment