From 9657c4e7c26e37e318b0fd851917a2d7a1608de8 Mon Sep 17 00:00:00 2001 From: "Shiyi Zheng (from Dev Box)" Date: Mon, 22 Jun 2026 11:50:01 +0800 Subject: [PATCH 1/3] examples/recipes: refresh built-in model recipes --- .../feature-extraction_fp16_config.json | 79 +++ .../feature-extraction_w8a16_config.json | 96 ++++ .../feature-extraction_w8a8_config.json | 96 ++++ .../sentence-similarity_fp16_config.json | 79 +++ .../sentence-similarity_w8a16_config.json | 96 ++++ .../sentence-similarity_w8a8_config.json | 96 ++++ .../sentence-similarity_fp16_config.json | 1 + .../sentence-similarity_w8a16_config.json | 1 + .../sentence-similarity_w8a8_config.json | 1 + .../feature-extraction_fp16_config.json | 67 +++ .../feature-extraction_w8a16_config.json | 84 +++ .../feature-extraction_w8a8_config.json | 84 +++ .../sentence-similarity_fp16_config.json | 67 +++ .../sentence-similarity_w8a16_config.json | 84 +++ .../sentence-similarity_w8a8_config.json | 84 +++ .../feature-extraction_fp16_config.json | 79 +++ .../feature-extraction_w8a16_config.json | 96 ++++ .../feature-extraction_w8a8_config.json | 96 ++++ .../sentence-similarity_fp16_config.json | 78 +++ .../sentence-similarity_w8a16_config.json | 95 ++++ .../sentence-similarity_w8a8_config.json | 95 ++++ .../token-classification_fp16_config.json | 76 +++ .../token-classification_w8a16_config.json | 93 ++++ .../token-classification_w8a8_config.json | 93 ++++ .../fill-mask_fp16_config.json | 65 +++ .../fill-mask_w8a16_config.json | 82 +++ .../fill-mask_w8a8_config.json | 82 +++ .../fill-mask_fp16_config.json | 66 +++ .../fill-mask_w8a16_config.json | 83 +++ .../fill-mask_w8a8_config.json | 83 +++ .../fill-mask_fp16_config.json | 65 +++ .../fill-mask_w8a16_config.json | 82 +++ .../fill-mask_w8a8_config.json | 82 +++ .../feature-extraction_fp16_config.json | 79 +++ .../feature-extraction_w8a16_config.json | 96 ++++ .../feature-extraction_w8a8_config.json | 96 ++++ .../text-classification_fp16_config.json | 78 +++ 
.../text-classification_w8a16_config.json | 95 ++++ .../text-classification_w8a8_config.json | 95 ++++ .../token-classification_fp16_config.json | 62 +++ .../token-classification_w8a16_config.json | 80 +++ .../token-classification_w8a8_config.json | 80 +++ .../text-classification_fp16_config.json | 74 +++ .../text-classification_w8a16_config.json | 91 ++++ .../text-classification_w8a8_config.json | 91 ++++ examples/recipes/README.md | 67 ++- .../image-to-text_fp16_config_decoder.json | 492 +++++++++++++++++ .../image-to-text_fp16_config_encoder.json | 58 ++ .../image-feature-extraction_fp16_config.json | 50 ++ ...image-feature-extraction_w8a16_config.json | 67 +++ .../image-feature-extraction_w8a8_config.json | 67 +++ .../question-answering_fp16_config.json | 80 +++ .../question-answering_w8a16_config.json | 98 ++++ .../question-answering_w8a8_config.json | 98 ++++ .../image-classification_fp16_config.json | 49 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../text-classification_fp16_config.json | 1 + .../text-classification_w8a16_config.json | 1 + .../text-classification_w8a8_config.json | 1 + .../token-classification_fp16_config.json | 76 +++ .../token-classification_w8a16_config.json | 93 ++++ .../token-classification_w8a8_config.json | 93 ++++ .../question-answering_fp16_config.json | 82 +++ .../question-answering_w8a16_config.json | 99 ++++ .../question-answering_w8a8_config.json | 99 ++++ .../question-answering_fp16_config.json | 1 + .../question-answering_w8a16_config.json | 1 + .../question-answering_w8a8_config.json | 1 + .../question-answering_fp16_config.json | 1 + .../question-answering_w8a16_config.json | 1 + .../question-answering_w8a8_config.json | 1 + .../image-classification_fp16_config.json | 52 ++ .../image-classification_w8a16_config.json | 70 +++ .../image-classification_w8a8_config.json | 70 +++ .../question-answering_fp16_config.json | 68 +++ .../question-answering_w8a16_config.json 
| 86 +++ .../question-answering_w8a8_config.json | 86 +++ .../question-answering_fp16_config.json | 68 +++ .../question-answering_w8a16_config.json | 86 +++ .../question-answering_w8a8_config.json | 86 +++ .../text-classification_fp16_config.json | 62 +++ .../text-classification_w8a16_config.json | 80 +++ .../text-classification_w8a8_config.json | 80 +++ .../fill-mask_fp16_config.json | 63 +++ .../fill-mask_w8a16_config.json | 81 +++ .../fill-mask_w8a8_config.json | 81 +++ .../token-classification_fp16_config.json | 76 +++ .../token-classification_w8a16_config.json | 93 ++++ .../token-classification_w8a8_config.json | 93 ++++ .../image-classification_fp16_config.json | 50 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../image-feature-extraction_fp16_config.json | 49 ++ ...image-feature-extraction_w8a16_config.json | 66 +++ .../image-feature-extraction_w8a8_config.json | 66 +++ .../image-feature-extraction_fp16_config.json | 49 ++ ...image-feature-extraction_w8a16_config.json | 66 +++ .../image-feature-extraction_w8a8_config.json | 66 +++ .../image-feature-extraction_fp16_config.json | 2 +- .../image-feature-extraction_fp16_config.json | 49 ++ ...image-feature-extraction_w8a16_config.json | 66 +++ .../image-feature-extraction_w8a8_config.json | 66 +++ .../image-feature-extraction_fp16_config.json | 2 +- .../feature-extraction_fp16_config.json | 79 +++ .../feature-extraction_w8a16_config.json | 96 ++++ .../feature-extraction_w8a8_config.json | 96 ++++ .../fill-mask_fp16_config.json | 77 +++ .../fill-mask_w8a16_config.json | 95 ++++ .../fill-mask_w8a8_config.json | 95 ++++ .../fill-mask_fp16_config.json | 77 +++ .../fill-mask_w8a16_config.json | 94 ++++ .../fill-mask_w8a8_config.json | 94 ++++ .../fill-mask_fp16_config.json | 77 +++ .../fill-mask_w8a16_config.json | 94 ++++ .../fill-mask_w8a8_config.json | 94 ++++ .../question-answering_fp16_config.json | 83 +++ .../question-answering_w8a16_config.json | 100 
++++ .../question-answering_w8a8_config.json | 100 ++++ .../image-feature-extraction_fp16_config.json | 2 +- .../image-classification_fp16_config.json | 50 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 66 +++ .../zero-shot-classification_fp16_config.json | 68 +++ ...zero-shot-classification_w8a16_config.json | 85 +++ .../zero-shot-classification_w8a8_config.json | 85 +++ .../feature-extraction_fp16_config.json | 1 + .../feature-extraction_w8a16_config.json | 1 + .../feature-extraction_w8a8_config.json | 2 + .../image-segmentation_fp16_config.json | 53 ++ .../image-segmentation_w8a16_config.json | 70 +++ .../image-segmentation_w8a8_config.json | 70 +++ .../image-classification_fp16_config.json | 49 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../image-classification_fp16_config.json | 50 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../image-classification_fp16_config.json | 50 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../image-classification_fp16_config.json | 50 ++ .../image-classification_w8a16_config.json | 67 +++ .../image-classification_w8a8_config.json | 67 +++ .../image-to-text_fp16_config_decoder.json | 494 ++++++++++++++++++ .../image-to-text_fp16_config_encoder.json | 60 +++ .../image-to-text_fp16_config_decoder.json | 494 ++++++++++++++++++ .../image-to-text_fp16_config_encoder.json | 60 +++ .../image-to-text_fp16_config_decoder.json | 494 ++++++++++++++++++ .../image-to-text_fp16_config_encoder.json | 60 +++ .../image-to-text_fp16_config_decoder.json | 494 ++++++++++++++++++ .../image-to-text_fp16_config_encoder.json | 60 +++ .../question-answering_fp16_config.json | 80 +++ .../question-answering_w8a16_config.json | 98 ++++ .../question-answering_w8a8_config.json | 98 ++++ .../image-segmentation_fp16_config.json | 54 
++ .../image-segmentation_w8a16_config.json | 71 +++ .../image-segmentation_w8a8_config.json | 71 +++ .../image-segmentation_fp16_config.json | 54 ++ .../image-segmentation_w8a16_config.json | 71 +++ .../image-segmentation_w8a8_config.json | 71 +++ .../image-segmentation_fp16_config.json | 54 ++ .../image-segmentation_w8a16_config.json | 71 +++ .../image-segmentation_w8a8_config.json | 71 +++ .../feature-extraction_fp16_config.json | 1 + .../feature-extraction_w8a16_config.json | 1 + .../feature-extraction_w8a8_config.json | 1 + ...ssification_fp16_config_image-encoder.json | 62 +++ ...assification_fp16_config_text-encoder.json | 72 +++ ...sification_w8a16_config_image-encoder.json | 79 +++ ...ssification_w8a16_config_text-encoder.json | 89 ++++ ...ssification_w8a8_config_image-encoder.json | 79 +++ ...assification_w8a8_config_text-encoder.json | 89 ++++ .../feature-extraction_fp16_config.json | 72 +++ .../feature-extraction_w8a16_config.json | 89 ++++ .../feature-extraction_w8a8_config.json | 89 ++++ ...ssification_fp16_config_image-encoder.json | 62 +++ ...assification_fp16_config_text-encoder.json | 72 +++ ...sification_w8a16_config_image-encoder.json | 79 +++ ...ssification_w8a16_config_text-encoder.json | 89 ++++ ...ssification_w8a8_config_image-encoder.json | 79 +++ ...assification_w8a8_config_text-encoder.json | 89 ++++ ...ssification_fp16_config_image-encoder.json | 62 +++ ...assification_fp16_config_text-encoder.json | 72 +++ ...sification_w8a16_config_image-encoder.json | 79 +++ ...ssification_w8a16_config_text-encoder.json | 89 ++++ ...ssification_w8a8_config_image-encoder.json | 79 +++ ...assification_w8a8_config_text-encoder.json | 89 ++++ .../image-classification_fp16_config.json | 52 ++ .../image-classification_w8a16_config.json | 69 +++ .../image-classification_w8a8_config.json | 69 +++ .../feature-extraction_fp16_config.json | 1 + .../feature-extraction_w8a16_config.json | 1 + .../feature-extraction_w8a8_config.json | 1 + 
.../sentence-similarity_fp16_config.json | 1 + .../sentence-similarity_w8a16_config.json | 1 + .../sentence-similarity_w8a8_config.json | 1 + .../feature-extraction_fp16_config.json | 64 +++ .../feature-extraction_w8a16_config.json | 82 +++ .../feature-extraction_w8a8_config.json | 82 +++ .../sentence-similarity_fp16_config.json | 64 +++ .../sentence-similarity_w8a16_config.json | 82 +++ .../sentence-similarity_w8a8_config.json | 82 +++ .../feature-extraction_fp16_config.json | 64 +++ .../feature-extraction_w8a16_config.json | 82 +++ .../feature-extraction_w8a8_config.json | 82 +++ .../sentence-similarity_fp16_config.json | 64 +++ .../sentence-similarity_w8a16_config.json | 82 +++ .../sentence-similarity_w8a8_config.json | 82 +++ .../feature-extraction_fp16_config.json | 79 +++ .../feature-extraction_w8a16_config.json | 96 ++++ .../feature-extraction_w8a8_config.json | 96 ++++ .../sentence-similarity_fp16_config.json | 78 +++ .../sentence-similarity_w8a16_config.json | 95 ++++ .../sentence-similarity_w8a8_config.json | 95 ++++ .../sentence-similarity_fp16_config.json | 66 +++ .../sentence-similarity_w8a16_config.json | 83 +++ .../sentence-similarity_w8a8_config.json | 83 +++ .../token-classification_fp16_config.json | 64 +++ .../token-classification_w8a16_config.json | 81 +++ .../token-classification_w8a8_config.json | 81 +++ examples/release-builtin-models.md | 127 +++++ scripts/pick_builtin_recipes.py | 185 +++++++ scripts/rebuild_recipes_readme.py | 131 +++++ 224 files changed, 17439 insertions(+), 4 deletions(-) create mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json create mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json create mode 100644 
examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json create mode 100644 examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json create mode 100644 examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json create mode 100644 examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json create mode 100644 examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json create mode 100644 examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json create mode 100644 examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json create mode 100644 examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json create mode 100644 
examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json create mode 100644 examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json create mode 100644 examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json create mode 100644 examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json create mode 100644 examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json create mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json create mode 100644 examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json create mode 100644 examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json create mode 100644 examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json create mode 100644 examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json create mode 100644 examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json create mode 100644 examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json create mode 100644 examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json create mode 100644 examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json create mode 100644 
examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json create mode 100644 examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json create mode 100644 examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json create mode 100644 examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json create mode 100644 examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json create mode 100644 examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json create mode 100644 examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json create mode 100644 examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json create mode 100644 examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json create mode 100644 examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json create mode 100644 examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json create mode 100644 examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json create mode 100644 examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json create mode 100644 examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json create mode 100644 examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json create mode 100644 examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json create mode 100644 examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json create mode 100644 
examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json create mode 100644 examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json create mode 100644 examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json create mode 100644 examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json create mode 100644 examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json create mode 100644 examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json create mode 100644 
examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json create mode 100644 examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json create mode 100644 examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json create mode 100644 examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json create mode 100644 examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json create mode 100644 examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json create mode 100644 examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json create mode 100644 examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json create mode 100644 examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json create mode 100644 examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json create mode 100644 examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json create mode 100644 examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json create mode 100644 
examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json create mode 100644 examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json create mode 100644 examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json create mode 100644 examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json create mode 100644 examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json create mode 100644 examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json create mode 100644 examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json create mode 100644 examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json create mode 100644 examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json create mode 100644 examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json create mode 100644 examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json create mode 100644 examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json create mode 100644 examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json create mode 100644 examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json create mode 100644 examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json create mode 100644 examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json create mode 100644 examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json create mode 100644 examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json create mode 100644 
examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json create mode 100644 examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json create mode 100644 examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json create mode 100644 examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json create mode 100644 examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json create mode 100644 examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json create mode 100644 examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json create mode 100644 examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json create mode 100644 examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json create mode 100644 examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json create mode 100644 examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json create mode 100644 examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json create mode 100644 examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json create mode 100644 examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json create mode 100644 examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json create mode 100644 examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json create mode 100644 examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json create mode 100644 examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json create mode 100644 
examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json create mode 100644 examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json create mode 100644 examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json create mode 100644 examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json create mode 100644 examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json create mode 100644 examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json create mode 100644 examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json create mode 100644 examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json create mode 100644 examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json create mode 100644 examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json create mode 100644 examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json create mode 100644 examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json create mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json create mode 100644 
examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json create mode 100644 examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json create mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json create mode 100644 
examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json create mode 100644 examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json create mode 100644 examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json create mode 100644 examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json create mode 100644 
examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json create mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json create mode 100644 examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json create mode 100644 examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json create mode 100644 examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json create mode 100644 examples/release-builtin-models.md create mode 100644 scripts/pick_builtin_recipes.py create mode 100644 scripts/rebuild_recipes_readme.py diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + 
"verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..0e0b7c083 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + 
{ + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-base-en-v1.5" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..7665bf642 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": 
true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-base-en-v1.5" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..92e4fa925 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": 
"sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..b75f8cd6d --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-base-en-v1.5" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + 
}, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json new file mode 100644 index 000000000..39c5e60a6 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-base-en-v1.5" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": 
"mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json index eed96889f..30f1521ce 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json +++ b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json index 6f1450a96..96abec426 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json +++ b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json index 275363979..a1630b546 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json +++ b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json 
new file mode 100644 index 000000000..d7c9b8ebb --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..cc1ad56c9 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json @@ -0,0 +1,84 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + 
"output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-m3" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..d3cd88764 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json @@ -0,0 +1,84 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + 
"activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-m3" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..83f4fd1f0 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + 
"input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..06a124129 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json @@ -0,0 +1,84 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-m3" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json new file 
mode 100644 index 000000000..7e87ee267 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json @@ -0,0 +1,84 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-m3" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, 
+ "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..c629a6ca6 --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + 
"value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-small-en-v1.5" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..09510e0b9 --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": 
"last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "BAAI/bge-small-en-v1.5" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..30f1521ce --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + 
}, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..f4423ad76 --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-small-en-v1.5" + }, + "loader": { + "task": "sentence-similarity", + "model_class": 
"AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json new file mode 100644 index 000000000..1ec46532a --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "BAAI/bge-small-en-v1.5" + }, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + 
"dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json new file mode 100644 index 000000000..3a9c3545d --- /dev/null +++ b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json new file mode 100644 index 000000000..652d9b774 --- /dev/null +++ 
b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "Babelscape/wikineural-multilingual-ner" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json new file mode 100644 index 000000000..3cf38f64f --- /dev/null +++ 
b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "Babelscape/wikineural-multilingual-ner" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json new file mode 100644 index 000000000..77bafe354 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json @@ -0,0 +1,65 @@ +{ + "export": { + "opset_version": 17, + 
"batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6ed6b6001 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + 
"symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-base" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json new file mode 100644 index 000000000..21596e908 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-base" + }, + "loader": { + 
"task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json new file mode 100644 index 000000000..38ad865d7 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json new file mode 100644 index 000000000..27ba8f9fb --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + 
"do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-large" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json new file mode 100644 index 000000000..179efa9df --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 
50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-large" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json new file mode 100644 index 000000000..effb5ad4d --- /dev/null +++ b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json @@ -0,0 +1,65 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": 
{ + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6a1bdfddd --- /dev/null +++ b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/xlm-roberta-base" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + 
"input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json new file mode 100644 index 000000000..cd8be5738 --- /dev/null +++ b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/xlm-roberta-base" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ 
b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..077b9581e --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": 
"attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "Intel/bert-base-uncased-mrpc" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..1a1bd128f --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 
512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "Intel/bert-base-uncased-mrpc" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json new file mode 100644 index 000000000..0a6226f14 --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + 
"name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "mrpc", + "samples": 100, + "columns_mapping": { + "input_column": "sentence1", + "second_input_column": "sentence2" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json new file mode 100644 index 000000000..d35a3c499 --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + 
"calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "text-classification", + "model_name": "Intel/bert-base-uncased-mrpc" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "mrpc", + "samples": 1000, + "columns_mapping": { + "input_column": "sentence1", + "second_input_column": "sentence2" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json new file mode 100644 index 000000000..15280ae41 --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + 
"nodes_to_exclude": null, + "task": "text-classification", + "model_name": "Intel/bert-base-uncased-mrpc" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "mrpc", + "samples": 1000, + "columns_mapping": { + "input_column": "sentence1", + "second_input_column": "sentence2" + } + } + } +} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json new file mode 100644 index 000000000..2c8e6ccc1 --- /dev/null +++ b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_ai4privacy", + "columns_mapping": { + "label_column": "ner_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" + } + } +} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json 
b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json new file mode 100644 index 000000000..24755346a --- /dev/null +++ b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "Isotonic/distilbert_finetuned_ai4privacy_v2" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_ai4privacy", + "columns_mapping": { + "label_column": "ner_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" + } + } +} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json new file 
mode 100644 index 000000000..e1063961f --- /dev/null +++ b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "Isotonic/distilbert_finetuned_ai4privacy_v2" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_ai4privacy", + "columns_mapping": { + "label_column": "ner_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" + } + } +} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json b/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json new file mode 100644 index 000000000..ef2099d4d --- /dev/null +++ b/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json @@ -0,0 +1,74 @@ +{ + "export": { + 
"opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "privet1mir/finbert_dataset", + "split": "val", + "samples": 100 + } + } +} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json b/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json new file mode 100644 index 000000000..6290e8fff --- /dev/null +++ b/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json @@ -0,0 +1,91 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": 
"logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "text-classification", + "model_name": "ProsusAI/finbert" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "privet1mir/finbert_dataset", + "split": "val", + "samples": 1000 + } + } +} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json b/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json new file mode 100644 index 000000000..e921e92dd --- /dev/null +++ b/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json @@ -0,0 +1,91 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": 
"uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "text-classification", + "model_name": "ProsusAI/finbert" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "privet1mir/finbert_dataset", + "split": "val", + "samples": 1000 + } + } +} diff --git a/examples/recipes/README.md b/examples/recipes/README.md index caaa2c15f..e0393a07d 100644 --- a/examples/recipes/README.md +++ b/examples/recipes/README.md @@ -1,4 +1,4 @@ -# Built-in Model Recipes +# Built-in Model Recipes Curated recipe configuration samples for **portable, high-performance, and high-quality** AI models on Windows ML, working consistently across supported EPs. @@ -14,17 +14,82 @@ Each *(model, task)* includes: ## Models +Total: **75** (model, task) tuples that pass fp16 eval on all 10 (EP, device) buckets. 
+ | Model | Task | |---|---| +| BAAI/bge-base-en-v1.5 | feature-extraction | +| BAAI/bge-base-en-v1.5 | sentence-similarity | | BAAI/bge-large-en-v1.5 | sentence-similarity | +| BAAI/bge-m3 | feature-extraction | +| BAAI/bge-m3 | sentence-similarity | +| BAAI/bge-small-en-v1.5 | feature-extraction | +| BAAI/bge-small-en-v1.5 | sentence-similarity | +| Babelscape/wikineural-multilingual-ner | token-classification | +| FacebookAI/roberta-base | fill-mask | +| FacebookAI/roberta-large | fill-mask | +| FacebookAI/xlm-roberta-base | fill-mask | +| Intel/bert-base-uncased-mrpc | feature-extraction | +| Intel/bert-base-uncased-mrpc | text-classification | +| Isotonic/distilbert_finetuned_ai4privacy_v2 | token-classification | +| ProsusAI/finbert | text-classification | +| Salesforce/blip-image-captioning-base | image-to-text | +| StanfordAIMI/dinov2-base-xray-224 | image-feature-extraction | +| ahotrod/electra_large_discriminator_squad2_512 | question-answering | +| apple/mobilevit-small | image-classification | | cardiffnlp/twitter-roberta-base-sentiment-latest | text-classification | +| dbmdz/bert-large-cased-finetuned-conll03-english | token-classification | +| deepset/bert-large-uncased-whole-word-masking-squad2 | question-answering | | deepset/roberta-base-squad2 | question-answering | | deepset/tinyroberta-squad2 | question-answering | +| dima806/fairface_age_image_detection | image-classification | +| distilbert/distilbert-base-cased-distilled-squad | question-answering | +| distilbert/distilbert-base-uncased | fill-mask | +| distilbert/distilbert-base-uncased-distilled-squad | question-answering | +| distilbert/distilbert-base-uncased-finetuned-sst-2-english | text-classification | +| dslim/bert-base-NER | token-classification | +| facebook/convnext-tiny-224 | image-classification | +| facebook/dino-vitb16 | image-feature-extraction | +| facebook/dino-vits16 | image-feature-extraction | | facebook/dinov2-base | image-feature-extraction | +| facebook/dinov2-large | 
image-feature-extraction | | facebook/dinov2-small | image-feature-extraction | +| google-bert/bert-base-multilingual-cased | feature-extraction | +| google-bert/bert-base-multilingual-cased | fill-mask | +| google-bert/bert-base-multilingual-uncased | fill-mask | +| google-bert/bert-base-uncased | fill-mask | +| google-bert/bert-large-uncased-whole-word-masking-finetuned-squad | question-answering | +| google/vit-base-patch16-224 | image-classification | | google/vit-base-patch16-224-in21k | image-feature-extraction | +| joeddav/xlm-roberta-large-xnli | zero-shot-classification | | laion/CLIP-ViT-B-32-laion2B-s34B-b79K | feature-extraction | +| mattmdjaga/segformer_b2_clothes | image-segmentation | | microsoft/rad-dino | image-feature-extraction | +| microsoft/resnet-18 | image-classification | +| microsoft/resnet-50 | image-classification | +| microsoft/swin-large-patch4-window7-224 | image-classification | +| microsoft/swinv2-tiny-patch4-window16-256 | image-classification | +| microsoft/trocr-base-handwritten | image-to-text | +| microsoft/trocr-base-printed | image-to-text | +| microsoft/trocr-large-handwritten | image-to-text | +| microsoft/trocr-large-printed | image-to-text | +| monologg/koelectra-small-v2-distilled-korquad-384 | question-answering | +| nvidia/segformer-b1-finetuned-ade-512-512 | image-segmentation | +| nvidia/segformer-b2-finetuned-ade-512-512 | image-segmentation | +| nvidia/segformer-b5-finetuned-ade-640-640 | image-segmentation | | openai/clip-vit-base-patch16 | feature-extraction | +| openai/clip-vit-base-patch16 | zero-shot-image-classification | +| openai/clip-vit-base-patch32 | feature-extraction | +| openai/clip-vit-large-patch14 | zero-shot-image-classification | +| openai/clip-vit-large-patch14-336 | zero-shot-image-classification | +| rizvandwiki/gender-classification | image-classification | | sentence-transformers/all-MiniLM-L6-v2 | feature-extraction | | sentence-transformers/all-MiniLM-L6-v2 | sentence-similarity | +| 
sentence-transformers/all-mpnet-base-v2 | feature-extraction | +| sentence-transformers/all-mpnet-base-v2 | sentence-similarity | +| sentence-transformers/multi-qa-mpnet-base-dot-v1 | feature-extraction | +| sentence-transformers/multi-qa-mpnet-base-dot-v1 | sentence-similarity | +| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | feature-extraction | +| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | sentence-similarity | +| sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | sentence-similarity | +| w11wo/indonesian-roberta-base-posp-tagger | token-classification | diff --git a/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..3f9be38a6 --- /dev/null +++ b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json @@ -0,0 +1,492 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 30524 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + 
"shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, 
+ 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "BlipDecoderWrapper", + "model_type": "blip" + }, + "eval": { + "task": 
"image-to-text", + "dataset": { + "path": "lmms-lab/flickr30k", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "caption" + } + } + } +} diff --git a/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..12f595574 --- /dev/null +++ b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json @@ -0,0 +1,58 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "BlipVisionEncoderWrapper", + "model_type": "blip" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "lmms-lab/flickr30k", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "caption" + } + } + } +} diff --git a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..e503c77d9 --- /dev/null +++ b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + 
"do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "Ewakaa/pneumonia_classification_chest_xray", + "split": "test", + "samples": 582 + } + } +} diff --git a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json new file mode 100644 index 000000000..77a6331c7 --- /dev/null +++ b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", 
+ "model_name": "StanfordAIMI/dinov2-base-xray-224" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "Ewakaa/pneumonia_classification_chest_xray", + "split": "test", + "samples": 582 + } + } +} diff --git a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json new file mode 100644 index 000000000..39ffd70dd --- /dev/null +++ b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "StanfordAIMI/dinov2-base-xray-224" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "Ewakaa/pneumonia_classification_chest_xray", + "split": "test", + "samples": 582 + } + } +} diff 
--git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json new file mode 100644 index 000000000..af32cfc6e --- /dev/null +++ b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json new file mode 100644 index 000000000..41a80cc35 --- /dev/null +++ b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json @@ -0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, 
+ "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "ahotrod/electra_large_discriminator_squad2_512" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json new file mode 100644 index 000000000..5bcf12382 --- /dev/null +++ b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json @@ 
-0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "ahotrod/electra_large_discriminator_squad2_512" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json b/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json new file mode 100644 index 000000000..ff6060d8e --- /dev/null +++ b/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json @@ -0,0 +1,49 @@ +{ + 
"export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "mobilevit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json b/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json new file mode 100644 index 000000000..034af6e9a --- /dev/null +++ b/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": 
"apple/mobilevit-small" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "mobilevit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json b/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json new file mode 100644 index 000000000..85332f723 --- /dev/null +++ b/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "apple/mobilevit-small" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "mobilevit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json 
b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json index 186a6cbb8..8de0a638d 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json +++ b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json @@ -55,6 +55,7 @@ "dataset": { "path": "tweet_eval", "name": "sentiment", + "samples": 100, "columns_mapping": { "input_column": "text" } diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json index b96d6b2eb..f6b9ea686 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json +++ b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json @@ -72,6 +72,7 @@ "dataset": { "path": "tweet_eval", "name": "sentiment", + "samples": 1000, "columns_mapping": { "input_column": "text" } diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json index 2cb7114d2..5bf569f6b 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json +++ b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json @@ -72,6 +72,7 @@ "dataset": { "path": "tweet_eval", "name": "sentiment", + "samples": 1000, "columns_mapping": { "input_column": "text" } diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json new file mode 100644 index 000000000..6e57f58db --- 
/dev/null +++ b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json new file mode 100644 index 000000000..195a9ddb6 --- /dev/null +++ b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 
+ ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "dbmdz/bert-large-cased-finetuned-conll03-english" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json new file mode 100644 index 000000000..d91a0863b --- /dev/null +++ b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": 
"attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "dbmdz/bert-large-cased-finetuned-conll03-english" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json new file mode 100644 index 000000000..ce04fa559 --- /dev/null +++ b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": 
"int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json new file mode 100644 index 000000000..ff515ccdc --- /dev/null +++ b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json @@ -0,0 +1,99 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + 
"samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "deepset/bert-large-uncased-whole-word-masking-squad2" + }, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json new file mode 100644 index 000000000..1e065c665 --- /dev/null +++ b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json @@ -0,0 +1,99 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + 
] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "deepset/bert-large-uncased-whole-word-masking-squad2" + }, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json index e97d94cb3..1c8a70440 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json +++ b/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json @@ -58,6 +58,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 100, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json index 5fdbafca2..53deef516 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json +++ b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", 
+ "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json index 60f6039f1..01a786971 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json +++ b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json index e97d94cb3..1c8a70440 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json +++ b/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json @@ -58,6 +58,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 100, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json index 7a006bd7f..38969b7ab 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json +++ b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json index 
0d8e54344..9322f582c 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json +++ b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json b/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json new file mode 100644 index 000000000..08b771455 --- /dev/null +++ b/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json @@ -0,0 +1,52 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "age" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json new file mode 100644 index 000000000..379bc4caf --- /dev/null +++ 
b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "dima806/fairface_age_image_detection" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "age" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json new file mode 100644 index 000000000..fcf6d4db5 --- /dev/null +++ b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": 
false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "dima806/fairface_age_image_detection" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "age" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..605ec3526 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, 
+ { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json new file mode 100644 index 000000000..784ccc775 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json @@ -0,0 +1,86 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + 
"calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "distilbert/distilbert-base-cased-distilled-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json new file mode 100644 index 000000000..c6386c944 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json @@ -0,0 +1,86 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": 
null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "distilbert/distilbert-base-cased-distilled-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..027c69c52 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + 
"label_column": "answers" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json new file mode 100644 index 000000000..23216becf --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json @@ -0,0 +1,86 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "distilbert/distilbert-base-uncased-distilled-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff 
--git a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json new file mode 100644 index 000000000..2258aa259 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json @@ -0,0 +1,86 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "distilbert/distilbert-base-uncased-distilled-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git 
a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json new file mode 100644 index 000000000..f7b761cb6 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "sst2", + "samples": 100, + "columns_mapping": { + "input_column": "sentence" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json new file mode 100644 index 000000000..443f3eb7e --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, 
+ "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "text-classification", + "model_name": "distilbert/distilbert-base-uncased-finetuned-sst-2-english" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "sst2", + "samples": 1000, + "columns_mapping": { + "input_column": "sentence" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json new file mode 100644 index 000000000..9907a37ef --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + 
"value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "text-classification", + "model_name": "distilbert/distilbert-base-uncased-finetuned-sst-2-english" + }, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "sst2", + "samples": 1000, + "columns_mapping": { + "input_column": "sentence" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..ac283ad29 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json @@ -0,0 +1,63 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + 
"quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "distilbert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6748df0de --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "distilbert/distilbert-base-uncased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "distilbert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 
100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json new file mode 100644 index 000000000..e93363d0e --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "distilbert/distilbert-base-uncased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "distilbert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json new file mode 100644 index 
000000000..6e57f58db --- /dev/null +++ b/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json new file mode 100644 index 000000000..4d3a2dbef --- /dev/null +++ b/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + 
"value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "dslim/bert-base-NER" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json new file mode 100644 index 000000000..de26003dc --- /dev/null +++ b/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + 
], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "dslim/bert-base-NER" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json new file mode 100644 index 000000000..86334662d --- /dev/null +++ b/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "convnext" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + 
"samples": 100 + } + } +} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json new file mode 100644 index 000000000..f92b2fe5d --- /dev/null +++ b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "facebook/convnext-tiny-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "convnext" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json new file mode 100644 index 000000000..d4766924f --- /dev/null +++ b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + 
"batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "facebook/convnext-tiny-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "convnext" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..4330bdf89 --- /dev/null +++ b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + 
"optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json new file mode 100644 index 000000000..f750bc8b1 --- /dev/null +++ b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dino-vitb16" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json 
b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json new file mode 100644 index 000000000..d328593d8 --- /dev/null +++ b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dino-vitb16" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..4330bdf89 --- /dev/null +++ b/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, 
+ "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json new file mode 100644 index 000000000..3da2c7432 --- /dev/null +++ b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dino-vits16" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + 
"dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json new file mode 100644 index 000000000..e33ef4491 --- /dev/null +++ b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dino-vits16" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json index b3e1216fd..5c12ba9cf 100644 --- a/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json +++ 
b/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..5c12ba9cf --- /dev/null +++ b/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json new file mode 100644 index 000000000..2d2c0022c --- /dev/null +++ b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 
+ ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dinov2-large" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json new file mode 100644 index 000000000..2b3f6cc27 --- /dev/null +++ b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": 
null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "facebook/dinov2-large" + }, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json index b3e1216fd..5c12ba9cf 100644 --- a/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json +++ b/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json new file mode 100644 index 000000000..58c6f6c2b --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + 
"clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..1e5074c51 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": 
"google-bert/bert-base-multilingual-cased" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..111ff0161 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": 
"google-bert/bert-base-multilingual-cased" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json new file mode 100644 index 000000000..51688c2fd --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json 
b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..553074f04 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-cased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json new file mode 
100644 index 000000000..067e5e45a --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-cased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..f4b523cf9 --- /dev/null +++ 
b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 105879 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..7b71cc76c --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 105879 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + 
], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json new file mode 100644 index 000000000..69762381d --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 105879 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 
512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..d42fef107 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + 
}, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..94669e9e9 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + 
"dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json new file mode 100644 index 000000000..4fddd5121 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + 
} + } +} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..13c6daa8f --- /dev/null +++ b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json new file mode 100644 index 000000000..db79310ba --- /dev/null +++ 
b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json @@ -0,0 +1,100 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json 
b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json new file mode 100644 index 000000000..69690900b --- /dev/null +++ b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json @@ -0,0 +1,100 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": 
"answers" + } + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json b/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json index 628768221..4330bdf89 100644 --- a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json +++ b/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json new file mode 100644 index 000000000..b0e9d9e08 --- /dev/null +++ b/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json new file mode 100644 index 000000000..d1458e451 --- /dev/null +++ 
b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "google/vit-base-patch16-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json new file mode 100644 index 000000000..9cf94c890 --- /dev/null +++ b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + 
"shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "google/vit-base-patch16-224" + }, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json new file mode 100644 index 000000000..e7074249d --- /dev/null +++ b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "zero-shot-classification", + "model_class": 
"AutoModelForSequenceClassification", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "zero-shot-classification", + "dataset": { + "path": "fancyzhx/ag_news", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "text", + "label_column": "label", + "candidate_labels": "World,Sports,Business,Sci/Tech", + "hypothesis_template": "This text is about {}." + } + } + } +} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json new file mode 100644 index 000000000..5a1abda9e --- /dev/null +++ b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json @@ -0,0 +1,85 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "zero-shot-classification", + "model_name": "joeddav/xlm-roberta-large-xnli" + }, + "compile": null, + "loader": { + "task": "zero-shot-classification", + "model_class": "AutoModelForSequenceClassification", + 
"model_type": "xlm-roberta" + }, + "eval": { + "task": "zero-shot-classification", + "dataset": { + "path": "fancyzhx/ag_news", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "text", + "label_column": "label", + "candidate_labels": "World,Sports,Business,Sci/Tech", + "hypothesis_template": "This text is about {}." + } + } + } +} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json new file mode 100644 index 000000000..b215cc87b --- /dev/null +++ b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json @@ -0,0 +1,85 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "zero-shot-classification", + "model_name": "joeddav/xlm-roberta-large-xnli" + }, + "compile": null, + "loader": { + "task": "zero-shot-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "xlm-roberta" + }, + "eval": { + 
"task": "zero-shot-classification", + "dataset": { + "path": "fancyzhx/ag_news", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "text", + "label_column": "label", + "candidate_labels": "World,Sports,Business,Sci/Tech", + "hypothesis_template": "This text is about {}." + } + } + } +} diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json index 5a186260f..226d77d36 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json +++ b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json @@ -61,6 +61,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json index 11ba24b65..c3b498a73 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json +++ b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json index 849dc5481..0d45c3391 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json +++ b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json @@ -68,6 +68,7 @@ "task": 
"feature-extraction", "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" }, + "compile": null, "loader": { "task": "feature-extraction", "model_class": "CLIPTextModelWithProjection", @@ -78,6 +79,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json new file mode 100644 index 000000000..e09eabfcf --- /dev/null +++ b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json @@ -0,0 +1,53 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "mattmdjaga/human_parsing_dataset", + "split": "train", + "samples": 100, + "columns_mapping": { + "annotation_column": "mask" + } + } + } +} diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..919689732 --- /dev/null +++ b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + 
"export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "mattmdjaga/segformer_b2_clothes" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "mattmdjaga/human_parsing_dataset", + "split": "train", + "samples": 1000, + "columns_mapping": { + "annotation_column": "mask" + } + } + } +} diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json new file mode 100644 index 000000000..f23ded80b --- /dev/null +++ b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], 
+ "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "mattmdjaga/segformer_b2_clothes" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "mattmdjaga/human_parsing_dataset", + "split": "train", + "samples": 1000, + "columns_mapping": { + "annotation_column": "mask" + } + } + } +} diff --git a/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json b/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json new file mode 100644 index 000000000..351c764b7 --- /dev/null +++ b/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff 
--git a/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json b/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json new file mode 100644 index 000000000..6e2a421c4 --- /dev/null +++ b/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-18" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json b/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json new file mode 100644 index 000000000..d4853a128 --- /dev/null +++ b/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": 
false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-18" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json b/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json new file mode 100644 index 000000000..83a057a7a --- /dev/null +++ b/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": 
"AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json b/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json new file mode 100644 index 000000000..17a0831ac --- /dev/null +++ b/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-50" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json b/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json new file mode 100644 index 000000000..010c8389f --- /dev/null +++ 
b/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-50" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json new file mode 100644 index 000000000..3b7b0c032 --- /dev/null +++ b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + 
"dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swin" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json new file mode 100644 index 000000000..4f5349f3e --- /dev/null +++ b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/swin-large-patch4-window7-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swin" + }, + "eval": { + "task": 
"image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json new file mode 100644 index 000000000..99b8754de --- /dev/null +++ b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/swin-large-patch4-window7-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swin" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json new file mode 100644 index 
000000000..242b60821 --- /dev/null +++ b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swinv2" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json new file mode 100644 index 000000000..89edca400 --- /dev/null +++ b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + 
"save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/swinv2-tiny-patch4-window16-256" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swinv2" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json new file mode 100644 index 000000000..b11bf17c1 --- /dev/null +++ b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/swinv2-tiny-patch4-window16-256" + }, + "compile": null, + "loader": { + "task": "image-classification", + 
"model_class": "AutoModelForImageClassification", + "model_type": "swinv2" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..676eb1836 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 
16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + 
"name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json 
b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..d17d6eb43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..6e54cde9d --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + 
"value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": 
[ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + 
}, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..59476ddb3 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": 
{ + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..4774e0b43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 1024 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + 
"shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, 
+ 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": 
"text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..d17d6eb43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..e04d4c021 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + 
"hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 1024 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 1024 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 
64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": 
"present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..e7a784568 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + 
"layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json new file mode 100644 index 000000000..6ca3b9a9d --- /dev/null +++ b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 32200 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "KorQuAD/squad_kor_v1", + "split": "validation", + "samples": 100, + "columns_mapping": { + 
"question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json new file mode 100644 index 000000000..4af062dc0 --- /dev/null +++ b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json @@ -0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 32200 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "monologg/koelectra-small-v2-distilled-korquad-384" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "KorQuAD/squad_kor_v1", 
+ "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json new file mode 100644 index 000000000..17a040296 --- /dev/null +++ b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json @@ -0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 32200 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "monologg/koelectra-small-v2-distilled-korquad-384" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": 
"question-answering", + "dataset": { + "path": "KorQuAD/squad_kor_v1", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json new file mode 100644 index 000000000..2c6552143 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..924497cdd --- /dev/null +++ b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + 
"opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b1-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json new file mode 100644 index 000000000..8c64a9223 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + 
"hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b1-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json new file mode 100644 index 000000000..2c6552143 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ 
+ { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..0848f1fc8 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b2-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": 
"segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json new file mode 100644 index 000000000..610a2173a --- /dev/null +++ b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b2-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": 
"scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json new file mode 100644 index 000000000..85a882f08 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 640, + 640 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..dd5c11a96 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": 
false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 640, + 640 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b5-finetuned-ade-640-640" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json new file mode 100644 index 000000000..80894222b --- /dev/null +++ b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 640, + 640 + 
], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b5-finetuned-ade-640-640" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json index 5a186260f..226d77d36 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json +++ b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json @@ -61,6 +61,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json index 98323e872..5745e9ca0 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json +++ 
b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json index ae40a470a..ed19dcf30 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json +++ b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..d16a0697d --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + 
"model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json 
b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json new file mode 100644 index 000000000..cf8ede994 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-base-patch16" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json 
b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json new file mode 100644 index 000000000..6b569997c --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-base-patch16" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git 
a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json new file mode 100644 index 000000000..0108d5344 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-base-patch16" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git 
a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json new file mode 100644 index 000000000..6b379566e --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-base-patch16" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} 
diff --git a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json new file mode 100644 index 000000000..226d77d36 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..0dde45b25 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + 
"enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-base-patch32" + }, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..b4d99249e --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + 
"hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-base-patch32" + }, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..04c51f130 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + 
"enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 336, + 336 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + 
"layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json new file mode 100644 index 000000000..c10bff087 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 336, + 336 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": 
"openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json new file mode 100644 index 000000000..a3591f781 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + 
"task": "feature-extraction", + "model_name": "openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json new file mode 100644 index 000000000..bb7016c8c --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 336, + 336 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": 
"openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json new file mode 100644 index 000000000..bd2d5928a --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": 
"feature-extraction", + "model_name": "openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..d16a0697d --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git 
a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json new file mode 100644 index 000000000..e6236da7e --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + 
"opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-large-patch14" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json new file mode 100644 index 000000000..222a26f34 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + 
"do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-large-patch14" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json new file mode 100644 index 000000000..fbf2c7ef6 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + 
"export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-large-patch14" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json new file mode 100644 index 000000000..f9a9045b5 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, 
+ "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-large-patch14" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json new file mode 100644 index 000000000..ef5ac83f3 --- /dev/null +++ b/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json @@ -0,0 +1,52 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + 
"enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "gender" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json new file mode 100644 index 000000000..2e43e5ab2 --- /dev/null +++ b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json @@ -0,0 +1,69 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + 
"model_name": "rizvandwiki/gender-classification" + }, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "gender" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json new file mode 100644 index 000000000..897fc4975 --- /dev/null +++ b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json @@ -0,0 +1,69 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "rizvandwiki/gender-classification" + }, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": 
"~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "gender" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json index ba6575a35..ff77c9eb3 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json index d05456800..77ccc0498 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json index c7de443dc..1fe2412af 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", 
"input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json index eed96889f..30f1521ce 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json index 49c2bb8b8..28962bc74 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json index 29534eb50..212204b0d 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json 
b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json new file mode 100644 index 000000000..e731b2a2a --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..b88600555 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 
+ ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..9c62ef765 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": 
"last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..25b22809c --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" 
+ }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..345311097 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": 
{ + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json new file mode 100644 index 000000000..f50018ec8 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git 
a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json new file mode 100644 index 000000000..e731b2a2a --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..e00cce6f7 --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + 
"hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..8c46e4b7d --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + 
"value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..25b22809c --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + 
"value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..500b5ff1a --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + 
"compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json new file mode 100644 index 000000000..f6dde153b --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + 
"eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json new file mode 100644 index 000000000..65f2713df --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json 
b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..16ab4d489 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git 
a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json new file mode 100644 index 000000000..c57c8ff8a --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json @@ -0,0 +1,96 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": 
"sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..daea4950c --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..cdb314043 --- /dev/null +++ 
b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" + }, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json new file 
mode 100644 index 000000000..9429b4790 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" + }, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json 
b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..92b6ae628 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..8cba69ee4 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + 
"hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" + }, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json new file mode 100644 index 000000000..af6827752 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + 
"name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" + }, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json new file mode 100644 index 000000000..8d02715fe --- /dev/null +++ b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, 
+ { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "roberta" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", + "columns_mapping": { + "label_column": "pos_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" + } + } +} diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json new file mode 100644 index 000000000..d00b21020 --- /dev/null +++ b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + 
"op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "w11wo/indonesian-roberta-base-posp-tagger" + }, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "roberta" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", + "columns_mapping": { + "label_column": "pos_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" + } + } +} diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json new file mode 100644 index 000000000..873c29dbf --- /dev/null +++ b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint8", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + 
"model_name": "w11wo/indonesian-roberta-base-posp-tagger" + }, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "roberta" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", + "columns_mapping": { + "label_column": "pos_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" + } + } +} diff --git a/examples/release-builtin-models.md b/examples/release-builtin-models.md new file mode 100644 index 000000000..cfe517aab --- /dev/null +++ b/examples/release-builtin-models.md @@ -0,0 +1,127 @@ +# Release Built-in Models & Recipes + +This runbook is the single entry point for two recurring release tasks: + +1. **Refresh `examples/recipes/README.md`** (Built-in Model count + Models table) on the current working branch and push. +2. **Pick recipe configs** onto a fresh branch forked from `main` and open a PR. + +The two non-trivial pieces of logic (eval-result counting and recipe picking) live in +Python scripts; everything else (`git`, `gh`) is invoked directly from the commands below. + +| Script | Purpose | +|---|---| +| [scripts/rebuild_recipes_readme.py](../scripts/rebuild_recipes_readme.py) | Regenerate the `## Models` section of `examples/recipes/README.md` from eval results. Prose before `## Models` is preserved verbatim. | +| [scripts/pick_builtin_recipes.py](../scripts/pick_builtin_recipes.py) | Copy qualifying recipe configs into `examples/recipes/<slug>/`. Supports `--dry-run` and `--prune`. Does **not** modify the README. | + +--- + +## Definitions + +### The 10 (EP, device) buckets + +A *bucket* is one folder under `examples/<ep>/<device>/`.
The full set is: + +| # | EP | Device | Folder | Result filename pattern | +|---|---|---|---|---| +| 1 | `dml` | gpu | `examples/dml/gpu/<slug>/` | `<task>_eval_result.json` | +| 2 | `mlas` | cpu | `examples/mlas/cpu/<slug>/` | `<task>_eval_result.json` | +| 3 | `migraphx` | gpu | `examples/migraphx/gpu/<slug>/` | `<task>_eval_result.json` | +| 4 | `nv_tensorrt_rtx` | gpu | `examples/nv_tensorrt_rtx/gpu/<slug>/` | `<task>_eval_result.json` | +| 5 | `openvino` | cpu | `examples/openvino/cpu/<slug>/` | `<task>_eval_result.json` | +| 6 | `openvino` | gpu | `examples/openvino/gpu/<slug>/` | `<task>_eval_result.json` | +| 7 | `qnn` | gpu | `examples/qnn/gpu/<slug>/` | `<task>_eval_result.json` | +| 8 | `openvino` | npu | `examples/openvino/npu/<slug>/` | `<task>_<precision>_eval_result.json` | +| 9 | `qnn` | npu | `examples/qnn/npu/<slug>/` | `<task>_<precision>_eval_result.json` | +| 10 | `vitisai` | npu | `examples/vitisai/npu/<slug>/` | `<task>_<precision>_eval_result.json` | + +`<slug>` = `<model_id>` with the first `/` replaced by `_` (e.g. `microsoft/resnet-50` → `microsoft_resnet-50`). +For NPU `<precision>` ∈ {`fp16`, `w8a8`, `w8a16`}. For CPU/GPU EPs there is no precision suffix (EP default precision). + +See also [test_config.md](test_config.md) and [generate_config.md](generate_config.md) for the layout these results come from. + +### Built-in Model criterion + +A `(model, task)` pair is **Built-in** iff its **fp16** eval passes on **every** one of the 10 buckets: + +- For CPU/GPU buckets, "fp16 passes" means `<task>_eval_result.json` exists (these EPs run their default precision, treated as fp16-equivalent for this criterion). +- For NPU buckets, "fp16 passes" means `<task>_fp16_eval_result.json` exists. + +### Recipe picking criterion + +For each Built-in `(model, task)`: + +- **fp16** recipe: always picked (mandatory by definition). +- **w8a8** recipe: picked iff `<task>_w8a8_eval_result.json` exists on **at least one** NPU EP. +- **w8a16** recipe: picked iff `<task>_w8a16_eval_result.json` exists on at least one NPU EP. + +Recipes are sourced from the matching NPU folder; composite models (e.g. CLIP zero-shot) copy every file matching `<task>_<precision>_config*.json`.
+ +--- + +## Stage 1 — Refresh `recipes/README.md` (this branch) + +Regenerate the README in-place (preserving prose above `## Models`), commit on the **current** branch, push. + +```powershell +uv run python scripts/rebuild_recipes_readme.py +git add examples/recipes/README.md +git diff --cached --quiet examples/recipes/README.md +if ($LASTEXITCODE -ne 0) { + git commit -m "examples/recipes: refresh built-in model README" + git push origin (git rev-parse --abbrev-ref HEAD) +} else { + Write-Host "README unchanged; nothing to commit." +} +``` + +--- + +## Stage 2 — Pick recipe configs onto a branch off `main` and open a PR + +The picker reads `<ep>/<device>/.../<task>_*_eval_result.json` from disk to decide +what qualifies, so it must run on a branch that **has** those eval results +(typically your working branch). We then snapshot `examples/recipes/` to a +temp dir, switch to a fresh branch forked from `main`, and replace +`examples/recipes/` wholesale — so additions **and deletions** relative to +`main` both land in the PR. + +```powershell +# 0. Run on the branch that contains the eval results. Tree must be clean. +git status --porcelain # must be empty + +# 1. Pick recipes into examples/recipes/. +uv run python scripts/pick_builtin_recipes.py --prune + +# 2. Snapshot the final examples/recipes/ contents to a temp dir. +$tmp = New-Item -ItemType Directory -Path (Join-Path $env:TEMP ("builtin-recipes-" + [guid]::NewGuid().Guid)) +Copy-Item -Recurse examples/recipes/* $tmp + +# 3. Restore working branch (drop the picker's changes). +git checkout -- examples/recipes +git clean -fd examples/recipes + +# 4. Fork from origin/main. +git fetch origin main +$branch = "shzhen/update-builtin-recipes-" + (Get-Date -Format yyyyMMdd) +git switch --create $branch origin/main + +# 5. Replace examples/recipes/ wholesale with the snapshot.
+Get-ChildItem examples/recipes -Exclude README.md | Remove-Item -Recurse -Force +Copy-Item -Recurse $tmp/* examples/recipes/ -Force +Remove-Item -Recurse -Force $tmp + +# 6. Rebuild the README so the Models table matches the picked configs. +uv run python scripts/rebuild_recipes_readme.py + +# 7. Commit, push, open PR. +git add examples/recipes +git commit -m "examples/recipes: refresh built-in model recipes" +git push --set-upstream origin $branch +gh pr create --base main --head $branch ` + --title "Refresh built-in model recipes" ` + --body "Auto-generated by ``scripts/pick_builtin_recipes.py --prune``." +``` + +Step 5 keeps `examples/recipes/README.md` from `main` in place; step 6 then overwrites +its `## Models` section. If you need to recover from an interrupted run, the snapshot +lives at the `$tmp` path printed by `Copy-Item`. diff --git a/scripts/pick_builtin_recipes.py b/scripts/pick_builtin_recipes.py new file mode 100644 index 000000000..c19aa547f --- /dev/null +++ b/scripts/pick_builtin_recipes.py @@ -0,0 +1,185 @@ +"""Pick recipes for Built-in Models into examples/recipes/. + +A `(slug, task)` pair is **Built-in** iff fp16 eval passes on every one of +the 10 (EP, device) buckets — see `rebuild_recipes_readme.discover_builtin_pairs`. + +For each Built-in pair this script copies recipe config files into +`examples/recipes//`: + + - `_fp16_config*.json` — always picked (sourced from an NPU bucket + whose fp16 eval passed, or a CPU/GPU bucket as a defensive fallback). + - `_w8a8_config*.json` — picked iff w8a8 eval passes on **at least + one** NPU EP (sourced from that EP). + - `_w8a16_config*.json` — picked iff w8a16 eval passes on at least + one NPU EP (sourced from that EP). + +Composite tasks (e.g. CLIP zero-shot-image-classification) produce multiple +config files matching `__config*.json`; all matching files +are copied. + +`examples/recipes/README.md` is **not** modified here — run +`scripts/rebuild_recipes_readme.py` separately. 
+ +Run with `--dry-run` to preview without writing. +""" +from __future__ import annotations + +import argparse +import shutil +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from rebuild_recipes_readme import ( # noqa: E402 + EPS_ALL, + EVAL_SUFFIX, + NPU_EPS, + discover_builtin_pairs, +) + + +REPO = Path(__file__).resolve().parents[1] +EX = REPO / "examples" +RECIPES = EX / "recipes" + +QUANT_PRECISIONS = ("w8a8", "w8a16") + + +def find_source_dir(slug: str, task: str, precision: str) -> Path | None: + """Return the bucket whose `__eval_result.json` exists. + + NPU EPs are preferred (they carry precision in the filename). fp16 falls + back to any CPU/GPU bucket as a defensive measure. + """ + for ep, hw in NPU_EPS: + d = EX / ep / hw / slug + if (d / f"{task}_{precision}{EVAL_SUFFIX}").exists(): + return d + if precision == "fp16": + for ep, hw in EPS_ALL: + if (ep, hw) in NPU_EPS: + continue + d = EX / ep / hw / slug + if ( + (d / f"{task}{EVAL_SUFFIX}").exists() + or (d / f"{task}_fp16{EVAL_SUFFIX}").exists() + ): + return d + return None + + +def source_config_files(src_dir: Path, task: str, precision: str) -> list[Path]: + """Return matching `__config*.json` (NPU) or + `_config*.json` (CPU/GPU) files in src_dir.""" + matches = sorted(src_dir.glob(f"{task}_{precision}_config*.json")) + if matches: + return matches + return sorted(src_dir.glob(f"{task}_config*.json")) + + +def recipe_target_name(src_name: str, task: str, precision: str) -> str: + """Map a source config filename to its recipe filename. 
+ + Examples: + `image-classification_config.json` -> `image-classification_fp16_config.json` + `image-classification_fp16_config.json` -> `image-classification_fp16_config.json` + `zero-shot-image-classification_config_text-encoder.json` + -> `zero-shot-image-classification_fp16_config_text-encoder.json` + """ + npu_prefix = f"{task}_{precision}_config" + if src_name.startswith(npu_prefix): + return src_name + cpu_prefix = f"{task}_config" + assert src_name.startswith(cpu_prefix), src_name + suffix = src_name[len(cpu_prefix):] + return f"{npu_prefix}{suffix}" + + +def copy_recipe( + slug: str, + task: str, + precision: str, + src_dir: Path, + dry_run: bool, +) -> list[str]: + sources = source_config_files(src_dir, task, precision) + if not sources: + return [] + dest_dir = RECIPES / slug + if not dry_run: + dest_dir.mkdir(parents=True, exist_ok=True) + written: list[str] = [] + for src in sources: + dest_name = recipe_target_name(src.name, task, precision) + dest = dest_dir / dest_name + if not dry_run: + shutil.copy2(src, dest) + written.append(dest_name) + return written + + +def clean_existing_recipes(slugs_to_keep: set[str], dry_run: bool) -> list[str]: + removed: list[str] = [] + for child in sorted(RECIPES.iterdir()): + if not child.is_dir(): + continue + if child.name not in slugs_to_keep: + if not dry_run: + shutil.rmtree(child) + removed.append(child.name) + return removed + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dry-run", action="store_true") + parser.add_argument( + "--prune", + action="store_true", + help="Remove existing recipe folders that are no longer Built-in.", + ) + args = parser.parse_args() + + pairs = discover_builtin_pairs() + print(f"Built-in (slug, task) pairs: {len(pairs)}") + + slug_set: set[str] = set() + total_files = 0 + for slug, task in pairs: + slug_set.add(slug) + fp16_src = find_source_dir(slug, task, "fp16") + if fp16_src is None: + print(f" ! 
SKIP {slug} | {task} (no fp16 source found)") + continue + files = copy_recipe(slug, task, "fp16", fp16_src, args.dry_run) + total_files += len(files) + print( + f" + {slug} {task} fp16 <- " + f"{fp16_src.relative_to(REPO)} ({len(files)} files)" + ) + for precision in QUANT_PRECISIONS: + qsrc = find_source_dir(slug, task, precision) + if qsrc is None: + continue + qfiles = copy_recipe(slug, task, precision, qsrc, args.dry_run) + total_files += len(qfiles) + print( + f" + {slug} {task} {precision} <- " + f"{qsrc.relative_to(REPO)} ({len(qfiles)} files)" + ) + + if args.prune: + removed = clean_existing_recipes(slug_set, args.dry_run) + for slug in removed: + print(f" - removed recipes/{slug}") + + print( + f"\nWrote {total_files} recipe file(s) across {len(slug_set)} model folder(s)." + ) + if args.dry_run: + print("(dry-run: no files were modified)") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/rebuild_recipes_readme.py b/scripts/rebuild_recipes_readme.py new file mode 100644 index 000000000..93ccba7a0 --- /dev/null +++ b/scripts/rebuild_recipes_readme.py @@ -0,0 +1,131 @@ +"""Regenerate `examples/recipes/README.md` by walking `examples///`. + +Discovery is **filesystem-driven**: no static model/task list is consulted. + +For every `examples////` folder we scan for fp16-passing +eval results: + + - NPU buckets: `_fp16_eval_result.json` + - CPU/GPU buckets: `_eval_result.json` (EP default precision) or + `_fp16_eval_result.json` + +A `(slug, task)` pair is **Built-in** iff every one of the 10 (EP, device) +buckets contains a passing fp16 eval result. + +The README's prose (everything before `## Models`) is preserved verbatim; +only the Models table and a `Total` line are rewritten. 
+""" +from __future__ import annotations + +from pathlib import Path + + +REPO = Path(__file__).resolve().parents[1] +EX = REPO / "examples" +README = EX / "recipes" / "README.md" +MARKER = "## Models" + +EPS_ALL: list[tuple[str, str]] = [ + ("dml", "gpu"), + ("mlas", "cpu"), + ("migraphx", "gpu"), + ("nv_tensorrt_rtx", "gpu"), + ("openvino", "cpu"), + ("openvino", "gpu"), + ("openvino", "npu"), + ("qnn", "gpu"), + ("qnn", "npu"), + ("vitisai", "npu"), +] +NPU_EPS: list[tuple[str, str]] = [("openvino", "npu"), ("qnn", "npu"), ("vitisai", "npu")] +EVAL_SUFFIX = "_eval_result.json" + + +def slug_to_hf_id(slug: str) -> str: + """Convert a folder slug back to a HuggingFace id. + + The slug is `_` where the first `_` replaces the owner/name + slash. HF owners do not contain `_` (they use `-`), so splitting on the + first `_` is unambiguous. + """ + return slug.replace("_", "/", 1) + + +def fp16_tasks_in_bucket(bucket_dir: Path, hw: str) -> set[tuple[str, str]]: + """Return {(slug, task)} that have a passing fp16 eval in this bucket.""" + out: set[tuple[str, str]] = set() + if not bucket_dir.is_dir(): + return out + for slug_dir in bucket_dir.iterdir(): + if not slug_dir.is_dir(): + continue + for ev in slug_dir.glob(f"*{EVAL_SUFFIX}"): + stem = ev.name[: -len(EVAL_SUFFIX)] # strip trailing "_eval_result.json" + if hw == "npu": + # NPU layout: "_"; only fp16 counts here. + if "_" not in stem: + continue + task, precision = stem.rsplit("_", 1) + if precision == "fp16": + out.add((slug_dir.name, task)) + else: + # CPU/GPU layout: "" (default precision) or "_fp16". 
+ if stem.endswith("_fp16"): + task = stem[: -len("_fp16")] + else: + task = stem + out.add((slug_dir.name, task)) + return out + + +def npu_quant_passes(slug: str, task: str, precision: str) -> bool: + """True iff `__eval_result.json` exists on any NPU EP.""" + for ep, hw in NPU_EPS: + if (EX / ep / hw / slug / f"{task}_{precision}{EVAL_SUFFIX}").exists(): + return True + return False + + +def discover_builtin_pairs() -> list[tuple[str, str]]: + """Walk examples/ and return Built-in (slug, task) pairs.""" + per_bucket = { + (ep, hw): fp16_tasks_in_bucket(EX / ep / hw, hw) for ep, hw in EPS_ALL + } + all_pairs = set().union(*per_bucket.values()) + builtin = [ + pair for pair in sorted(all_pairs) + if all(pair in per_bucket[k] for k in per_bucket) + ] + return builtin + + +def render_models_section(pairs: list[tuple[str, str]]) -> str: + rows = "\n".join(f"| {slug_to_hf_id(slug)} | {task} |" for slug, task in pairs) + return ( + f"{MARKER}\n" + f"\n" + f"Total: **{len(pairs)}** (model, task) tuples that pass fp16 eval on " + f"all {len(EPS_ALL)} (EP, device) buckets.\n" + f"\n" + f"| Model | Task |\n" + f"|---|---|\n" + f"{rows}\n" + ) + + +def main() -> int: + pairs = discover_builtin_pairs() + text = README.read_text(encoding="utf-8") + idx = text.find(MARKER) + if idx == -1: + new = text.rstrip() + "\n\n" + render_models_section(pairs) + else: + new = text[:idx] + render_models_section(pairs) + README.write_text(new, encoding="utf-8") + print(f"Wrote {README}") + print(f" Built-in (slug, task) tuples: {len(pairs)}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 8997c4b70b3cf1c8ef2c37cf04b839cb715e32aa Mon Sep 17 00:00:00 2001 From: "Shiyi Zheng (from Dev Box)" Date: Mon, 22 Jun 2026 11:56:15 +0800 Subject: [PATCH 2/3] Remove tooling files; keep only recipes content --- examples/release-builtin-models.md | 127 -------------------- scripts/pick_builtin_recipes.py | 185 ----------------------------- scripts/rebuild_recipes_readme.py | 
131 -------------------- 3 files changed, 443 deletions(-) delete mode 100644 examples/release-builtin-models.md delete mode 100644 scripts/pick_builtin_recipes.py delete mode 100644 scripts/rebuild_recipes_readme.py diff --git a/examples/release-builtin-models.md b/examples/release-builtin-models.md deleted file mode 100644 index cfe517aab..000000000 --- a/examples/release-builtin-models.md +++ /dev/null @@ -1,127 +0,0 @@ -# Release Built-in Models & Recipes - -This runbook is the single entry point for two recurring release tasks: - -1. **Refresh `examples/recipes/README.md`** (Built-in Model count + Models table) on the current working branch and push. -2. **Pick recipe configs** onto a fresh branch forked from `main` and open a PR. - -The two non-trivial pieces of logic (eval-result counting and recipe picking) live in -Python scripts; everything else (`git`, `gh`) is invoked directly from the commands below. - -| Script | Purpose | -|---|---| -| [scripts/rebuild_recipes_readme.py](../scripts/rebuild_recipes_readme.py) | Regenerate the `## Models` section of `examples/recipes/README.md` from eval results. Prose before `## Models` is preserved verbatim. | -| [scripts/pick_builtin_recipes.py](../scripts/pick_builtin_recipes.py) | Copy qualifying recipe configs into `examples/recipes//`. Supports `--dry-run` and `--prune`. Does **not** modify the README. | - ---- - -## Definitions - -### The 10 (EP, device) buckets - -A *bucket* is one folder under `examples///`. 
The full set is: - -| # | EP | Device | Folder | Result filename pattern | -|---|---|---|---|---| -| 1 | `dml` | gpu | `examples/dml/gpu//` | `_eval_result.json` | -| 2 | `mlas` | cpu | `examples/mlas/cpu//` | `_eval_result.json` | -| 3 | `migraphx` | gpu | `examples/migraphx/gpu//` | `_eval_result.json` | -| 4 | `nv_tensorrt_rtx` | gpu | `examples/nv_tensorrt_rtx/gpu//` | `_eval_result.json` | -| 5 | `openvino` | cpu | `examples/openvino/cpu//` | `_eval_result.json` | -| 6 | `openvino` | gpu | `examples/openvino/gpu//` | `_eval_result.json` | -| 7 | `qnn` | gpu | `examples/qnn/gpu//` | `_eval_result.json` | -| 8 | `openvino` | npu | `examples/openvino/npu//` | `__eval_result.json` | -| 9 | `qnn` | npu | `examples/qnn/npu//` | `__eval_result.json` | -| 10 | `vitisai` | npu | `examples/vitisai/npu//` | `__eval_result.json` | - -`` = `` with the first `/` replaced by `_` (e.g. `microsoft/resnet-50` → `microsoft_resnet-50`). -For NPU `` ∈ {`fp16`, `w8a8`, `w8a16`}. For CPU/GPU EPs there is no precision suffix (EP default precision). - -See also [test_config.md](test_config.md) and [generate_config.md](generate_config.md) for the layout these results come from. - -### Built-in Model criterion - -A `(model, task)` pair is **Built-in** iff its **fp16** eval passes on **every** one of the 10 buckets: - -- For CPU/GPU buckets, "fp16 passes" means `_eval_result.json` exists (these EPs run their default precision, treated as fp16-equivalent for this criterion). -- For NPU buckets, "fp16 passes" means `_fp16_eval_result.json` exists. - -### Recipe picking criterion - -For each Built-in `(model, task)`: - -- **fp16** recipe: always picked (mandatory by definition). -- **w8a8** recipe: picked iff `_w8a8_eval_result.json` exists on **at least one** NPU EP. -- **w8a16** recipe: picked iff `_w8a16_eval_result.json` exists on at least one NPU EP. - -Recipes are sourced from the matching NPU folder; composite models (e.g. CLIP zero-shot) copy every file matching `__config*.json`. 
- ---- - -## Stage 1 — Refresh `recipes/README.md` (this branch) - -Regenerate the README in-place (preserving prose above `## Models`), commit on the **current** branch, push. - -```powershell -uv run python scripts/rebuild_recipes_readme.py -git add examples/recipes/README.md -git diff --cached --quiet examples/recipes/README.md -if ($LASTEXITCODE -ne 0) { - git commit -m "examples/recipes: refresh built-in model README" - git push origin (git rev-parse --abbrev-ref HEAD) -} else { - Write-Host "README unchanged; nothing to commit." -} -``` - ---- - -## Stage 2 — Pick recipe configs onto a branch off `main` and open a PR - -The picker reads `//.../_*_eval_result.json` from disk to decide -what qualifies, so it must run on a branch that **has** those eval results -(typically your working branch). We then snapshot `examples/recipes/` to a -temp dir, switch to a fresh branch forked from `main`, and replace -`examples/recipes/` wholesale — so additions **and deletions** relative to -`main` both land in the PR. - -```powershell -# 0. Run on the branch that contains the eval results. Tree must be clean. -git status --porcelain # must be empty - -# 1. Pick recipes into examples/recipes/. -uv run python scripts/pick_builtin_recipes.py --prune - -# 2. Snapshot the final examples/recipes/ contents to a temp dir. -$tmp = New-Item -ItemType Directory -Path (Join-Path $env:TEMP ("builtin-recipes-" + [guid]::NewGuid().Guid)) -Copy-Item -Recurse examples/recipes/* $tmp - -# 3. Restore working branch (drop the picker's changes). -git checkout -- examples/recipes -git clean -fd examples/recipes - -# 4. Fork from origin/main. -git fetch origin main -$branch = "shzhen/update-builtin-recipes-" + (Get-Date -Format yyyyMMdd) -git switch --create $branch origin/main - -# 5. Replace examples/recipes/ wholesale with the snapshot. 
-Get-ChildItem examples/recipes -Exclude README.md | Remove-Item -Recurse -Force -Copy-Item -Recurse $tmp/* examples/recipes/ -Force -Remove-Item -Recurse -Force $tmp - -# 6. Rebuild the README so the Models table matches the picked configs. -uv run python scripts/rebuild_recipes_readme.py - -# 7. Commit, push, open PR. -git add examples/recipes -git commit -m "examples/recipes: refresh built-in model recipes" -git push --set-upstream origin $branch -gh pr create --base main --head $branch ` - --title "Refresh built-in model recipes" ` - --body "Auto-generated by ``scripts/pick_builtin_recipes.py --prune``." -``` - -Step 5 keeps `examples/recipes/README.md` from `main` in place; step 6 then overwrites -its `## Models` section. If you need to recover from an interrupted run, the snapshot -lives at the `$tmp` path printed by `Copy-Item`. diff --git a/scripts/pick_builtin_recipes.py b/scripts/pick_builtin_recipes.py deleted file mode 100644 index c19aa547f..000000000 --- a/scripts/pick_builtin_recipes.py +++ /dev/null @@ -1,185 +0,0 @@ -"""Pick recipes for Built-in Models into examples/recipes/. - -A `(slug, task)` pair is **Built-in** iff fp16 eval passes on every one of -the 10 (EP, device) buckets — see `rebuild_recipes_readme.discover_builtin_pairs`. - -For each Built-in pair this script copies recipe config files into -`examples/recipes//`: - - - `_fp16_config*.json` — always picked (sourced from an NPU bucket - whose fp16 eval passed, or a CPU/GPU bucket as a defensive fallback). - - `_w8a8_config*.json` — picked iff w8a8 eval passes on **at least - one** NPU EP (sourced from that EP). - - `_w8a16_config*.json` — picked iff w8a16 eval passes on at least - one NPU EP (sourced from that EP). - -Composite tasks (e.g. CLIP zero-shot-image-classification) produce multiple -config files matching `__config*.json`; all matching files -are copied. - -`examples/recipes/README.md` is **not** modified here — run -`scripts/rebuild_recipes_readme.py` separately. 
- -Run with `--dry-run` to preview without writing. -""" -from __future__ import annotations - -import argparse -import shutil -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).resolve().parent)) -from rebuild_recipes_readme import ( # noqa: E402 - EPS_ALL, - EVAL_SUFFIX, - NPU_EPS, - discover_builtin_pairs, -) - - -REPO = Path(__file__).resolve().parents[1] -EX = REPO / "examples" -RECIPES = EX / "recipes" - -QUANT_PRECISIONS = ("w8a8", "w8a16") - - -def find_source_dir(slug: str, task: str, precision: str) -> Path | None: - """Return the bucket whose `__eval_result.json` exists. - - NPU EPs are preferred (they carry precision in the filename). fp16 falls - back to any CPU/GPU bucket as a defensive measure. - """ - for ep, hw in NPU_EPS: - d = EX / ep / hw / slug - if (d / f"{task}_{precision}{EVAL_SUFFIX}").exists(): - return d - if precision == "fp16": - for ep, hw in EPS_ALL: - if (ep, hw) in NPU_EPS: - continue - d = EX / ep / hw / slug - if ( - (d / f"{task}{EVAL_SUFFIX}").exists() - or (d / f"{task}_fp16{EVAL_SUFFIX}").exists() - ): - return d - return None - - -def source_config_files(src_dir: Path, task: str, precision: str) -> list[Path]: - """Return matching `__config*.json` (NPU) or - `_config*.json` (CPU/GPU) files in src_dir.""" - matches = sorted(src_dir.glob(f"{task}_{precision}_config*.json")) - if matches: - return matches - return sorted(src_dir.glob(f"{task}_config*.json")) - - -def recipe_target_name(src_name: str, task: str, precision: str) -> str: - """Map a source config filename to its recipe filename. 
- - Examples: - `image-classification_config.json` -> `image-classification_fp16_config.json` - `image-classification_fp16_config.json` -> `image-classification_fp16_config.json` - `zero-shot-image-classification_config_text-encoder.json` - -> `zero-shot-image-classification_fp16_config_text-encoder.json` - """ - npu_prefix = f"{task}_{precision}_config" - if src_name.startswith(npu_prefix): - return src_name - cpu_prefix = f"{task}_config" - assert src_name.startswith(cpu_prefix), src_name - suffix = src_name[len(cpu_prefix):] - return f"{npu_prefix}{suffix}" - - -def copy_recipe( - slug: str, - task: str, - precision: str, - src_dir: Path, - dry_run: bool, -) -> list[str]: - sources = source_config_files(src_dir, task, precision) - if not sources: - return [] - dest_dir = RECIPES / slug - if not dry_run: - dest_dir.mkdir(parents=True, exist_ok=True) - written: list[str] = [] - for src in sources: - dest_name = recipe_target_name(src.name, task, precision) - dest = dest_dir / dest_name - if not dry_run: - shutil.copy2(src, dest) - written.append(dest_name) - return written - - -def clean_existing_recipes(slugs_to_keep: set[str], dry_run: bool) -> list[str]: - removed: list[str] = [] - for child in sorted(RECIPES.iterdir()): - if not child.is_dir(): - continue - if child.name not in slugs_to_keep: - if not dry_run: - shutil.rmtree(child) - removed.append(child.name) - return removed - - -def main() -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--dry-run", action="store_true") - parser.add_argument( - "--prune", - action="store_true", - help="Remove existing recipe folders that are no longer Built-in.", - ) - args = parser.parse_args() - - pairs = discover_builtin_pairs() - print(f"Built-in (slug, task) pairs: {len(pairs)}") - - slug_set: set[str] = set() - total_files = 0 - for slug, task in pairs: - slug_set.add(slug) - fp16_src = find_source_dir(slug, task, "fp16") - if fp16_src is None: - print(f" ! 
SKIP {slug} | {task} (no fp16 source found)") - continue - files = copy_recipe(slug, task, "fp16", fp16_src, args.dry_run) - total_files += len(files) - print( - f" + {slug} {task} fp16 <- " - f"{fp16_src.relative_to(REPO)} ({len(files)} files)" - ) - for precision in QUANT_PRECISIONS: - qsrc = find_source_dir(slug, task, precision) - if qsrc is None: - continue - qfiles = copy_recipe(slug, task, precision, qsrc, args.dry_run) - total_files += len(qfiles) - print( - f" + {slug} {task} {precision} <- " - f"{qsrc.relative_to(REPO)} ({len(qfiles)} files)" - ) - - if args.prune: - removed = clean_existing_recipes(slug_set, args.dry_run) - for slug in removed: - print(f" - removed recipes/{slug}") - - print( - f"\nWrote {total_files} recipe file(s) across {len(slug_set)} model folder(s)." - ) - if args.dry_run: - print("(dry-run: no files were modified)") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/rebuild_recipes_readme.py b/scripts/rebuild_recipes_readme.py deleted file mode 100644 index 93ccba7a0..000000000 --- a/scripts/rebuild_recipes_readme.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Regenerate `examples/recipes/README.md` by walking `examples///`. - -Discovery is **filesystem-driven**: no static model/task list is consulted. - -For every `examples////` folder we scan for fp16-passing -eval results: - - - NPU buckets: `_fp16_eval_result.json` - - CPU/GPU buckets: `_eval_result.json` (EP default precision) or - `_fp16_eval_result.json` - -A `(slug, task)` pair is **Built-in** iff every one of the 10 (EP, device) -buckets contains a passing fp16 eval result. - -The README's prose (everything before `## Models`) is preserved verbatim; -only the Models table and a `Total` line are rewritten. 
-""" -from __future__ import annotations - -from pathlib import Path - - -REPO = Path(__file__).resolve().parents[1] -EX = REPO / "examples" -README = EX / "recipes" / "README.md" -MARKER = "## Models" - -EPS_ALL: list[tuple[str, str]] = [ - ("dml", "gpu"), - ("mlas", "cpu"), - ("migraphx", "gpu"), - ("nv_tensorrt_rtx", "gpu"), - ("openvino", "cpu"), - ("openvino", "gpu"), - ("openvino", "npu"), - ("qnn", "gpu"), - ("qnn", "npu"), - ("vitisai", "npu"), -] -NPU_EPS: list[tuple[str, str]] = [("openvino", "npu"), ("qnn", "npu"), ("vitisai", "npu")] -EVAL_SUFFIX = "_eval_result.json" - - -def slug_to_hf_id(slug: str) -> str: - """Convert a folder slug back to a HuggingFace id. - - The slug is `_` where the first `_` replaces the owner/name - slash. HF owners do not contain `_` (they use `-`), so splitting on the - first `_` is unambiguous. - """ - return slug.replace("_", "/", 1) - - -def fp16_tasks_in_bucket(bucket_dir: Path, hw: str) -> set[tuple[str, str]]: - """Return {(slug, task)} that have a passing fp16 eval in this bucket.""" - out: set[tuple[str, str]] = set() - if not bucket_dir.is_dir(): - return out - for slug_dir in bucket_dir.iterdir(): - if not slug_dir.is_dir(): - continue - for ev in slug_dir.glob(f"*{EVAL_SUFFIX}"): - stem = ev.name[: -len(EVAL_SUFFIX)] # strip trailing "_eval_result.json" - if hw == "npu": - # NPU layout: "_"; only fp16 counts here. - if "_" not in stem: - continue - task, precision = stem.rsplit("_", 1) - if precision == "fp16": - out.add((slug_dir.name, task)) - else: - # CPU/GPU layout: "" (default precision) or "_fp16". 
- if stem.endswith("_fp16"): - task = stem[: -len("_fp16")] - else: - task = stem - out.add((slug_dir.name, task)) - return out - - -def npu_quant_passes(slug: str, task: str, precision: str) -> bool: - """True iff `__eval_result.json` exists on any NPU EP.""" - for ep, hw in NPU_EPS: - if (EX / ep / hw / slug / f"{task}_{precision}{EVAL_SUFFIX}").exists(): - return True - return False - - -def discover_builtin_pairs() -> list[tuple[str, str]]: - """Walk examples/ and return Built-in (slug, task) pairs.""" - per_bucket = { - (ep, hw): fp16_tasks_in_bucket(EX / ep / hw, hw) for ep, hw in EPS_ALL - } - all_pairs = set().union(*per_bucket.values()) - builtin = [ - pair for pair in sorted(all_pairs) - if all(pair in per_bucket[k] for k in per_bucket) - ] - return builtin - - -def render_models_section(pairs: list[tuple[str, str]]) -> str: - rows = "\n".join(f"| {slug_to_hf_id(slug)} | {task} |" for slug, task in pairs) - return ( - f"{MARKER}\n" - f"\n" - f"Total: **{len(pairs)}** (model, task) tuples that pass fp16 eval on " - f"all {len(EPS_ALL)} (EP, device) buckets.\n" - f"\n" - f"| Model | Task |\n" - f"|---|---|\n" - f"{rows}\n" - ) - - -def main() -> int: - pairs = discover_builtin_pairs() - text = README.read_text(encoding="utf-8") - idx = text.find(MARKER) - if idx == -1: - new = text.rstrip() + "\n\n" + render_models_section(pairs) - else: - new = text[:idx] + render_models_section(pairs) - README.write_text(new, encoding="utf-8") - print(f"Wrote {README}") - print(f" Built-in (slug, task) tuples: {len(pairs)}") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) From 71d879218675ae3f36730ec0669ad953e15745d3 Mon Sep 17 00:00:00 2001 From: "Shiyi Zheng (from Dev Box)" Date: Mon, 22 Jun 2026 12:25:05 +0800 Subject: [PATCH 3/3] examples/recipes: tighten w8a8/w8a16 to require pass on all 3 NPU EPs --- .../feature-extraction_w8a16_config.json | 96 ----------------- .../feature-extraction_w8a8_config.json | 96 ----------------- 
.../sentence-similarity_w8a16_config.json | 96 ----------------- .../sentence-similarity_w8a8_config.json | 96 ----------------- .../sentence-similarity_w8a8_config.json | 95 ----------------- .../feature-extraction_w8a8_config.json | 84 --------------- .../sentence-similarity_w8a8_config.json | 84 --------------- .../feature-extraction_w8a16_config.json | 96 ----------------- .../feature-extraction_w8a8_config.json | 96 ----------------- .../sentence-similarity_w8a16_config.json | 95 ----------------- .../sentence-similarity_w8a8_config.json | 95 ----------------- .../token-classification_w8a16_config.json | 93 ---------------- .../token-classification_w8a8_config.json | 93 ---------------- .../fill-mask_w8a8_config.json | 82 -------------- .../fill-mask_w8a8_config.json | 83 --------------- .../fill-mask_w8a8_config.json | 82 -------------- .../feature-extraction_w8a16_config.json | 96 ----------------- .../feature-extraction_w8a8_config.json | 96 ----------------- .../text-classification_w8a16_config.json | 95 ----------------- .../text-classification_w8a8_config.json | 95 ----------------- .../token-classification_w8a8_config.json | 80 -------------- .../text-classification_w8a16_config.json | 91 ---------------- .../text-classification_w8a8_config.json | 91 ---------------- examples/recipes/README.md | 2 +- .../image-feature-extraction_w8a8_config.json | 67 ------------ .../question-answering_w8a8_config.json | 98 ----------------- .../image-classification_w8a8_config.json | 67 ------------ .../text-classification_w8a8_config.json | 81 -------------- .../token-classification_w8a8_config.json | 93 ---------------- .../question-answering_w8a8_config.json | 99 ----------------- .../question-answering_w8a8_config.json | 87 --------------- .../question-answering_w8a8_config.json | 87 --------------- .../image-classification_w8a8_config.json | 70 ------------ .../question-answering_w8a8_config.json | 86 --------------- .../question-answering_w8a8_config.json | 86 
--------------- .../text-classification_w8a8_config.json | 80 -------------- .../fill-mask_w8a8_config.json | 81 -------------- .../token-classification_w8a16_config.json | 93 ---------------- .../token-classification_w8a8_config.json | 93 ---------------- .../image-classification_w8a16_config.json | 67 ------------ .../image-classification_w8a8_config.json | 67 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../feature-extraction_w8a16_config.json | 96 ----------------- .../feature-extraction_w8a8_config.json | 96 ----------------- .../fill-mask_w8a8_config.json | 95 ----------------- .../fill-mask_w8a8_config.json | 94 ---------------- .../fill-mask_w8a8_config.json | 94 ---------------- .../question-answering_w8a8_config.json | 100 ------------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-classification_w8a8_config.json | 66 ------------ .../zero-shot-classification_w8a8_config.json | 85 --------------- .../feature-extraction_w8a8_config.json | 90 ---------------- .../image-segmentation_w8a8_config.json | 70 ------------ .../image-feature-extraction_w8a8_config.json | 66 ------------ .../image-classification_w8a8_config.json | 67 ------------ .../image-classification_w8a8_config.json | 67 ------------ .../image-classification_w8a8_config.json | 67 ------------ .../image-classification_w8a16_config.json | 67 ------------ .../image-classification_w8a8_config.json | 67 ------------ .../question-answering_w8a8_config.json | 98 ----------------- .../image-segmentation_w8a8_config.json | 71 ------------- .../image-segmentation_w8a8_config.json | 71 ------------- .../image-segmentation_w8a8_config.json | 71 ------------- 
.../feature-extraction_w8a8_config.json | 89 ---------------- ...sification_w8a16_config_image-encoder.json | 79 -------------- ...ssification_w8a16_config_text-encoder.json | 89 ---------------- ...ssification_w8a8_config_image-encoder.json | 79 -------------- ...assification_w8a8_config_text-encoder.json | 89 ---------------- .../feature-extraction_w8a8_config.json | 89 ---------------- ...ssification_w8a8_config_image-encoder.json | 79 -------------- ...assification_w8a8_config_text-encoder.json | 89 ---------------- ...ssification_w8a8_config_image-encoder.json | 79 -------------- ...assification_w8a8_config_text-encoder.json | 89 ---------------- .../image-classification_w8a8_config.json | 69 ------------ .../feature-extraction_w8a8_config.json | 95 ----------------- .../sentence-similarity_w8a8_config.json | 95 ----------------- .../feature-extraction_w8a8_config.json | 82 -------------- .../sentence-similarity_w8a8_config.json | 82 -------------- .../feature-extraction_w8a8_config.json | 82 -------------- .../sentence-similarity_w8a8_config.json | 82 -------------- .../feature-extraction_w8a16_config.json | 96 ----------------- .../feature-extraction_w8a8_config.json | 96 ----------------- .../sentence-similarity_w8a16_config.json | 95 ----------------- .../sentence-similarity_w8a8_config.json | 95 ----------------- .../sentence-similarity_w8a8_config.json | 83 --------------- .../token-classification_w8a8_config.json | 81 -------------- 90 files changed, 1 insertion(+), 7486 deletions(-) delete mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json delete mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json delete mode 100644 examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json 
delete mode 100644 examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json delete mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json delete mode 100644 examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json delete mode 100644 examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json delete mode 100644 examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json delete mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json delete mode 100644 examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json delete mode 100644 examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json delete mode 100644 examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json delete mode 100644 examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json delete mode 100644 examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json delete mode 
100644 examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json delete mode 100644 examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json delete mode 100644 examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json delete mode 100644 examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json delete mode 100644 examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json delete mode 100644 examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json delete mode 100644 examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json delete mode 100644 examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json delete mode 100644 examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json delete mode 100644 examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json delete mode 100644 examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json delete mode 100644 examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json delete mode 100644 examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json delete mode 100644 examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json delete mode 100644 examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json delete mode 100644 
examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json delete mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json delete mode 100644 examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json delete mode 100644 examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json delete mode 100644 examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json delete mode 100644 examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json delete mode 100644 examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json delete mode 100644 examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json delete mode 100644 examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json delete mode 100644 examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json delete mode 100644 
examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json delete mode 100644 examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json delete mode 100644 examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json delete mode 100644 examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json delete mode 100644 examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json delete mode 100644 examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json delete mode 100644 examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json delete mode 100644 
examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json delete mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json delete mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json delete mode 100644 examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json deleted file mode 100644 index 0e0b7c083..000000000 --- a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - 
"hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "BAAI/bge-base-en-v1.5" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json deleted file mode 100644 index 7665bf642..000000000 --- a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - 
"dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "BAAI/bge-base-en-v1.5" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json deleted file mode 100644 index b75f8cd6d..000000000 --- a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 
- ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-base-en-v1.5" - }, - "compile": null, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json deleted file mode 100644 index 39c5e60a6..000000000 --- a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 
1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-base-en-v1.5" - }, - "compile": null, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json deleted file mode 100644 index a1630b546..000000000 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": 
"token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-large-en-v1.5" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json deleted file mode 100644 index d3cd88764..000000000 --- a/examples/recipes/BAAI_bge-m3/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 1024 - ], - "value_range": [ - 0, - 250002 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 1024 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - 
"mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "BAAI/bge-m3" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "xlm-roberta" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json deleted file mode 100644 index 7e87ee267..000000000 --- a/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 1024 - ], - "value_range": [ - 0, - 250002 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 1024 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - 
"seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-m3" - }, - "compile": null, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "xlm-roberta" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json deleted file mode 100644 index c629a6ca6..000000000 --- a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": 
null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "BAAI/bge-small-en-v1.5" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json deleted file mode 100644 index 09510e0b9..000000000 --- a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": 
"feature-extraction", - "model_name": "BAAI/bge-small-en-v1.5" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json deleted file mode 100644 index f4423ad76..000000000 --- a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a16_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-small-en-v1.5" - }, - 
"loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json deleted file mode 100644 index 1ec46532a..000000000 --- a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "BAAI/bge-small-en-v1.5" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": 
"bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json deleted file mode 100644 index 652d9b774..000000000 --- a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a16_config.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 119547 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "Babelscape/wikineural-multilingual-ner" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - 
"model_type": "bert" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "BramVanroy/conll2003", - "columns_mapping": { - "label_column": "ner_tags" - } - } - } -} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json deleted file mode 100644 index 3cf38f64f..000000000 --- a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_w8a8_config.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 119547 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "Babelscape/wikineural-multilingual-ner" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "bert" - }, - "eval": { - "task": "token-classification", - 
"dataset": { - "samples": 1000, - "path": "BramVanroy/conll2003", - "columns_mapping": { - "label_column": "ner_tags" - } - } - } -} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json deleted file mode 100644 index 21596e908..000000000 --- a/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "FacebookAI/roberta-base" - }, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "roberta" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json deleted file mode 
100644 index 179efa9df..000000000 --- a/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a8_config.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "FacebookAI/roberta-large" - }, - "compile": null, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "roberta" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json deleted file mode 100644 index cd8be5738..000000000 --- a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": 
false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250002 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "FacebookAI/xlm-roberta-base" - }, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "xlm-roberta" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json deleted file mode 100644 index 077b9581e..000000000 --- a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] 
- }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "Intel/bert-base-uncased-mrpc" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json deleted file mode 100644 index 1a1bd128f..000000000 --- a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": 
"int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "Intel/bert-base-uncased-mrpc" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json deleted file mode 100644 index d35a3c499..000000000 --- a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a16_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - 
"value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "Intel/bert-base-uncased-mrpc" - }, - "compile": null, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "bert" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "nyu-mll/glue", - "name": "mrpc", - "samples": 1000, - "columns_mapping": { - "input_column": "sentence1", - "second_input_column": "sentence2" - } - } - } -} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json deleted file mode 100644 index 15280ae41..000000000 --- a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - 
"dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "Intel/bert-base-uncased-mrpc" - }, - "compile": null, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "bert" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "nyu-mll/glue", - "name": "mrpc", - "samples": 1000, - "columns_mapping": { - "input_column": "sentence1", - "second_input_column": "sentence2" - } - } - } -} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json deleted file mode 100644 index e1063961f..000000000 --- a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a8_config.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] 
- }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "Isotonic/distilbert_finetuned_ai4privacy_v2" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "distilbert" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "~/.cache/winml/eval_datasets/build_ai4privacy", - "columns_mapping": { - "label_column": "ner_tags" - }, - "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" - } - } -} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json b/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json deleted file mode 100644 index 6290e8fff..000000000 --- a/examples/recipes/ProsusAI_finbert/text-classification_w8a16_config.json +++ /dev/null @@ -1,91 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - 
"mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "ProsusAI/finbert" - }, - "compile": null, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "bert" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "privet1mir/finbert_dataset", - "split": "val", - "samples": 1000 - } - } -} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json b/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json deleted file mode 100644 index e921e92dd..000000000 --- a/examples/recipes/ProsusAI_finbert/text-classification_w8a8_config.json +++ /dev/null @@ -1,91 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - 
"distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "ProsusAI/finbert" - }, - "compile": null, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "bert" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "privet1mir/finbert_dataset", - "split": "val", - "samples": 1000 - } - } -} diff --git a/examples/recipes/README.md b/examples/recipes/README.md index e0393a07d..1077e4f74 100644 --- a/examples/recipes/README.md +++ b/examples/recipes/README.md @@ -1,4 +1,4 @@ -# Built-in Model Recipes +# Built-in Model Recipes Curated recipe configuration samples for **portable, high-performance, and high-quality** AI models on Windows ML, working consistently across supported EPs. diff --git a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json deleted file mode 100644 index 39ffd70dd..000000000 --- a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - 
"save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "StanfordAIMI/dinov2-base-xray-224" - }, - "compile": null, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "dinov2" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "Ewakaa/pneumonia_classification_chest_xray", - "split": "test", - "samples": 582 - } - } -} diff --git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json deleted file mode 100644 index 5bcf12382..000000000 --- a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a8_config.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - 
"calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "ahotrod/electra_large_discriminator_squad2_512" - }, - "compile": null, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "electra" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad_v2", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json b/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json deleted file mode 100644 index 85332f723..000000000 --- a/examples/recipes/apple_mobilevit-small/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 256, - 256 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "apple/mobilevit-small" - }, - "compile": null, - "loader": { - "task": "image-classification", - 
"model_class": "AutoModelForImageClassification", - "model_type": "mobilevit" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json deleted file mode 100644 index 5bf569f6b..000000000 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "cardiffnlp/twitter-roberta-base-sentiment-latest" - }, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "roberta" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "tweet_eval", - "name": "sentiment", - "samples": 1000, - 
"columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json deleted file mode 100644 index d91a0863b..000000000 --- a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a8_config.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 28996 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "dbmdz/bert-large-cased-finetuned-conll03-english" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "bert" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "BramVanroy/conll2003", - "columns_mapping": { - "label_column": 
"ner_tags" - } - } - } -} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json deleted file mode 100644 index 1e065c665..000000000 --- a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a8_config.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "deepset/bert-large-uncased-whole-word-masking-squad2" - }, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "bert" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad_v2", - "split": "validation", - "samples": 1000, - "columns_mapping": { - 
"question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json deleted file mode 100644 index 01a786971..000000000 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "deepset/roberta-base-squad2" - }, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "roberta" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad_v2", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } 
-} diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json deleted file mode 100644 index 9322f582c..000000000 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "deepset/tinyroberta-squad2" - }, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "roberta" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad_v2", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json 
b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json deleted file mode 100644 index fcf6d4db5..000000000 --- a/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a8_config.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "dima806/fairface_age_image_detection" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "vit" - }, - "eval": { - "task": "image-classification", - "dataset": { - "samples": 1000, - "path": "~/.cache/winml/eval_datasets/build_fairface", - "columns_mapping": { - "label_column": "age" - }, - "build_script": "scripts/e2e_eval/datasets/build_fairface.py" - } - } -} diff --git a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json deleted file mode 100644 index c6386c944..000000000 --- 
a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a8_config.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 28996 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "distilbert/distilbert-base-cased-distilled-squad" - }, - "compile": null, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "distilbert" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json deleted file mode 100644 index 2258aa259..000000000 --- 
a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a8_config.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "distilbert/distilbert-base-uncased-distilled-squad" - }, - "compile": null, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "distilbert" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json deleted file mode 100644 index 9907a37ef..000000000 --- 
a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a8_config.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "text-classification", - "model_name": "distilbert/distilbert-base-uncased-finetuned-sst-2-english" - }, - "compile": null, - "loader": { - "task": "text-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "distilbert" - }, - "eval": { - "task": "text-classification", - "dataset": { - "path": "nyu-mll/glue", - "name": "sst2", - "samples": 1000, - "columns_mapping": { - "input_column": "sentence" - } - } - } -} diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json deleted file mode 100644 index e93363d0e..000000000 --- a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a8_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - 
"do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "distilbert/distilbert-base-uncased" - }, - "compile": null, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "distilbert" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json deleted file mode 100644 index 4d3a2dbef..000000000 --- a/examples/recipes/dslim_bert-base-NER/token-classification_w8a16_config.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 
28996 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "dslim/bert-base-NER" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "bert" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "BramVanroy/conll2003", - "columns_mapping": { - "label_column": "ner_tags" - } - } - } -} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json deleted file mode 100644 index de26003dc..000000000 --- a/examples/recipes/dslim_bert-base-NER/token-classification_w8a8_config.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 28996 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": 
"token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "dslim/bert-base-NER" - }, - "compile": null, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "bert" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "BramVanroy/conll2003", - "columns_mapping": { - "label_column": "ner_tags" - } - } - } -} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json deleted file mode 100644 index f92b2fe5d..000000000 --- a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a16_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - 
"symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "facebook/convnext-tiny-224" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "convnext" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json deleted file mode 100644 index d4766924f..000000000 --- a/examples/recipes/facebook_convnext-tiny-224/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "facebook/convnext-tiny-224" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": 
"AutoModelForImageClassification", - "model_type": "convnext" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json deleted file mode 100644 index d328593d8..000000000 --- a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "facebook/dino-vitb16" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "vit" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json deleted file mode 100644 index e33ef4491..000000000 --- 
a/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "facebook/dino-vits16" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "vit" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json deleted file mode 100644 index f9e729c97..000000000 --- a/examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, 
- 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "facebook/dinov2-base" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "dinov2" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json deleted file mode 100644 index 2b3f6cc27..000000000 --- a/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - 
"calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "facebook/dinov2-large" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "dinov2" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json deleted file mode 100644 index c3a3af051..000000000 --- a/examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "facebook/dinov2-small" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "dinov2" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git 
a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json deleted file mode 100644 index 1e5074c51..000000000 --- a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 119547 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "google-bert/bert-base-multilingual-cased" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git 
a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json deleted file mode 100644 index 111ff0161..000000000 --- a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 119547 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "google-bert/bert-base-multilingual-cased" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git 
a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json deleted file mode 100644 index 067e5e45a..000000000 --- a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 119547 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "google-bert/bert-base-multilingual-cased" - }, - "compile": null, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "bert" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json 
b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json deleted file mode 100644 index 69762381d..000000000 --- a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a8_config.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 105879 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "google-bert/bert-base-multilingual-uncased" - }, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "bert" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json deleted file mode 100644 index 
4fddd5121..000000000 --- a/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a8_config.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "fill-mask", - "model_name": "google-bert/bert-base-uncased" - }, - "loader": { - "task": "fill-mask", - "model_class": "AutoModelForMaskedLM", - "model_type": "bert" - }, - "eval": { - "task": "fill-mask", - "dataset": { - "path": "Salesforce/wikitext", - "name": "wikitext-2-raw-v1", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column": "text" - } - } - } -} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json deleted file mode 100644 index 69690900b..000000000 --- 
a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a8_config.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad" - }, - "compile": null, - "loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "bert" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "rajpurkar/squad", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json 
b/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json deleted file mode 100644 index 7124b916a..000000000 --- a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "google/vit-base-patch16-224-in21k" - }, - "loader": { - "task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "vit" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json deleted file mode 100644 index 9cf94c890..000000000 --- a/examples/recipes/google_vit-base-patch16-224/image-classification_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, 
- "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "google/vit-base-patch16-224" - }, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "vit" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json deleted file mode 100644 index b215cc87b..000000000 --- a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a8_config.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250002 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - 
"optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "zero-shot-classification", - "model_name": "joeddav/xlm-roberta-large-xnli" - }, - "compile": null, - "loader": { - "task": "zero-shot-classification", - "model_class": "AutoModelForSequenceClassification", - "model_type": "xlm-roberta" - }, - "eval": { - "task": "zero-shot-classification", - "dataset": { - "path": "fancyzhx/ag_news", - "split": "test", - "samples": 200, - "columns_mapping": { - "input_column": "text", - "label_column": "label", - "candidate_labels": "World,Sports,Business,Sci/Tech", - "hypothesis_template": "This text is about {}." - } - } - } -} diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json deleted file mode 100644 index 0d45c3391..000000000 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,90 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - 
"optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json deleted file mode 100644 index f23ded80b..000000000 --- a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a8_config.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 512, - 512 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - 
"per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-segmentation", - "model_name": "mattmdjaga/segformer_b2_clothes" - }, - "compile": null, - "loader": { - "task": "image-segmentation", - "model_class": "AutoModelForSemanticSegmentation", - "model_type": "segformer" - }, - "eval": { - "task": "image-segmentation", - "dataset": { - "path": "mattmdjaga/human_parsing_dataset", - "split": "train", - "samples": 1000, - "columns_mapping": { - "annotation_column": "mask" - } - } - } -} diff --git a/examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json b/examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json deleted file mode 100644 index 7f02b647f..000000000 --- a/examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 518, - 518 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "microsoft/rad-dino" - }, - "loader": { - 
"task": "image-feature-extraction", - "model_class": "AutoModel", - "model_type": "dinov2" - }, - "eval": { - "task": "image-feature-extraction", - "dataset": { - "path": "Ewakaa/pneumonia_classification_chest_xray", - "split": "test", - "samples": 582 - } - } -} diff --git a/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json b/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json deleted file mode 100644 index d4853a128..000000000 --- a/examples/recipes/microsoft_resnet-18/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "microsoft/resnet-18" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "resnet" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 100 - } - } -} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json b/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json deleted file mode 
100644 index 010c8389f..000000000 --- a/examples/recipes/microsoft_resnet-50/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "microsoft/resnet-50" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "resnet" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json deleted file mode 100644 index 99b8754de..000000000 --- a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": 
"full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "microsoft/swin-large-patch4-window7-224" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "swin" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 1000 - } - } -} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json deleted file mode 100644 index 89edca400..000000000 --- a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a16_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 256, - 256 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - 
"activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "microsoft/swinv2-tiny-patch4-window16-256" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "swinv2" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 100 - } - } -} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json deleted file mode 100644 index b11bf17c1..000000000 --- a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_w8a8_config.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 256, - 256 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": 
"microsoft/swinv2-tiny-patch4-window16-256" - }, - "compile": null, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "swinv2" - }, - "eval": { - "task": "image-classification", - "dataset": { - "path": "timm/mini-imagenet", - "split": "test", - "samples": 100 - } - } -} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json deleted file mode 100644 index 17a040296..000000000 --- a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a8_config.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 32200 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "start_logits" - }, - { - "name": "end_logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "question-answering", - "model_name": "monologg/koelectra-small-v2-distilled-korquad-384" - }, - "compile": null, - 
"loader": { - "task": "question-answering", - "model_class": "AutoModelForQuestionAnswering", - "model_type": "electra" - }, - "eval": { - "task": "question-answering", - "dataset": { - "path": "KorQuAD/squad_kor_v1", - "split": "validation", - "samples": 1000, - "columns_mapping": { - "question_column": "question", - "context_column": "context", - "id_column": "id", - "label_column": "answers" - } - } - } -} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json deleted file mode 100644 index 8c64a9223..000000000 --- a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a8_config.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 512, - 512 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-segmentation", - "model_name": "nvidia/segformer-b1-finetuned-ade-512-512" - }, - "compile": null, - "loader": { - "task": "image-segmentation", - "model_class": "AutoModelForSemanticSegmentation", - "model_type": "segformer" - }, - "eval": { - "task": "image-segmentation", - "dataset": { - "path": 
"danjacobellis/scene_parse_150", - "split": "validation", - "samples": 1000, - "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", - "columns_mapping": { - "annotation_column": "annotation" - } - } - } -} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json deleted file mode 100644 index 610a2173a..000000000 --- a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a8_config.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 512, - 512 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-segmentation", - "model_name": "nvidia/segformer-b2-finetuned-ade-512-512" - }, - "compile": null, - "loader": { - "task": "image-segmentation", - "model_class": "AutoModelForSemanticSegmentation", - "model_type": "segformer" - }, - "eval": { - "task": "image-segmentation", - "dataset": { - "path": "danjacobellis/scene_parse_150", - "split": "validation", - "samples": 1000, - "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", - "columns_mapping": { - 
"annotation_column": "annotation" - } - } - } -} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json deleted file mode 100644 index 80894222b..000000000 --- a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a8_config.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 640, - 640 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-segmentation", - "model_name": "nvidia/segformer-b5-finetuned-ade-640-640" - }, - "compile": null, - "loader": { - "task": "image-segmentation", - "model_class": "AutoModelForSemanticSegmentation", - "model_type": "segformer" - }, - "eval": { - "task": "image-segmentation", - "dataset": { - "path": "danjacobellis/scene_parse_150", - "split": "validation", - "samples": 1000, - "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", - "columns_mapping": { - "annotation_column": "annotation" - } - } - } -} diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json 
b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json deleted file mode 100644 index ed19dcf30..000000000 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-base-patch16" - }, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json 
b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json deleted file mode 100644 index cf8ede994..000000000 --- a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_image-encoder.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "image_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "openai/clip-vit-base-patch16" - }, - "compile": null, - "loader": { - "task": "image-feature-extraction", - "model_class": "CLIPVisionModelWithProjection", - "model_type": "clip_vision_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } -} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json 
b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json deleted file mode 100644 index 6b569997c..000000000 --- a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a16_config_text-encoder.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-base-patch16" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } -} diff --git 
a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json deleted file mode 100644 index 0108d5344..000000000 --- a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_image-encoder.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "image_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "openai/clip-vit-base-patch16" - }, - "compile": null, - "loader": { - "task": "image-feature-extraction", - "model_class": "CLIPVisionModelWithProjection", - "model_type": "clip_vision_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } -} diff --git 
a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json deleted file mode 100644 index 6b379566e..000000000 --- a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_w8a8_config_text-encoder.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-base-patch16" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } 
-} diff --git a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json deleted file mode 100644 index b4d99249e..000000000 --- a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-base-patch32" - }, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git 
a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json deleted file mode 100644 index bb7016c8c..000000000 --- a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_image-encoder.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 336, - 336 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "image_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "openai/clip-vit-large-patch14-336" - }, - "compile": null, - "loader": { - "task": "image-feature-extraction", - "model_class": "CLIPVisionModelWithProjection", - "model_type": "clip_vision_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } -} diff --git 
a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json deleted file mode 100644 index bd2d5928a..000000000 --- a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a8_config_text-encoder.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-large-patch14-336" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": 
"fine_label" - } - } - } -} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json deleted file mode 100644 index fbf2c7ef6..000000000 --- a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_image-encoder.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "image_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-feature-extraction", - "model_name": "openai/clip-vit-large-patch14" - }, - "compile": null, - "loader": { - "task": "image-feature-extraction", - "model_class": "CLIPVisionModelWithProjection", - "model_type": "clip_vision_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - } -} diff --git 
a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json deleted file mode 100644 index f9a9045b5..000000000 --- a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a8_config_text-encoder.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 49408 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 77 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "text_embeds" - }, - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true, - "gelu_fusion": true, - "layer_norm_fusion": true, - "matmul_add_fusion": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "openai/clip-vit-large-patch14" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "CLIPTextModelWithProjection", - "model_type": "clip_text_model" - }, - "eval": { - "task": "zero-shot-image-classification", - "dataset": { - "path": "uoft-cs/cifar100", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column": "img", - "label_column": "fine_label" - } - } - 
} -} diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json deleted file mode 100644 index 897fc4975..000000000 --- a/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a8_config.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "pixel_values", - "dtype": "float32", - "shape": [ - 1, - 3, - 224, - 224 - ], - "value_range": [ - 0, - 1 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "image-classification", - "model_name": "rizvandwiki/gender-classification" - }, - "loader": { - "task": "image-classification", - "model_class": "AutoModelForImageClassification", - "model_type": "vit" - }, - "eval": { - "task": "image-classification", - "dataset": { - "samples": 1000, - "path": "~/.cache/winml/eval_datasets/build_fairface", - "columns_mapping": { - "label_column": "gender" - }, - "build_script": "scripts/e2e_eval/datasets/build_fairface.py" - } - } -} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json deleted file mode 100644 index 1fe2412af..000000000 --- 
a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "sentence-transformers/all-MiniLM-L6-v2" - }, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json deleted file mode 100644 index 212204b0d..000000000 --- 
a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30522 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/all-MiniLM-L6-v2" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json deleted file mode 100644 index 9c62ef765..000000000 --- 
a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30527 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "sentence-transformers/all-mpnet-base-v2" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "mpnet" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json deleted file mode 100644 index f50018ec8..000000000 --- a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 
17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30527 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/all-mpnet-base-v2" - }, - "compile": null, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "mpnet" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json deleted file mode 100644 index 8c46e4b7d..000000000 --- a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - 
"enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30527 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "mpnet" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 100, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json deleted file mode 100644 index f6dde153b..000000000 --- a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": 
"input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 30527 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": {}, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" - }, - "compile": null, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "mpnet" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json deleted file mode 100644 index 16ab4d489..000000000 --- a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a16_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 
250037 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json deleted file mode 100644 index c57c8ff8a..000000000 --- a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_w8a8_config.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - 
"dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250037 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "feature-extraction", - "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - }, - "compile": null, - "loader": { - "task": "feature-extraction", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "feature-extraction", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json deleted file mode 100644 index cdb314043..000000000 --- a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a16_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - 
"hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250037 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint16", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json deleted file mode 100644 index 9429b4790..000000000 --- a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - 
"enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250037 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "bert" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json deleted file mode 100644 index af6827752..000000000 --- a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a8_config.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - 
"do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - "clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 250002 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "last_hidden_state" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "sentence-similarity", - "model_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" - }, - "loader": { - "task": "sentence-similarity", - "model_class": "AutoModel", - "model_type": "xlm-roberta" - }, - "eval": { - "task": "sentence-similarity", - "dataset": { - "path": "mteb/stsbenchmark-sts", - "split": "test", - "samples": 1000, - "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" - } - } - } -} diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json deleted file mode 100644 index 873c29dbf..000000000 --- a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a8_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "export": { - "opset_version": 17, - "batch_size": 1, - "export_params": true, - "do_constant_folding": true, - "verbose": false, - "dynamo": false, - "enable_hierarchy_tags": true, - 
"clean_onnx": false, - "hierarchy_tag_format": "full", - "input_tensors": [ - { - "name": "input_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 50265 - ] - }, - { - "name": "attention_mask", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - } - ], - "output_tensors": [ - { - "name": "logits" - } - ] - }, - "optim": { - "clamp_constant_values": true - }, - "quant": { - "mode": "qdq", - "samples": 10, - "calibration_method": "minmax", - "weight_type": "uint8", - "activation_type": "uint8", - "per_channel": false, - "symmetric": false, - "save_calibration": false, - "distribution": "uniform", - "seed": null, - "calibration_load_path": null, - "calibration_save_path": null, - "op_types_to_quantize": null, - "nodes_to_exclude": null, - "task": "token-classification", - "model_name": "w11wo/indonesian-roberta-base-posp-tagger" - }, - "loader": { - "task": "token-classification", - "model_class": "AutoModelForTokenClassification", - "model_type": "roberta" - }, - "eval": { - "task": "token-classification", - "dataset": { - "samples": 1000, - "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", - "columns_mapping": { - "label_column": "pos_tags" - }, - "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" - } - } -}