diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..92e4fa925 --- /dev/null +++ b/examples/recipes/BAAI_bge-base-en-v1.5/sentence-similarity_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json index eed96889f..30f1521ce 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json +++ b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json index 6f1450a96..96abec426 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json +++ b/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json new file mode 100644 index 000000000..d7c9b8ebb --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/feature-extraction_fp16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json similarity index 81% rename from examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json rename to examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json index c7de443dc..cc1ad56c9 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a8_config.json +++ b/examples/recipes/BAAI_bge-m3/feature-extraction_w8a16_config.json @@ -15,11 +15,11 @@ "dtype": "int32", "shape": [ 1, - 512 + 1024 ], "value_range": [ 0, - 30522 + 250002 ] }, { @@ -27,19 +27,7 @@ "dtype": "int32", "shape": [ 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 + 1024 ], "value_range": [ 0, @@ -61,7 +49,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -72,18 +60,20 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "feature-extraction", - "model_name": "sentence-transformers/all-MiniLM-L6-v2" + "model_name": "BAAI/bge-m3" }, + "compile": null, "loader": { "task": "feature-extraction", "model_class": "AutoModel", - "model_type": "bert" + "model_type": "xlm-roberta" }, "eval": { "task": "feature-extraction", "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..83f4fd1f0 --- /dev/null +++ b/examples/recipes/BAAI_bge-m3/sentence-similarity_fp16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json similarity index 81% rename from examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json rename to examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json index 29534eb50..06a124129 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a8_config.json +++ b/examples/recipes/BAAI_bge-m3/sentence-similarity_w8a16_config.json @@ -15,11 +15,11 @@ "dtype": "int32", "shape": [ 1, - 512 + 1024 ], "value_range": [ 0, - 30522 + 250002 ] }, { @@ -27,19 +27,7 @@ "dtype": "int32", "shape": [ 1, - 512 - ], - "value_range": [ - 0, - 2 - ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 + 1024 ], "value_range": [ 0, @@ -61,7 +49,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -72,18 +60,20 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "sentence-similarity", - "model_name": "sentence-transformers/all-MiniLM-L6-v2" + "model_name": "BAAI/bge-m3" }, + "compile": null, "loader": { "task": "sentence-similarity", "model_class": "AutoModel", - "model_type": "bert" + "model_type": "xlm-roberta" }, "eval": { "task": "sentence-similarity", "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..30f1521ce --- /dev/null +++ b/examples/recipes/BAAI_bge-small-en-v1.5/sentence-similarity_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json new file mode 100644 index 000000000..3a9c3545d --- /dev/null +++ b/examples/recipes/Babelscape_wikineural-multilingual-ner/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json new file mode 100644 index 000000000..77bafe354 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-base/fill-mask_fp16_config.json @@ -0,0 +1,65 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6ed6b6001 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-base/fill-mask_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-base" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json new file mode 100644 index 000000000..38ad865d7 --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-large/fill-mask_fp16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json new file mode 100644 index 000000000..27ba8f9fb --- /dev/null +++ b/examples/recipes/FacebookAI_roberta-large/fill-mask_w8a16_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/roberta-large" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json new file mode 100644 index 000000000..effb5ad4d --- /dev/null +++ b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_fp16_config.json @@ -0,0 +1,65 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6a1bdfddd --- /dev/null +++ b/examples/recipes/FacebookAI_xlm-roberta-base/fill-mask_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "FacebookAI/xlm-roberta-base" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json new file mode 100644 index 000000000..2da045256 --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json new file mode 100644 index 000000000..0a6226f14 --- /dev/null +++ b/examples/recipes/Intel_bert-base-uncased-mrpc/text-classification_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "mrpc", + "samples": 100, + "columns_mapping": { + "input_column": "sentence1", + "second_input_column": "sentence2" + } + } + } +} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json new file mode 100644 index 000000000..2c8e6ccc1 --- /dev/null +++ b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_fp16_config.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_ai4privacy", + "columns_mapping": { + "label_column": "ner_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" + } + } +} diff --git a/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json new file mode 100644 index 000000000..24755346a --- /dev/null +++ b/examples/recipes/Isotonic_distilbert_finetuned_ai4privacy_v2/token-classification_w8a16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "Isotonic/distilbert_finetuned_ai4privacy_v2" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_ai4privacy", + "columns_mapping": { + "label_column": "ner_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_ai4privacy.py" + } + } +} diff --git a/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json b/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json new file mode 100644 index 000000000..ef2099d4d --- /dev/null +++ b/examples/recipes/ProsusAI_finbert/text-classification_fp16_config.json @@ -0,0 +1,74 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "privet1mir/finbert_dataset", + "split": "val", + "samples": 100 + } + } +} diff --git a/examples/recipes/README.md b/examples/recipes/README.md index caaa2c15f..1077e4f74 100644 --- a/examples/recipes/README.md +++ b/examples/recipes/README.md @@ -14,17 +14,82 @@ Each *(model, task)* includes: ## Models +Total: **75** (model, task) tuples that pass fp16 eval on all 10 (EP, device) buckets. + | Model | Task | |---|---| +| BAAI/bge-base-en-v1.5 | feature-extraction | +| BAAI/bge-base-en-v1.5 | sentence-similarity | | BAAI/bge-large-en-v1.5 | sentence-similarity | +| BAAI/bge-m3 | feature-extraction | +| BAAI/bge-m3 | sentence-similarity | +| BAAI/bge-small-en-v1.5 | feature-extraction | +| BAAI/bge-small-en-v1.5 | sentence-similarity | +| Babelscape/wikineural-multilingual-ner | token-classification | +| FacebookAI/roberta-base | fill-mask | +| FacebookAI/roberta-large | fill-mask | +| FacebookAI/xlm-roberta-base | fill-mask | +| Intel/bert-base-uncased-mrpc | feature-extraction | +| Intel/bert-base-uncased-mrpc | text-classification | +| Isotonic/distilbert_finetuned_ai4privacy_v2 | token-classification | +| ProsusAI/finbert | text-classification | +| Salesforce/blip-image-captioning-base | image-to-text | +| StanfordAIMI/dinov2-base-xray-224 | image-feature-extraction | +| ahotrod/electra_large_discriminator_squad2_512 | question-answering | +| apple/mobilevit-small | image-classification | | cardiffnlp/twitter-roberta-base-sentiment-latest | text-classification | +| dbmdz/bert-large-cased-finetuned-conll03-english | token-classification | +| deepset/bert-large-uncased-whole-word-masking-squad2 | question-answering | | deepset/roberta-base-squad2 | question-answering | | deepset/tinyroberta-squad2 | question-answering | +| dima806/fairface_age_image_detection | image-classification | +| distilbert/distilbert-base-cased-distilled-squad | question-answering | +| distilbert/distilbert-base-uncased | fill-mask | +| distilbert/distilbert-base-uncased-distilled-squad | question-answering | +| distilbert/distilbert-base-uncased-finetuned-sst-2-english | text-classification | +| dslim/bert-base-NER | token-classification | +| facebook/convnext-tiny-224 | image-classification | +| facebook/dino-vitb16 | image-feature-extraction | +| facebook/dino-vits16 | image-feature-extraction | | facebook/dinov2-base | image-feature-extraction | +| facebook/dinov2-large | image-feature-extraction | | facebook/dinov2-small | image-feature-extraction | +| google-bert/bert-base-multilingual-cased | feature-extraction | +| google-bert/bert-base-multilingual-cased | fill-mask | +| google-bert/bert-base-multilingual-uncased | fill-mask | +| google-bert/bert-base-uncased | fill-mask | +| google-bert/bert-large-uncased-whole-word-masking-finetuned-squad | question-answering | +| google/vit-base-patch16-224 | image-classification | | google/vit-base-patch16-224-in21k | image-feature-extraction | +| joeddav/xlm-roberta-large-xnli | zero-shot-classification | | laion/CLIP-ViT-B-32-laion2B-s34B-b79K | feature-extraction | +| mattmdjaga/segformer_b2_clothes | image-segmentation | | microsoft/rad-dino | image-feature-extraction | +| microsoft/resnet-18 | image-classification | +| microsoft/resnet-50 | image-classification | +| microsoft/swin-large-patch4-window7-224 | image-classification | +| microsoft/swinv2-tiny-patch4-window16-256 | image-classification | +| microsoft/trocr-base-handwritten | image-to-text | +| microsoft/trocr-base-printed | image-to-text | +| microsoft/trocr-large-handwritten | image-to-text | +| microsoft/trocr-large-printed | image-to-text | +| monologg/koelectra-small-v2-distilled-korquad-384 | question-answering | +| nvidia/segformer-b1-finetuned-ade-512-512 | image-segmentation | +| nvidia/segformer-b2-finetuned-ade-512-512 | image-segmentation | +| nvidia/segformer-b5-finetuned-ade-640-640 | image-segmentation | | openai/clip-vit-base-patch16 | feature-extraction | +| openai/clip-vit-base-patch16 | zero-shot-image-classification | +| openai/clip-vit-base-patch32 | feature-extraction | +| openai/clip-vit-large-patch14 | zero-shot-image-classification | +| openai/clip-vit-large-patch14-336 | zero-shot-image-classification | +| rizvandwiki/gender-classification | image-classification | | sentence-transformers/all-MiniLM-L6-v2 | feature-extraction | | sentence-transformers/all-MiniLM-L6-v2 | sentence-similarity | +| sentence-transformers/all-mpnet-base-v2 | feature-extraction | +| sentence-transformers/all-mpnet-base-v2 | sentence-similarity | +| sentence-transformers/multi-qa-mpnet-base-dot-v1 | feature-extraction | +| sentence-transformers/multi-qa-mpnet-base-dot-v1 | sentence-similarity | +| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | feature-extraction | +| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | sentence-similarity | +| sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | sentence-similarity | +| w11wo/indonesian-roberta-base-posp-tagger | token-classification | diff --git a/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..3f9be38a6 --- /dev/null +++ b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_decoder.json @@ -0,0 +1,492 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 30524 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 12, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "BlipDecoderWrapper", + "model_type": "blip" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "lmms-lab/flickr30k", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "caption" + } + } + } +} diff --git a/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..12f595574 --- /dev/null +++ b/examples/recipes/Salesforce_blip-image-captioning-base/image-to-text_fp16_config_encoder.json @@ -0,0 +1,58 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "BlipVisionEncoderWrapper", + "model_type": "blip" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "lmms-lab/flickr30k", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "caption" + } + } + } +} diff --git a/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..e503c77d9 --- /dev/null +++ b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "Ewakaa/pneumonia_classification_chest_xray", + "split": "test", + "samples": 582 + } + } +} diff --git a/examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json similarity index 90% rename from examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json rename to examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json index 7f02b647f..77a6331c7 100644 --- a/examples/recipes/microsoft_rad-dino/image-feature-extraction_w8a8_config.json +++ b/examples/recipes/StanfordAIMI_dinov2-base-xray-224/image-feature-extraction_w8a16_config.json @@ -16,8 +16,8 @@ "shape": [ 1, 3, - 518, - 518 + 224, + 224 ], "value_range": [ 0, @@ -37,7 +37,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -48,8 +48,9 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "image-feature-extraction", - "model_name": "microsoft/rad-dino" + "model_name": "StanfordAIMI/dinov2-base-xray-224" }, + "compile": null, "loader": { "task": "image-feature-extraction", "model_class": "AutoModel", diff --git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json new file mode 100644 index 000000000..af32cfc6e --- /dev/null +++ b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_fp16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json new file mode 100644 index 000000000..41a80cc35 --- /dev/null +++ b/examples/recipes/ahotrod_electra_large_discriminator_squad2_512/question-answering_w8a16_config.json @@ -0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "ahotrod/electra_large_discriminator_squad2_512" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json b/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json new file mode 100644 index 000000000..ff6060d8e --- /dev/null +++ b/examples/recipes/apple_mobilevit-small/image-classification_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "mobilevit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json b/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json new file mode 100644 index 000000000..034af6e9a --- /dev/null +++ b/examples/recipes/apple_mobilevit-small/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "apple/mobilevit-small" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "mobilevit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json index 186a6cbb8..8de0a638d 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json +++ b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_fp16_config.json @@ -55,6 +55,7 @@ "dataset": { "path": "tweet_eval", "name": "sentiment", + "samples": 100, "columns_mapping": { "input_column": "text" } diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json index b96d6b2eb..f6b9ea686 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json +++ b/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a16_config.json @@ -72,6 +72,7 @@ "dataset": { "path": "tweet_eval", "name": "sentiment", + "samples": 1000, "columns_mapping": { "input_column": "text" } diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json new file mode 100644 index 000000000..6e57f58db --- /dev/null +++ b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json new file mode 100644 index 000000000..195a9ddb6 --- /dev/null +++ b/examples/recipes/dbmdz_bert-large-cased-finetuned-conll03-english/token-classification_w8a16_config.json @@ -0,0 +1,93 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "dbmdz/bert-large-cased-finetuned-conll03-english" + }, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json new file mode 100644 index 000000000..ce04fa559 --- /dev/null +++ b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_fp16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json new file mode 100644 index 000000000..ff515ccdc --- /dev/null +++ b/examples/recipes/deepset_bert-large-uncased-whole-word-masking-squad2/question-answering_w8a16_config.json @@ -0,0 +1,99 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "deepset/bert-large-uncased-whole-word-masking-squad2" + }, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad_v2", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json index e97d94cb3..1c8a70440 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json +++ b/examples/recipes/deepset_roberta-base-squad2/question-answering_fp16_config.json @@ -58,6 +58,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 100, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json index 5fdbafca2..53deef516 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json +++ b/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a16_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json index e97d94cb3..1c8a70440 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json +++ b/examples/recipes/deepset_tinyroberta-squad2/question-answering_fp16_config.json @@ -58,6 +58,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 100, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json index 7a006bd7f..38969b7ab 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json +++ b/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a16_config.json @@ -75,6 +75,7 @@ "dataset": { "path": "rajpurkar/squad_v2", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json b/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json new file mode 100644 index 000000000..08b771455 --- /dev/null +++ b/examples/recipes/dima806_fairface_age_image_detection/image-classification_fp16_config.json @@ -0,0 +1,52 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "age" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json new file mode 100644 index 000000000..379bc4caf --- /dev/null +++ b/examples/recipes/dima806_fairface_age_image_detection/image-classification_w8a16_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "dima806/fairface_age_image_detection" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "age" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..605ec3526 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json similarity index 86% rename from examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json rename to examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json index 60f6039f1..784ccc775 100644 --- a/examples/recipes/deepset_roberta-base-squad2/question-answering_w8a8_config.json +++ b/examples/recipes/distilbert_distilbert-base-cased-distilled-squad/question-answering_w8a16_config.json @@ -19,7 +19,7 @@ ], "value_range": [ 0, - 50265 + 28996 ] }, { @@ -44,15 +44,13 @@ } ] }, - "optim": { - "clamp_constant_values": true - }, + "optim": {}, "quant": { "mode": "qdq", "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -63,18 +61,20 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "question-answering", - "model_name": "deepset/roberta-base-squad2" + "model_name": "distilbert/distilbert-base-cased-distilled-squad" }, + "compile": null, "loader": { "task": "question-answering", "model_class": "AutoModelForQuestionAnswering", - "model_type": "roberta" + "model_type": "distilbert" }, "eval": { "task": "question-answering", "dataset": { - "path": "rajpurkar/squad_v2", + "path": "rajpurkar/squad", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..027c69c52 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "distilbert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json similarity index 86% rename from examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json rename to examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json index 0d8e54344..23216becf 100644 --- a/examples/recipes/deepset_tinyroberta-squad2/question-answering_w8a8_config.json +++ b/examples/recipes/distilbert_distilbert-base-uncased-distilled-squad/question-answering_w8a16_config.json @@ -19,7 +19,7 @@ ], "value_range": [ 0, - 50265 + 30522 ] }, { @@ -44,15 +44,13 @@ } ] }, - "optim": { - "clamp_constant_values": true - }, + "optim": {}, "quant": { "mode": "qdq", "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -63,18 +61,20 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "question-answering", - "model_name": "deepset/tinyroberta-squad2" + "model_name": "distilbert/distilbert-base-uncased-distilled-squad" }, + "compile": null, "loader": { "task": "question-answering", "model_class": "AutoModelForQuestionAnswering", - "model_type": "roberta" + "model_type": "distilbert" }, "eval": { "task": "question-answering", "dataset": { - "path": "rajpurkar/squad_v2", + "path": "rajpurkar/squad", "split": "validation", + "samples": 1000, "columns_mapping": { "question_column": "question", "context_column": "context", diff --git a/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json new file mode 100644 index 000000000..f7b761cb6 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_fp16_config.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "distilbert" + }, + "eval": { + "task": "text-classification", + "dataset": { + "path": "nyu-mll/glue", + "name": "sst2", + "samples": 100, + "columns_mapping": { + "input_column": "sentence" + } + } + } +} diff --git a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json similarity index 81% rename from examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json rename to examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json index 2cb7114d2..443f3eb7e 100644 --- a/examples/recipes/cardiffnlp_twitter-roberta-base-sentiment-latest/text-classification_w8a8_config.json +++ b/examples/recipes/distilbert_distilbert-base-uncased-finetuned-sst-2-english/text-classification_w8a16_config.json @@ -19,7 +19,7 @@ ], "value_range": [ 0, - 50265 + 30522 ] }, { @@ -41,15 +41,13 @@ } ] }, - "optim": { - "clamp_constant_values": true - }, + "optim": {}, "quant": { "mode": "qdq", "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -60,20 +58,22 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "text-classification", - "model_name": "cardiffnlp/twitter-roberta-base-sentiment-latest" + "model_name": "distilbert/distilbert-base-uncased-finetuned-sst-2-english" }, + "compile": null, "loader": { "task": "text-classification", "model_class": "AutoModelForSequenceClassification", - "model_type": "roberta" + "model_type": "distilbert" }, "eval": { "task": "text-classification", "dataset": { - "path": "tweet_eval", - "name": "sentiment", + "path": "nyu-mll/glue", + "name": "sst2", + "samples": 1000, "columns_mapping": { - "input_column": "text" + "input_column": "sentence" } } } diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..ac283ad29 --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_fp16_config.json @@ -0,0 +1,63 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "distilbert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..6748df0de --- /dev/null +++ b/examples/recipes/distilbert_distilbert-base-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "distilbert/distilbert-base-uncased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "distilbert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json b/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json new file mode 100644 index 000000000..6e57f58db --- /dev/null +++ b/examples/recipes/dslim_bert-base-NER/token-classification_fp16_config.json @@ -0,0 +1,76 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 28996 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "bert" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "BramVanroy/conll2003", + "columns_mapping": { + "label_column": "ner_tags" + } + } + } +} diff --git a/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json b/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json new file mode 100644 index 000000000..86334662d --- /dev/null +++ b/examples/recipes/facebook_convnext-tiny-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "convnext" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..4330bdf89 --- /dev/null +++ b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json similarity index 93% rename from examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json rename to examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json index 7124b916a..f750bc8b1 100644 --- a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_w8a8_config.json +++ b/examples/recipes/facebook_dino-vitb16/image-feature-extraction_w8a16_config.json @@ -37,7 +37,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -48,7 +48,7 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "image-feature-extraction", - "model_name": "google/vit-base-patch16-224-in21k" + "model_name": "facebook/dino-vitb16" }, "loader": { "task": "image-feature-extraction", diff --git a/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..4330bdf89 --- /dev/null +++ b/examples/recipes/facebook_dino-vits16/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "vit" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json similarity index 92% rename from examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json rename to examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json index c3a3af051..3da2c7432 100644 --- a/examples/recipes/facebook_dinov2-small/image-feature-extraction_w8a8_config.json +++ b/examples/recipes/facebook_dino-vits16/image-feature-extraction_w8a16_config.json @@ -37,7 +37,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -48,12 +48,12 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "image-feature-extraction", - "model_name": "facebook/dinov2-small" + "model_name": "facebook/dino-vits16" }, "loader": { "task": "image-feature-extraction", "model_class": "AutoModel", - "model_type": "dinov2" + "model_type": "vit" }, "eval": { "task": "image-feature-extraction", diff --git a/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json index b3e1216fd..5c12ba9cf 100644 --- a/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json +++ b/examples/recipes/facebook_dinov2-base/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json new file mode 100644 index 000000000..5c12ba9cf --- /dev/null +++ b/examples/recipes/facebook_dinov2-large/image-feature-extraction_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "AutoModel", + "model_type": "dinov2" + }, + "eval": { + "task": "image-feature-extraction", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json similarity index 94% rename from examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json rename to examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json index f9e729c97..2d2c0022c 100644 --- a/examples/recipes/facebook_dinov2-base/image-feature-extraction_w8a8_config.json +++ b/examples/recipes/facebook_dinov2-large/image-feature-extraction_w8a16_config.json @@ -37,7 +37,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -48,7 +48,7 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "image-feature-extraction", - "model_name": "facebook/dinov2-base" + "model_name": "facebook/dinov2-large" }, "loader": { "task": "image-feature-extraction", diff --git a/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json b/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json index b3e1216fd..5c12ba9cf 100644 --- a/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json +++ b/examples/recipes/facebook_dinov2-small/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json new file mode 100644 index 000000000..58c6f6c2b --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json new file mode 100644 index 000000000..51688c2fd --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..553074f04 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-cased/fill-mask_w8a16_config.json @@ -0,0 +1,95 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 119547 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-cased" + }, + "compile": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..f4b523cf9 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 105879 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..7b71cc76c --- /dev/null +++ b/examples/recipes/google-bert_bert-base-multilingual-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 105879 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-multilingual-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json new file mode 100644 index 000000000..d42fef107 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-uncased/fill-mask_fp16_config.json @@ -0,0 +1,77 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json new file mode 100644 index 000000000..94669e9e9 --- /dev/null +++ b/examples/recipes/google-bert_bert-base-uncased/fill-mask_w8a16_config.json @@ -0,0 +1,94 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "fill-mask", + "model_name": "google-bert/bert-base-uncased" + }, + "loader": { + "task": "fill-mask", + "model_class": "AutoModelForMaskedLM", + "model_type": "bert" + }, + "eval": { + "task": "fill-mask", + "dataset": { + "path": "Salesforce/wikitext", + "name": "wikitext-2-raw-v1", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "text" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json new file mode 100644 index 000000000..13c6daa8f --- /dev/null +++ b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_fp16_config.json @@ -0,0 +1,83 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json new file mode 100644 index 000000000..db79310ba --- /dev/null +++ b/examples/recipes/google-bert_bert-large-uncased-whole-word-masking-finetuned-squad/question-answering_w8a16_config.json @@ -0,0 +1,100 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30522 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "bert" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "rajpurkar/squad", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json b/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json index 628768221..4330bdf89 100644 --- a/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json +++ b/examples/recipes/google_vit-base-patch16-224-in21k/image-feature-extraction_fp16_config.json @@ -43,7 +43,7 @@ "dataset": { "path": "timm/mini-imagenet", "split": "test", - "samples": 1000 + "samples": 100 } } } diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json new file mode 100644 index 000000000..b0e9d9e08 --- /dev/null +++ b/examples/recipes/google_vit-base-patch16-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json new file mode 100644 index 000000000..d1458e451 --- /dev/null +++ b/examples/recipes/google_vit-base-patch16-224/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "google/vit-base-patch16-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json new file mode 100644 index 000000000..e7074249d --- /dev/null +++ b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_fp16_config.json @@ -0,0 +1,68 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "zero-shot-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "zero-shot-classification", + "dataset": { + "path": "fancyzhx/ag_news", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "text", + "label_column": "label", + "candidate_labels": "World,Sports,Business,Sci/Tech", + "hypothesis_template": "This text is about {}." + } + } + } +} diff --git a/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json new file mode 100644 index 000000000..5a1abda9e --- /dev/null +++ b/examples/recipes/joeddav_xlm-roberta-large-xnli/zero-shot-classification_w8a16_config.json @@ -0,0 +1,85 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "zero-shot-classification", + "model_name": "joeddav/xlm-roberta-large-xnli" + }, + "compile": null, + "loader": { + "task": "zero-shot-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "zero-shot-classification", + "dataset": { + "path": "fancyzhx/ag_news", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "text", + "label_column": "label", + "candidate_labels": "World,Sports,Business,Sci/Tech", + "hypothesis_template": "This text is about {}." + } + } + } +} diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json index 5a186260f..226d77d36 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json +++ b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_fp16_config.json @@ -61,6 +61,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json index 11ba24b65..c3b498a73 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json +++ b/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a16_config.json @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json new file mode 100644 index 000000000..e09eabfcf --- /dev/null +++ b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_fp16_config.json @@ -0,0 +1,53 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "mattmdjaga/human_parsing_dataset", + "split": "train", + "samples": 100, + "columns_mapping": { + "annotation_column": "mask" + } + } + } +} diff --git a/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..919689732 --- /dev/null +++ b/examples/recipes/mattmdjaga_segformer_b2_clothes/image-segmentation_w8a16_config.json @@ -0,0 +1,70 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "mattmdjaga/segformer_b2_clothes" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "mattmdjaga/human_parsing_dataset", + "split": "train", + "samples": 1000, + "columns_mapping": { + "annotation_column": "mask" + } + } + } +} diff --git a/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json b/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json new file mode 100644 index 000000000..351c764b7 --- /dev/null +++ b/examples/recipes/microsoft_resnet-18/image-classification_fp16_config.json @@ -0,0 +1,49 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json b/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json new file mode 100644 index 000000000..6e2a421c4 --- /dev/null +++ b/examples/recipes/microsoft_resnet-18/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-18" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json b/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json new file mode 100644 index 000000000..83a057a7a --- /dev/null +++ b/examples/recipes/microsoft_resnet-50/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json b/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json new file mode 100644 index 000000000..17a0831ac --- /dev/null +++ b/examples/recipes/microsoft_resnet-50/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/resnet-50" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "resnet" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json new file mode 100644 index 000000000..3b7b0c032 --- /dev/null +++ b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swin" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json new file mode 100644 index 000000000..4f5349f3e --- /dev/null +++ b/examples/recipes/microsoft_swin-large-patch4-window7-224/image-classification_w8a16_config.json @@ -0,0 +1,67 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "microsoft/swin-large-patch4-window7-224" + }, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swin" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 1000 + } + } +} diff --git a/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json new file mode 100644 index 000000000..242b60821 --- /dev/null +++ b/examples/recipes/microsoft_swinv2-tiny-patch4-window16-256/image-classification_fp16_config.json @@ -0,0 +1,50 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 256 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "swinv2" + }, + "eval": { + "task": "image-classification", + "dataset": { + "path": "timm/mini-imagenet", + "split": "test", + "samples": 100 + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..676eb1836 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..d17d6eb43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-handwritten/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..6e54cde9d --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 768 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..59476ddb3 --- /dev/null +++ b/examples/recipes/microsoft_trocr-base-printed/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..4774e0b43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 1024 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..d17d6eb43 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-handwritten/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "Teklia/IAM-line", + "split": "test", + "samples": 500, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json new file mode 100644 index 000000000..e04d4c021 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_decoder.json @@ -0,0 +1,494 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 577, + 1024 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 1024 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_6_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_7_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_8_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_9_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_10_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_key", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_11_value", + "dtype": "float32", + "shape": [ + 1, + 16, + 1024, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + }, + { + "name": "present_6_key" + }, + { + "name": "present_6_value" + }, + { + "name": "present_7_key" + }, + { + "name": "present_7_value" + }, + { + "name": "present_8_key" + }, + { + "name": "present_8_value" + }, + { + "name": "present_9_key" + }, + { + "name": "present_9_value" + }, + { + "name": "present_10_key" + }, + { + "name": "present_10_value" + }, + { + "name": "present_11_key" + }, + { + "name": "present_11_value" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "VisionDecoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json new file mode 100644 index 000000000..e7a784568 --- /dev/null +++ b/examples/recipes/microsoft_trocr-large-printed/image-to-text_fp16_config_encoder.json @@ -0,0 +1,60 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 384, + 384 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true, + "reshape_mergedreshape": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "VisionEncoderWrapper", + "model_type": "vision-encoder-decoder" + }, + "eval": { + "task": "image-to-text", + "dataset": { + "path": "priyank-m/SROIE_2019_text_recognition", + "split": "test", + "samples": 200, + "columns_mapping": { + "input_column": "image", + "label_column": "text" + } + } + } +} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json new file mode 100644 index 000000000..6ca3b9a9d --- /dev/null +++ b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_fp16_config.json @@ -0,0 +1,80 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 32200 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "KorQuAD/squad_kor_v1", + "split": "validation", + "samples": 100, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json new file mode 100644 index 000000000..4af062dc0 --- /dev/null +++ b/examples/recipes/monologg_koelectra-small-v2-distilled-korquad-384/question-answering_w8a16_config.json @@ -0,0 +1,98 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 32200 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "start_logits" + }, + { + "name": "end_logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "question-answering", + "model_name": "monologg/koelectra-small-v2-distilled-korquad-384" + }, + "compile": null, + "loader": { + "task": "question-answering", + "model_class": "AutoModelForQuestionAnswering", + "model_type": "electra" + }, + "eval": { + "task": "question-answering", + "dataset": { + "path": "KorQuAD/squad_kor_v1", + "split": "validation", + "samples": 1000, + "columns_mapping": { + "question_column": "question", + "context_column": "context", + "id_column": "id", + "label_column": "answers" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json new file mode 100644 index 000000000..2c6552143 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..924497cdd --- /dev/null +++ b/examples/recipes/nvidia_segformer-b1-finetuned-ade-512-512/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b1-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json new file mode 100644 index 000000000..2c6552143 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..0848f1fc8 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b2-finetuned-ade-512-512/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b2-finetuned-ade-512-512" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json new file mode 100644 index 000000000..85a882f08 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_fp16_config.json @@ -0,0 +1,54 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 640, + 640 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 100, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json new file mode 100644 index 000000000..dd5c11a96 --- /dev/null +++ b/examples/recipes/nvidia_segformer-b5-finetuned-ade-640-640/image-segmentation_w8a16_config.json @@ -0,0 +1,71 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 640, + 640 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-segmentation", + "model_name": "nvidia/segformer-b5-finetuned-ade-640-640" + }, + "compile": null, + "loader": { + "task": "image-segmentation", + "model_class": "AutoModelForSemanticSegmentation", + "model_type": "segformer" + }, + "eval": { + "task": "image-segmentation", + "dataset": { + "path": "danjacobellis/scene_parse_150", + "split": "validation", + "samples": 1000, + "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", + "columns_mapping": { + "annotation_column": "annotation" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json index 5a186260f..226d77d36 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json +++ b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_fp16_config.json @@ -61,6 +61,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json index 98323e872..5745e9ca0 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json +++ b/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a16_config.json @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..d16a0697d --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch16/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json new file mode 100644 index 000000000..226d77d36 --- /dev/null +++ b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_fp16_config.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json similarity index 94% rename from examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json rename to examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json index 849dc5481..0dde45b25 100644 --- a/examples/recipes/laion_CLIP-ViT-B-32-laion2B-s34B-b79K/feature-extraction_w8a8_config.json +++ b/examples/recipes/openai_clip-vit-base-patch32/feature-extraction_w8a16_config.json @@ -55,7 +55,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -66,7 +66,7 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "feature-extraction", - "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + "model_name": "openai/clip-vit-base-patch32" }, "loader": { "task": "feature-extraction", @@ -78,6 +78,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..04c51f130 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 336, + 336 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json new file mode 100644 index 000000000..c10bff087 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 336, + 336 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json new file mode 100644 index 000000000..a3591f781 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14-336/zero-shot-image-classification_w8a16_config_text-encoder.json @@ -0,0 +1,89 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "openai/clip-vit-large-patch14-336" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json new file mode 100644 index 000000000..d16a0697d --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_image-encoder.json @@ -0,0 +1,62 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json new file mode 100644 index 000000000..b32582151 --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_fp16_config_text-encoder.json @@ -0,0 +1,72 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 49408 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 77 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "text_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "CLIPTextModelWithProjection", + "model_type": "clip_text_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json new file mode 100644 index 000000000..e6236da7e --- /dev/null +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_image-encoder.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "image_embeds" + }, + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "layer_norm_fusion": true, + "matmul_add_fusion": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-feature-extraction", + "model_name": "openai/clip-vit-large-patch14" + }, + "compile": null, + "loader": { + "task": "image-feature-extraction", + "model_class": "CLIPVisionModelWithProjection", + "model_type": "clip_vision_model" + }, + "eval": { + "task": "zero-shot-image-classification", + "dataset": { + "path": "uoft-cs/cifar100", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column": "img", + "label_column": "fine_label" + } + } + } +} diff --git a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json similarity index 85% rename from examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json rename to examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json index ae40a470a..222a26f34 100644 --- a/examples/recipes/openai_clip-vit-base-patch16/feature-extraction_w8a8_config.json +++ b/examples/recipes/openai_clip-vit-large-patch14/zero-shot-image-classification_w8a16_config_text-encoder.json @@ -55,7 +55,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -66,22 +66,23 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "feature-extraction", - "model_name": "openai/clip-vit-base-patch16" + "model_name": "openai/clip-vit-large-patch14" }, + "compile": null, "loader": { "task": "feature-extraction", "model_class": "CLIPTextModelWithProjection", "model_type": "clip_text_model" }, "eval": { - "task": "feature-extraction", + "task": "zero-shot-image-classification", "dataset": { - "path": "mteb/stsbenchmark-sts", + "path": "uoft-cs/cifar100", "split": "test", + "samples": 1000, "columns_mapping": { - "input_column_1": "sentence1", - "input_column_2": "sentence2", - "score_column": "score" + "input_column": "img", + "label_column": "fine_label" } } } diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json new file mode 100644 index 000000000..ef5ac83f3 --- /dev/null +++ b/examples/recipes/rizvandwiki_gender-classification/image-classification_fp16_config.json @@ -0,0 +1,52 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "gender" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json new file mode 100644 index 000000000..2e43e5ab2 --- /dev/null +++ b/examples/recipes/rizvandwiki_gender-classification/image-classification_w8a16_config.json @@ -0,0 +1,69 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "pixel_values", + "dtype": "float32", + "shape": [ + 1, + 3, + 224, + 224 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "image-classification", + "model_name": "rizvandwiki/gender-classification" + }, + "loader": { + "task": "image-classification", + "model_class": "AutoModelForImageClassification", + "model_type": "vit" + }, + "eval": { + "task": "image-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_fairface", + "columns_mapping": { + "label_column": "gender" + }, + "build_script": "scripts/e2e_eval/datasets/build_fairface.py" + } + } +} diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json index ba6575a35..ff77c9eb3 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json index d05456800..77ccc0498 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/feature-extraction_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json index eed96889f..30f1521ce 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_fp16_config.json @@ -67,6 +67,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 100, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json index 49c2bb8b8..28962bc74 100644 --- a/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json +++ b/examples/recipes/sentence-transformers_all-MiniLM-L6-v2/sentence-similarity_w8a16_config.json @@ -84,6 +84,7 @@ "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json new file mode 100644 index 000000000..e731b2a2a --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..b88600555 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/feature-extraction_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..25b22809c --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..345311097 --- /dev/null +++ b/examples/recipes/sentence-transformers_all-mpnet-base-v2/sentence-similarity_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/all-mpnet-base-v2" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json new file mode 100644 index 000000000..e731b2a2a --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json new file mode 100644 index 000000000..e00cce6f7 --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/feature-extraction_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "feature-extraction", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..25b22809c --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json new file mode 100644 index 000000000..500b5ff1a --- /dev/null +++ b/examples/recipes/sentence-transformers_multi-qa-mpnet-base-dot-v1/sentence-similarity_w8a16_config.json @@ -0,0 +1,82 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 30527 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": {}, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "sentence-similarity", + "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1" + }, + "compile": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "mpnet" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 1000, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json new file mode 100644 index 000000000..65f2713df --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/feature-extraction_fp16_config.json @@ -0,0 +1,79 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "feature-extraction", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..daea4950c --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,78 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250037 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + }, + { + "name": "token_type_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "bert" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json new file mode 100644 index 000000000..92b6ae628 --- /dev/null +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_fp16_config.json @@ -0,0 +1,66 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 250002 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "last_hidden_state" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "sentence-similarity", + "model_class": "AutoModel", + "model_type": "xlm-roberta" + }, + "eval": { + "task": "sentence-similarity", + "dataset": { + "path": "mteb/stsbenchmark-sts", + "split": "test", + "samples": 100, + "columns_mapping": { + "input_column_1": "sentence1", + "input_column_2": "sentence2", + "score_column": "score" + } + } + } +} diff --git a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json similarity index 83% rename from examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json rename to examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json index 275363979..8cba69ee4 100644 --- a/examples/recipes/BAAI_bge-large-en-v1.5/sentence-similarity_w8a8_config.json +++ b/examples/recipes/sentence-transformers_paraphrase-multilingual-mpnet-base-v2/sentence-similarity_w8a16_config.json @@ -19,7 +19,7 @@ ], "value_range": [ 0, - 30522 + 250002 ] }, { @@ -33,18 +33,6 @@ 0, 2 ] - }, - { - "name": "token_type_ids", - "dtype": "int32", - "shape": [ - 1, - 512 - ], - "value_range": [ - 0, - 2 - ] } ], "output_tensors": [ @@ -61,7 +49,7 @@ "samples": 10, "calibration_method": "minmax", "weight_type": "uint8", - "activation_type": "uint8", + "activation_type": "uint16", "per_channel": false, "symmetric": false, "save_calibration": false, @@ -72,18 +60,19 @@ "op_types_to_quantize": null, "nodes_to_exclude": null, "task": "sentence-similarity", - "model_name": "BAAI/bge-large-en-v1.5" + "model_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" }, "loader": { "task": "sentence-similarity", "model_class": "AutoModel", - "model_type": "bert" + "model_type": "xlm-roberta" }, "eval": { "task": "sentence-similarity", "dataset": { "path": "mteb/stsbenchmark-sts", "split": "test", + "samples": 1000, "columns_mapping": { "input_column_1": "sentence1", "input_column_2": "sentence2", diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json new file mode 100644 index 000000000..8d02715fe --- /dev/null +++ b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_fp16_config.json @@ -0,0 +1,64 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": null, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "roberta" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 100, + "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", + "columns_mapping": { + "label_column": "pos_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" + } + } +} diff --git a/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json new file mode 100644 index 000000000..d00b21020 --- /dev/null +++ b/examples/recipes/w11wo_indonesian-roberta-base-posp-tagger/token-classification_w8a16_config.json @@ -0,0 +1,81 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 50265 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true + }, + "quant": { + "mode": "qdq", + "samples": 10, + "calibration_method": "minmax", + "weight_type": "uint8", + "activation_type": "uint16", + "per_channel": false, + "symmetric": false, + "save_calibration": false, + "distribution": "uniform", + "seed": null, + "calibration_load_path": null, + "calibration_save_path": null, + "op_types_to_quantize": null, + "nodes_to_exclude": null, + "task": "token-classification", + "model_name": "w11wo/indonesian-roberta-base-posp-tagger" + }, + "loader": { + "task": "token-classification", + "model_class": "AutoModelForTokenClassification", + "model_type": "roberta" + }, + "eval": { + "task": "token-classification", + "dataset": { + "samples": 1000, + "path": "~/.cache/winml/eval_datasets/build_indonlu_posp", + "columns_mapping": { + "label_column": "pos_tags" + }, + "build_script": "scripts/e2e_eval/datasets/build_indonlu_posp.py" + } + } +}