diff --git a/examples/feature_extraction/peft_lora_embedding_semantic_search.py b/examples/feature_extraction/peft_lora_embedding_semantic_search.py index 02015fbad8..e209133c34 100644 --- a/examples/feature_extraction/peft_lora_embedding_semantic_search.py +++ b/examples/feature_extraction/peft_lora_embedding_semantic_search.py @@ -264,7 +264,7 @@ def main(): dataset = DatasetDict({"train": train_dataset, "validation": val_dataset}) else: - dataset = load_dataset(args.dataset_name) + dataset = load_dataset(args.dataset_name, revision="main") def preprocess_function(examples): queries = examples["query"] diff --git a/examples/feature_extraction/peft_lora_embedding_semantic_similarity_inference.ipynb b/examples/feature_extraction/peft_lora_embedding_semantic_similarity_inference.ipynb index 733531844c..b263db1652 100644 --- a/examples/feature_extraction/peft_lora_embedding_semantic_similarity_inference.ipynb +++ b/examples/feature_extraction/peft_lora_embedding_semantic_similarity_inference.ipynb @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f190e1ee", "metadata": {}, "outputs": [ @@ -157,7 +157,7 @@ "import pandas as pd\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n", - "dataset = load_dataset(dataset_name)\n", + "dataset = load_dataset(dataset_name, revision=\"main\")\n", "train_product_dataset = dataset[\"train\"].to_pandas()[[\"product_title\"]]\n", "val_product_dataset = dataset[\"validation\"].to_pandas()[[\"product_title\"]]\n", "product_dataset_for_indexing = pd.concat([train_product_dataset, val_product_dataset])\n",