Update app.py
app.py CHANGED
@@ -305,86 +305,27 @@ def query(input: str, total_examples: int, near_far_ratio: float = 0.5):
 # Example
 print("Example: ", query("water bottle", total_examples=4, near_far_ratio=0.5))
 
-# 1. Load dataset
-dataset = load_dataset("cwinkler/patents_green_plastics")
-
-# Split into train/test
-dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
-train_dataset = dataset["train"]
-test_dataset = dataset["test"]
-
-# 2. Tokenizer
-model_name = "distilbert-base-uncased"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-def preprocess(examples):
-    return tokenizer(examples["abstract"], truncation=True, padding="max_length", max_length=128)
-
-tokenized = dataset.map(preprocess, batched=True)
-tokenized = tokenized.rename_column("label", "labels")
-tokenized.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
-
-train_dataset = tokenized["train"].shuffle(seed=42).select(range(2000))  # subset for CPU
-test_dataset = tokenized["test"]
-
-# 3. Base model
-base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
-
-# 4. LoRA config
-lora_config = LoraConfig(
-    task_type=TaskType.SEQ_CLS,
-    r=8, lora_alpha=16, lora_dropout=0.1, target_modules=["q_lin", "v_lin"]
-)
-
-model = get_peft_model(base_model, lora_config)
-
-# 5. Training setup
-import os
-os.environ["WANDB_DISABLED"] = "true"
-
-args = TrainingArguments(
-    output_dir="./lora-green-patents",
-    do_eval=True,    # instead of evaluation_strategy
-    eval_steps=500,  # run eval every N steps
-    save_steps=500,  # save checkpoint every N steps
-    learning_rate=2e-4,
-    per_device_train_batch_size=8,
-    per_device_eval_batch_size=8,
-    num_train_epochs=10,
-    logging_steps=20,
-    report_to=None
-)
-
-trainer = Trainer(
-    model=model,
-    args=args,
-    train_dataset=train_dataset,
-    eval_dataset=test_dataset,
-)
-
-# 6. Train
-trainer.train()
-
-# 7. Save adapter
-model.save_pretrained("lora-green-patents")
-tokenizer.save_pretrained("lora-green-patents")
-
-# 8. Inference
-
 # Load base + adapter
-
-texts = [
-    "A biodegradable plastic composition derived from renewable corn starch.",
-    "A new synthetic polymer with enhanced tensile strength."
-    "Refreshing Taste: Every bottle of Pure Life Water is enhanced with minerals for a crisp taste that makes drinking water delicious. 12 pack of 16.9 fl oz water bottles."
-    "This 18/8 stainless steel water bottle is designed to last a lifetime. Plastic free & Eco friendly water bottles are a healthier option for you & the planet! However, Water in stainless steel tastes different than plastic, make sure your taste buds are ready for this healthy switch"
-]
-print(clf(texts))
+def lora_load():
+    model_name = "distilbert-base-uncased"  # same base you trained on
+
+    tokenizer = AutoTokenizer.from_pretrained(REPO_ID_LORA_GREEN_PATENTS)  # , token=token)
+    base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # , token=token)
+    model = PeftModel.from_pretrained(base_model, REPO_ID_LORA_GREEN_PATENTS)  # , token=token)
+
+    clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
+
+    # Examples of patents and products (fixed commas)
+    texts = [
+        "A biodegradable plastic composition derived from renewable corn starch.",
+        "A new synthetic polymer with enhanced tensile strength.",
+        "Refreshing Taste: Every bottle of Pure Life Water is enhanced with minerals for a crisp taste that makes drinking water delicious. 12 pack of 16.9 fl oz water bottles.",
+        "This 18/8 stainless steel water bottle is designed to last a lifetime. Plastic free & Eco friendly water bottles are a healthier option for you & the planet! However, Water in stainless steel tastes different than plastic, make sure your taste buds are ready for this healthy switch"
+    ]
+    print(clf(texts))
+    return clf
+
+clf = lora_load()
 
 ex_waterbottle_text = [
     "A single use case made with fossil fuels and gasoline.",