import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image

# ✅ Define the model name from the Hugging Face Hub
MODEL_NAME = "deepseek-ai/deepseek-vl2-small"
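# Note: to the best of my knowledge, other sizes also exist on the Hub
# ("deepseek-ai/deepseek-vl2-tiny" and "deepseek-ai/deepseek-vl2"); swap
# MODEL_NAME if you need a different size/VRAM trade-off.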

# ✅ Load processor & model with `trust_remote_code=True`
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,  # ✅ allows loading the repo's custom model code
).to(device)

# ✅ Test function to process an image
def predict(image_path):
    image = Image.open(image_path).convert("RGB")

    # Process input
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Generate output (without an explicit cap, generate() stops after
    # ~20 tokens by default)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=128)

    # Decode response
    generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]

    return generated_text
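
# Hedged variant: many transformers vision-language processors also accept a
# text prompt via `text=`, and image-only generation may return empty or
# generic output. The `text=` keyword and the default prompt below are
# assumptions about this repo's custom processor, not confirmed against its
# model card.
def predict_with_prompt(image_path, prompt="Describe this image."):
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=128)
    return processor.batch_decode(output, skip_special_tokens=True)[0]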

# ✅ Example usage
if __name__ == "__main__":
    test_image_path = "test.jpg"  # Replace with an actual image path
    print("Generated Output:", predict(test_image_path))