Spaces:

Ronith55
/

OCR_deepseek-vl2

Sleeping

OCR_deepseek-vl2 / app.py

Update app.py

7fb9cbe verified 10 months ago

1.18 kB

	import torch
	from transformers import AutoProcessor, AutoModelForVision2Seq
	from PIL import Image

	# Hugging Face Hub identifier of the checkpoint to load.
	MODEL_NAME = "deepseek-ai/deepseek-vl2-small"

	# Decide the device once so the model placement and dtype stay consistent.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# Half precision is only used on GPU: many CPU kernels lack float16 support
	# (or are very slow), so fall back to float32 when no GPU is available.
	DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

	# SECURITY: trust_remote_code=True executes Python code shipped in the model
	# repository. Only use it with repositories you trust (consider pinning a
	# specific `revision`).
	# NOTE(review): confirm this checkpoint's remote code is actually loadable
	# through AutoModelForVision2Seq.
	processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
	model = AutoModelForVision2Seq.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=DTYPE,
	    trust_remote_code=True,
	).to(DEVICE)

	def predict(image_path: str) -> str:
	    """Run the vision-language model on one image and return its text.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The decoded text of the first generated sequence, with special
	        tokens stripped.
	    """
	    # Close the file handle promptly; convert() returns an independent,
	    # fully loaded RGB copy that stays valid after the file is closed.
	    with Image.open(image_path) as source:
	        image = source.convert("RGB")

	    # Put the inputs on the model's own device and cast floating-point
	    # tensors to its dtype, so half-precision weights are not fed float32
	    # pixel values and the device is never re-derived independently.
	    inputs = processor(images=image, return_tensors="pt").to(
	        model.device, model.dtype
	    )

	    # Generate token ids, then decode the first (only) sequence to text.
	    output = model.generate(**inputs)

	    return processor.batch_decode(output, skip_special_tokens=True)[0]

	# Script entry point: run a single prediction on a sample image.
	if __name__ == "__main__":
	    # Placeholder path; point this at a real image file before running.
	    test_image_path = "test.jpg"
	    generated = predict(test_image_path)
	    print("Generated Output:", generated)