Update index.js
index.js CHANGED
@@ -1,5 +1,5 @@
 import { env, AutoTokenizer, RawImage, Tensor, getSession } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers';
-import { getModelJSON } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/src/utils/hub.js";
+import { getModelJSON, getModelFile } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/src/utils/hub.js";
 import * as ort from "https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.webgpu.mjs";

 // Since we will download the model from the Hugging Face Hub, we can skip the local model check
@@ -19,12 +19,10 @@ const IMAGE_EMBED_SIZE = WIDTH_FACTOR * HEIGHT_FACTOR;
 const MAX_SEQ_LENGTH = 1024;
 const ONNX_URL = "http://localhost:3004/onnx";
 const BASE_MODEL = "Qwen/Qwen2-VL-2B-Instruct";
-const
+const ONNX_MODEL = "pdufour/Qwen2-VL-2B-Instruct-ONNX-Q4-F16";
+const QUANT = "q4f16";
 const MAX_SINGLE_CHAT_LENGTH = 10;

-// const ONNX_MODEL_BASE_URL=
-console.log(getSession);
-
 status.textContent = 'Loading model...';
 status.textContent = 'Ready';

@@ -47,7 +45,6 @@ fileUpload.addEventListener('change', function (e) {
   reader.readAsDataURL(file);
 });

-
 async function parse(img, txt) {
   imageContainer.innerHTML = '';
   imageContainer.style.backgroundImage = `url(${img})`;
@@ -127,7 +124,7 @@ export async function imageTextToText(
   const pixel_values = image.unsqueeze(0);

   const ortSessionA = await ort.InferenceSession.create(
-
+    await getModelFile(ONNX_MODEL, "onnx/QwenVL_A_${QUANT}.onnx"),
     { executionProviders: ["webgpu"] }
   );

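The substantive change: instead of pointing onnxruntime-web at a hard-coded localhost URL, the session is now created from bytes that getModelFile downloads (and caches) from the pdufour/Qwen2-VL-2B-Instruct-ONNX-Q4-F16 repo on the Hub. Below is a minimal standalone sketch of that loading path, assuming getModelFile resolves to the file's raw bytes when run in the browser. Note that the sketch builds the filename with a template literal (backticks) so ${QUANT} interpolates; the committed line passes the path in double quotes and would request a file literally named "QwenVL_A_${QUANT}.onnx".

import * as ort from "https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.webgpu.mjs";
import { env } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
import { getModelFile } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/src/utils/hub.js";

// Everything is fetched from the Hub, so skip transformers.js's local-model
// check (the env flag the comment on line 5 alludes to).
env.allowLocalModels = false;

const ONNX_MODEL = "pdufour/Qwen2-VL-2B-Instruct-ONNX-Q4-F16";
const QUANT = "q4f16";

// Download the quantized QwenVL_A graph once (the library caches it),
// then create a WebGPU-backed session directly from the bytes.
const bytes = await getModelFile(ONNX_MODEL, `onnx/QwenVL_A_${QUANT}.onnx`);
const ortSessionA = await ort.InferenceSession.create(bytes, {
  executionProviders: ["webgpu"],
});

console.log(ortSessionA.inputNames); // inspect the inputs the graph expects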