Spaces:

pdufour
/

Qwen2-VL-2B-Instruct-ONNX-Q4-F16

Running

pdufour committed on Nov 19, 2024

Commit

f9cf2b4

verified ·

1 Parent(s): dd6299d

Update index.js

Files changed (1) hide show

index.js CHANGED Viewed

@@ -48,21 +48,6 @@ async function initializeSessions() {
     { executionProviders: ["webgpu"] }
   );
-  ortSessionD = await ort.InferenceSession.create(
-    await getModelFile(ONNX_MODEL, `onnx/QwenVL_D_${QUANT}.onnx`),
-    {
-      executionProviders: ["webgpu"],
-    }
-  );
-  ortSessionE = await ort.InferenceSession.create(
-    await getModelFile(ONNX_MODEL, `onnx/QwenVL_E_${QUANT}.onnx`),
-    {
-      executionProviders: ["webgpu"],
-    },
-  );
   config = (await getModelJSON(BASE_MODEL, "config.json"));
   status.textContent = 'Ready';
@@ -255,6 +240,13 @@ export async function imageTextToText(
     await ortSessionA.release();
     ortSessionA = null;
     ({ hidden_states, position_ids } = await ortSessionD.run({
       "hidden_states.1": hidden_states,
@@ -276,6 +268,15 @@ export async function imageTextToText(
   ) {
     let token_id;
     ({
       max_logit_ids: token_id,
       past_key_states: past_key_states,

     { executionProviders: ["webgpu"] }
   );
   config = (await getModelJSON(BASE_MODEL, "config.json"));
   status.textContent = 'Ready';
     await ortSessionA.release();
     ortSessionA = null;
+    ortSessionD = await ort.InferenceSession.create(
+      await getModelFile(ONNX_MODEL, `onnx/QwenVL_D_${QUANT}.onnx`),
+      {
+        executionProviders: ["webgpu"],
+      }
+    );
     ({ hidden_states, position_ids } = await ortSessionD.run({
       "hidden_states.1": hidden_states,
   ) {
     let token_id;
+    if (!ortSessionE) {
+      ortSessionE = await ort.InferenceSession.create(
+        await getModelFile(ONNX_MODEL, `onnx/QwenVL_E_${QUANT}.onnx`),
+        {
+          executionProviders: ["webgpu"],
+        },
+      );
+    }
     ({
       max_logit_ids: token_id,
       past_key_states: past_key_states,