Update app (inference check passed ✅)

#2
Files changed (1)
  1. app.py +8 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import sys
 
3
  import time
4
  import copy
5
  import random
@@ -7,9 +8,9 @@ import torch
7
  import spaces
8
  import requests
9
  import subprocess
10
- import importlib.util
11
  import gradio as gr
12
  from PIL import Image
 
13
  from threading import Thread
14
  from typing import Iterable, Optional, Tuple, List
15
 
@@ -32,7 +33,9 @@ check_and_install_package("transformers", "transformers", "transformers==4.57.3"
32
  print("Done!")
33
 
34
  from transformers import (
35
- Qwen3VLForConditionalGeneration,
 
 
36
  AutoProcessor,
37
  TextIteratorStreamer,
38
  )
@@ -135,11 +138,11 @@ css = """
135
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
136
  print(f"Using Main Device: {device}")
137
 
138
- QWEN_VL_MODEL_ID = "Qwen/Qwen3-VL-8B-Instruct"
139
  print(f"Loading OCR Model: {QWEN_VL_MODEL_ID}...")
140
 
141
  qwen_processor = AutoProcessor.from_pretrained(QWEN_VL_MODEL_ID, trust_remote_code=True)
142
- qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
143
  QWEN_VL_MODEL_ID,
144
  attn_implementation="flash_attention_2",
145
  trust_remote_code=True,
@@ -362,7 +365,7 @@ with gr.Blocks() as demo:
362
  text_output = gr.Textbox(
363
  label="Extracted Text (Editable)",
364
  interactive=True,
365
- lines=10,
366
  )
367
 
368
  audio_output = gr.Audio(
 
1
  import os
2
  import sys
3
+ import cv2
4
  import time
5
  import copy
6
  import random
 
8
  import spaces
9
  import requests
10
  import subprocess
 
11
  import gradio as gr
12
  from PIL import Image
13
+ import importlib.util
14
  from threading import Thread
15
  from typing import Iterable, Optional, Tuple, List
16
 
 
33
  print("Done!")
34
 
35
  from transformers import (
36
+ Qwen2_5_VLForConditionalGeneration,
37
+ AutoModelForImageTextToText,
38
+ AutoModelForCausalLM,
39
  AutoProcessor,
40
  TextIteratorStreamer,
41
  )
 
138
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
139
  print(f"Using Main Device: {device}")
140
 
141
+ QWEN_VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
142
  print(f"Loading OCR Model: {QWEN_VL_MODEL_ID}...")
143
 
144
  qwen_processor = AutoProcessor.from_pretrained(QWEN_VL_MODEL_ID, trust_remote_code=True)
145
+ qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
146
  QWEN_VL_MODEL_ID,
147
  attn_implementation="flash_attention_2",
148
  trust_remote_code=True,
 
365
  text_output = gr.Textbox(
366
  label="Extracted Text (Editable)",
367
  interactive=True,
368
+ lines=14,
369
  )
370
 
371
  audio_output = gr.Audio(