Spaces: Running on Zero
update app + inference check passed ✅
#2
by
prithivMLmods
- opened
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
| 3 |
import time
|
| 4 |
import copy
|
| 5 |
import random
|
|
@@ -7,9 +8,9 @@ import torch
|
|
| 7 |
import spaces
|
| 8 |
import requests
|
| 9 |
import subprocess
|
| 10 |
-
import importlib.util
|
| 11 |
import gradio as gr
|
| 12 |
from PIL import Image
|
|
|
|
| 13 |
from threading import Thread
|
| 14 |
from typing import Iterable, Optional, Tuple, List
|
| 15 |
|
|
@@ -32,7 +33,9 @@ check_and_install_package("transformers", "transformers", "transformers==4.57.3"
|
|
| 32 |
print("Done!")
|
| 33 |
|
| 34 |
from transformers import (
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
AutoProcessor,
|
| 37 |
TextIteratorStreamer,
|
| 38 |
)
|
|
@@ -135,11 +138,11 @@ css = """
|
|
| 135 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 136 |
print(f"Using Main Device: {device}")
|
| 137 |
|
| 138 |
-
QWEN_VL_MODEL_ID = "Qwen/
|
| 139 |
print(f"Loading OCR Model: {QWEN_VL_MODEL_ID}...")
|
| 140 |
|
| 141 |
qwen_processor = AutoProcessor.from_pretrained(QWEN_VL_MODEL_ID, trust_remote_code=True)
|
| 142 |
-
qwen_model =
|
| 143 |
QWEN_VL_MODEL_ID,
|
| 144 |
attn_implementation="flash_attention_2",
|
| 145 |
trust_remote_code=True,
|
|
@@ -362,7 +365,7 @@ with gr.Blocks() as demo:
|
|
| 362 |
text_output = gr.Textbox(
|
| 363 |
label="Extracted Text (Editable)",
|
| 364 |
interactive=True,
|
| 365 |
-
lines=
|
| 366 |
)
|
| 367 |
|
| 368 |
audio_output = gr.Audio(
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
+
import cv2
|
| 4 |
import time
|
| 5 |
import copy
|
| 6 |
import random
|
|
|
|
| 8 |
import spaces
|
| 9 |
import requests
|
| 10 |
import subprocess
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
from PIL import Image
|
| 13 |
+
import importlib.util
|
| 14 |
from threading import Thread
|
| 15 |
from typing import Iterable, Optional, Tuple, List
|
| 16 |
|
|
|
|
| 33 |
print("Done!")
|
| 34 |
|
| 35 |
from transformers import (
|
| 36 |
+
Qwen2_5_VLForConditionalGeneration,
|
| 37 |
+
AutoModelForImageTextToText,
|
| 38 |
+
AutoModelForCausalLM,
|
| 39 |
AutoProcessor,
|
| 40 |
TextIteratorStreamer,
|
| 41 |
)
|
|
|
|
| 138 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 139 |
print(f"Using Main Device: {device}")
|
| 140 |
|
| 141 |
+
QWEN_VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
|
| 142 |
print(f"Loading OCR Model: {QWEN_VL_MODEL_ID}...")
|
| 143 |
|
| 144 |
qwen_processor = AutoProcessor.from_pretrained(QWEN_VL_MODEL_ID, trust_remote_code=True)
|
| 145 |
+
qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 146 |
QWEN_VL_MODEL_ID,
|
| 147 |
attn_implementation="flash_attention_2",
|
| 148 |
trust_remote_code=True,
|
|
|
|
| 365 |
text_output = gr.Textbox(
|
| 366 |
label="Extracted Text (Editable)",
|
| 367 |
interactive=True,
|
| 368 |
+
lines=14,
|
| 369 |
)
|
| 370 |
|
| 371 |
audio_output = gr.Audio(
|