Spaces:

npro65
/

Test1

Sleeping

Test1 / app.py

Update app.py

31bf61a verified 9 days ago

1.6 kB

	import torch
	from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
	from PIL import Image
	import gradio as gr

	# Checkpoint on the Hugging Face Hub that both the processor and the model load from.
	MODEL_ID = "Salesforce/instructblip-vicuna-7b"

	# The processor is loaded from the same checkpoint as the model.
	processor = InstructBlipProcessor.from_pretrained(MODEL_ID)

	# Use half precision on a CUDA device; otherwise stay on CPU in float32.
	if torch.cuda.is_available():
	    device, dtype = "cuda", torch.float16
	else:
	    device, dtype = "cpu", torch.float32

	# Load the weights in the selected dtype and move them to the selected device.
	model = InstructBlipForConditionalGeneration.from_pretrained(
	    MODEL_ID, torch_dtype=dtype
	).to(device)

	# 3. Define the function that Gradio will call
	def image_qa_pipeline(image, question):
	    """Answer a question about an image using the loaded InstructBLIP model.

	    Args:
	        image: The image to inspect, or ``None`` when nothing was uploaded.
	        question: The user's question. When it is empty, ``None`` or only
	            whitespace, a generic "describe this image" prompt is used.

	    Returns:
	        The decoded answer text, or a short message asking for an image
	        when ``image`` is ``None``.
	    """
	    if image is None:
	        return "Please upload an image."

	    if not question or not question.strip():
	        question = "Describe this image in detail."

	    # Preprocess. Move the inputs to the target device and cast the
	    # floating-point tensors (the pixel values) to the model's dtype: when
	    # the model runs in float16 on CUDA, float32 pixel values would
	    # otherwise trigger a dtype mismatch inside the vision encoder.
	    inputs = processor(images=image, text=question, return_tensors="pt").to(
	        device, model.dtype
	    )

	    # Generate without tracking gradients; this is inference only.
	    with torch.no_grad():
	        output_ids = model.generate(
	            **inputs,
	            max_new_tokens=64,
	            num_beams=3,
	        )

	    answer = processor.tokenizer.decode(output_ids[0], skip_special_tokens=True)
	    # Drop surrounding whitespace left over from decoding.
	    return answer.strip()

	# Gradio UI: an image upload plus a free-text question in, a text answer out.
	demo = gr.Interface(
	    fn=image_qa_pipeline,
	    inputs=[
	        # type="pil" makes Gradio pass the upload to the callback as a PIL image.
	        gr.Image(type="pil", label="Upload an image"),
	        gr.Textbox(label="Question about the image"),
	    ],
	    outputs=gr.Textbox(label="Answer"),
	    title="Multimodal Image Q&A Assistant",
	    description="Upload an image and ask a question about it.",
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()