from transformers import AutoImageProcessor, SiglipForImageClassification
from PIL import Image
import torch
import cv2
import os
import gradio as gr

# Load model and processor
model_name = "prithivMLmods/deepfake-detector-model-v1"
model = SiglipForImageClassification.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)

# Updated label mapping
id2label = {
    "0": "fake",
    "1": "real"
}


def classify_image(image):
    """Classify a single image as fake or real.

    Args:
        image: Array-like image accepted by ``PIL.Image.fromarray``. The
            channels are interpreted as RGB, so callers holding OpenCV (BGR)
            frames must convert them first.

    Returns:
        Dict mapping each label in ``id2label`` to its softmax probability,
        rounded to 3 decimals.
    """
    image = Image.fromarray(image).convert("RGB")
    inputs = processor(images=image, return_tensors="pt")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()

    prediction = {
        id2label[str(i)]: round(prob, 3) for i, prob in enumerate(probs)
    }
    return prediction


def sliceFrames(cap, step=10):
    """Read a capture to the end and keep every ``step``-th frame.

    The capture is always released before returning, even if reading fails.

    Args:
        cap: An opened ``cv2.VideoCapture``.
        step: Keep one frame out of every ``step`` (default 10). Must be >= 1.

    Returns:
        List of the sampled frames exactly as returned by ``cap.read()``
        (OpenCV decodes frames in BGR channel order).

    Raises:
        ValueError: If ``step`` is smaller than 1.
    """
    frames = []
    try:
        if step < 1:
            raise ValueError("step must be >= 1")

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Save every step-th frame (every 10th by default)
            if frame_count % step == 0:
                frames.append(frame)

            frame_count += 1
    finally:
        # Release the capture on every exit path so the handle is not leaked.
        cap.release()

    return frames


def classify_video(video_path):
    """Average the per-frame fake/real scores over a sampled video.

    Args:
        video_path: Path to the video file. A missing path (``None`` or empty)
            is reported the same way as an unreadable file.

    Returns:
        ``{"average_fake": ..., "average_real": ...}`` rounded to 3 decimals
        (both 0 when no frame could be read), or ``{"error": ...}`` when the
        video cannot be opened.
    """
    # No file supplied: report it instead of handing None to OpenCV.
    if not video_path:
        return {"error": "Could not open video."}

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return {"error": "Could not open video."}

    frames = sliceFrames(cap)

    totalfake = 0
    totalreal = 0

    for frame in frames:
        # OpenCV yields BGR frames while classify_image interprets its input
        # as RGB; convert so red and blue are not swapped for the model.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        prediction = classify_image(rgb_frame)
        totalfake += prediction["fake"]
        totalreal += prediction["real"]

    avg_fake = totalfake / len(frames) if frames else 0
    avg_real = totalreal / len(frames) if frames else 0

    return {
        "average_fake": round(avg_fake, 3),
        "average_real": round(avg_real, 3),
    }


# Gradio Interface
def gradio_interface(video_file):
    """Gradio callback: classify the uploaded video file."""
    return classify_video(video_file)


iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.JSON(label="Prediction"),
    title="Deepfake Detector",
    description="Upload a video to check if it's real or fake."
)

if __name__ == "__main__":
    iface.launch()