rachitrk committed
Commit ef16f91 · verified · 1 Parent(s): 0b0989c

Upload 7 files

Files changed (7)
  1. .huggingface.yml +6 -0
  2. Dockerfile +20 -0
  3. image_model_core.py +494 -0
  4. main.py +157 -0
  5. model_helper.py +179 -0
  6. requirements.txt +18 -0
  7. temporal_model.py +25 -0
.huggingface.yml ADDED
@@ -0,0 +1,6 @@
+ sdk: docker
+ app_port: 7860
+ title: DeepGuard - Deepfake Detection API
+ emoji: 🧠
+ colorFrom: blue
+ colorTo: indigo
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # -----------------------------
+ # 🧠 DeepGuard - Python ML Backend (Docker for Hugging Face)
+ # -----------------------------
+
+ FROM python:3.10-slim
+
+ # Install system dependencies (ffmpeg for video decoding, X libs for OpenCV)
+ RUN apt-get update && apt-get install -y \
+     ffmpeg libsm6 libxext6 git && \
+     rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+ COPY . .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Hugging Face Spaces expects the app on port 7860
+ EXPOSE 7860
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
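A quick local smoke test of the container (the image tag is arbitrary and not part of this commit):

    docker build -t deepguard-api .
    docker run -p 7860:7860 deepguard-api

Once up, the API listens on http://localhost:7860, the same port it uses on Spaces.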
image_model_core.py ADDED
@@ -0,0 +1,494 @@
+ # image_model_core.py
+ """
+ Image detection core (accuracy-first).
+ - Prefers RetinaFace for face detection, falling back to MTCNN.
+ - Runs batched inference (trivially small for a single image).
+ - Weights model outputs at 0.85 and heuristics at 0.15.
+ """
+
+ import os
+ import logging
+ import warnings
+ import numpy as np
+ from PIL import Image
+ import cv2
+ import torch
+ from dotenv import load_dotenv
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
+
+ warnings.filterwarnings("ignore")
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
+
+ load_dotenv()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ logger.info(f"Device for image_model_core: {device}")
+
+ # Prefer RetinaFace, else MTCNN (same approach as model_helper)
+ USE_RETINA = False
+ mtcnn = None
+ try:
+     from retinaface import RetinaFace
+     USE_RETINA = True
+     logger.info("Using RetinaFace for image face detection.")
+ except Exception:
+     try:
+         from facenet_pytorch import MTCNN
+         mtcnn = MTCNN(keep_all=False, device=device)
+         logger.info("RetinaFace not available — falling back to MTCNN for image pipeline.")
+     except Exception:
+         mtcnn = None
+         logger.warning("No RetinaFace or MTCNN available — image face detection will be basic.")
+
+ # Model IDs come from the environment (an earlier revision hardcoded
+ # prithivMLmods/deepfake-detector-model-v1, Wvolf/ViT_Deepfake_Detection and
+ # microsoft/beit-large-patch16-224-pt22k-ft22k). Unset variables are skipped.
+ MODEL_PATHS = [m for m in (
+     os.getenv("IMAGE_MODEL_1"),
+     os.getenv("IMAGE_MODEL_2"),
+     os.getenv("IMAGE_MODEL_3"),
+ ) if m]
+
+ models = []
+ processors = []
+
+ def load_image_models():
+     global models, processors
+     models = []
+     processors = []
+     for mid in MODEL_PATHS:
+         try:
+             proc = AutoImageProcessor.from_pretrained(mid, trust_remote_code=False)
+             model = AutoModelForImageClassification.from_pretrained(mid).to(device)
+             model.eval()
+             models.append(model)
+             processors.append(proc)
+             logger.info(f"✅ Loaded image model: {mid.split('/')[-1]}")
+         except Exception as e:
+             logger.warning(f"⚠️ Failed to load model {mid}: {e}")
+
+ load_image_models()
+ if len(models) == 0:
+     logger.error("No image models loaded. Image detection disabled until models are present.")
+
+ # --------------- heuristics ----------------
+ def _frequency_artifact_score(face_bgr):
+     # High-frequency energy in the FFT magnitude spectrum; generative
+     # upsampling artifacts tend to inflate it.
+     gray = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2GRAY)
+     small = cv2.resize(gray, (64, 64), interpolation=cv2.INTER_LINEAR)
+     f = np.fft.fft2(small)
+     fshift = np.fft.fftshift(f)
+     mag = np.log(np.abs(fshift) + 1)
+     high_freq = np.mean(mag[32:, 32:]) if mag.shape[0] > 32 else np.mean(mag)
+     return float(np.clip(high_freq / 6.0, 0, 1))
+
+ def _illumination_consistency(face_bgr):
+     lab = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2LAB)
+     l_std = np.std(lab[:, :, 0])
+     return float(np.clip(l_std / 64.0, 0, 1))
+
+ def _edge_density(face_bgr):
+     gray = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2GRAY)
+     edges = cv2.Canny(gray, 80, 160)
+     return float(np.clip(np.mean(edges) / 255.0 * 2.0, 0, 1))
+
+ def aggregate_heuristics(face_bgr):
+     try:
+         return float(np.mean([_frequency_artifact_score(face_bgr),
+                               _illumination_consistency(face_bgr),
+                               _edge_density(face_bgr)]))
+     except Exception as e:
+         logger.warning(f"Heuristic error: {e}")
+         return 0.0
+
+ # ---------------- face extraction -------------
+ def _detect_face_boxes(img_bgr):
+     h, w = img_bgr.shape[:2]
+     boxes = []
+     if USE_RETINA:
+         try:
+             dets = RetinaFace.detect_faces(img_bgr, align=False)
+             if isinstance(dets, dict):
+                 for k, v in dets.items():
+                     bb = v.get("facial_area") or v.get("bbox")
+                     if bb:
+                         x1, y1, x2, y2 = bb
+                         boxes.append([max(0, int(x1)), max(0, int(y1)), min(w, int(x2)), min(h, int(y2))])
+             elif isinstance(dets, list):
+                 for d in dets:
+                     if len(d) >= 4:
+                         x1, y1, x2, y2 = d[:4]
+                         boxes.append([max(0, int(x1)), max(0, int(y1)), min(w, int(x2)), min(h, int(y2))])
+         except Exception as e:
+             logger.debug(f"RetinaFace detection error (image): {e}")
+     elif mtcnn is not None:
+         try:
+             # MTCNN expects RGB input; OpenCV images are BGR
+             boxes_mt, _ = mtcnn.detect(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
+             if boxes_mt is not None:
+                 for b in boxes_mt:
+                     x1, y1, x2, y2 = [int(max(0, val)) for val in b]
+                     boxes.append([x1, y1, x2, y2])
+         except Exception as e:
+             logger.debug(f"MTCNN detection error (image): {e}")
+     # Drop tiny or degenerate boxes
+     cleaned = []
+     for x1, y1, x2, y2 in boxes:
+         if x2 - x1 < 12 or y2 - y1 < 12:
+             continue
+         if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
+             continue
+         cleaned.append([x1, y1, x2, y2])
+     return cleaned
+
+ def _extract_face_region(img_bgr):
+     boxes = _detect_face_boxes(img_bgr)
+     if not boxes:
+         return None
+     # Keep the largest detected face
+     boxes = sorted(boxes, key=lambda b: (b[2] - b[0]) * (b[3] - b[1]), reverse=True)
+     x1, y1, x2, y2 = boxes[0]
+     h, w = img_bgr.shape[:2]
+     x1, y1, x2, y2 = max(0, x1), max(0, y1), min(w, x2), min(h, y2)
+     face = img_bgr[y1:y2, x1:x2]
+     if face is None or face.size == 0:
+         return None
+     return cv2.resize(face, (224, 224), interpolation=cv2.INTER_AREA)
+
+ # ---------------- batched inference helper -------------
+ def _batched_model_predict(pil_images):
+     if len(models) == 0:
+         return [0.0] * len(pil_images)
+     per_model_outputs = []
+     for model, proc in zip(models, processors):
+         try:
+             # Image processors resize/normalize themselves; no padding kwarg needed
+             inputs = proc(images=pil_images, return_tensors="pt").to(device)
+             with torch.no_grad():
+                 if device.type == "cuda":
+                     with torch.cuda.amp.autocast():
+                         logits = model(**inputs).logits
+                 else:
+                     logits = model(**inputs).logits
+             probs = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()
+             id2label = getattr(model.config, "id2label", {}) or {}
+             out_scores = []
+             for p in probs:
+                 idx = int(np.argmax(p))
+                 label = str(id2label.get(str(idx), id2label.get(idx, "unknown"))).lower()
+                 is_fake = any(k in label for k in ["fake", "manipulated", "forged", "edited"])
+                 conf = float(p[idx])
+                 # Map confidence to "fakeness": invert when the top class is real
+                 out_scores.append(conf if is_fake else 1.0 - conf)
+             per_model_outputs.append(out_scores)
+         except Exception as e:
+             logger.warning(f"Model batch predict failed (image): {e}")
+             per_model_outputs.append([0.0] * len(pil_images))
+
+     all_scores = np.array(per_model_outputs)
+     base_weights = np.array([0.4, 0.35, 0.25])[:all_scores.shape[0]]
+     if base_weights.sum() == 0:
+         base_weights = np.ones(all_scores.shape[0]) / all_scores.shape[0]
+     else:
+         base_weights = base_weights / base_weights.sum()
+     weighted = np.dot(base_weights, all_scores)
+     return weighted.tolist()
+
+ # ---------------- public API ----------------
+ def predict_image(image_path):
+     try:
+         img_bgr = cv2.imread(image_path)
+         if img_bgr is None:
+             return {"error": "cannot_read_image"}
+         face = _extract_face_region(img_bgr)
+         if face is None:
+             # Fallback: analyze the whole resized image
+             try:
+                 face = cv2.resize(img_bgr, (224, 224), interpolation=cv2.INTER_AREA)
+             except Exception:
+                 return {"error": "no_face_detected"}
+
+         pil = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
+         model_scores = _batched_model_predict([pil])
+         model_score = float(model_scores[0])
+         heuristic_score = aggregate_heuristics(face)
+         final = float(np.clip(0.85 * model_score + 0.15 * heuristic_score, 0, 1))
+         label = "fake" if final > 0.55 else "real"
+         return {
+             "top": {"label": label, "score": round(final, 4)},
+             "model_score": round(model_score, 4),
+             "heuristic_score": round(heuristic_score, 4),
+             "source": "image"
+         }
+     except Exception as e:
+         logger.exception("predict_image failed")
+         return {"error": str(e)}
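A minimal smoke test for this module, assuming the three IMAGE_MODEL_* environment variables point at valid image-classification checkpoints; the sample path is illustrative and not part of the commit:

    # hypothetical usage sketch
    from image_model_core import predict_image

    result = predict_image("samples/face.jpg")  # illustrative path
    print(result)  # e.g. {"top": {"label": "real", "score": 0.31}, "model_score": ..., "source": "image"}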
main.py ADDED
@@ -0,0 +1,157 @@
+ from fastapi import FastAPI, UploadFile, File
+ from fastapi.middleware.cors import CORSMiddleware
+ import os, uvicorn, tempfile, cv2, numpy as np, logging
+ from model_helper import ensemble_predict_from_path
+ from image_model_core import predict_image
+
+ # ------------------------------
+ # ⚙️ App Setup
+ # ------------------------------
+ app = FastAPI(title="Deepfake Detection API", version="2.0")
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # ------------------------------
+ # 🪵 Logging
+ # ------------------------------
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(message)s",
+ )
+ logger = logging.getLogger(__name__)
+
+ # ------------------------------
+ # 🧩 Heuristic functions (for videos)
+ # ------------------------------
+ def compute_fft_artifact_score(frame):
+     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+     f = np.fft.fft2(gray)
+     fshift = np.fft.fftshift(f)
+     magnitude = 20 * np.log(np.abs(fshift) + 1)
+     high_freq = np.mean(magnitude[-20:, -20:])
+     return float(min(high_freq / 255.0, 1.0))
+
+ def color_inconsistency_score(frame):
+     hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
+     h_std = np.std(hsv[:, :, 0])
+     return float(min(h_std / 90.0, 1.0))
+
+ def edge_warp_score(frame):
+     edges = cv2.Canny(frame, 100, 200)
+     return float(min(np.mean(edges) / 255.0, 1.0))
+
+ def aggregate_heuristics(frame):
+     fft_score = compute_fft_artifact_score(frame)
+     color_score = color_inconsistency_score(frame)
+     warp_score = edge_warp_score(frame)
+     return float(np.mean([fft_score, color_score, warp_score]))
+
+ # ------------------------------
+ # 🎥 Video Analysis Endpoint
+ # ------------------------------
+ @app.post("/analyze")
+ async def analyze_video(file: UploadFile = File(...)):
+     logger.info(f"🎞️ Received video file: {file.filename}")
+
+     # Save uploaded video temporarily
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
+         tmp.write(await file.read())
+         video_path = tmp.name
+
+     cap = cv2.VideoCapture(video_path)
+     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     if frame_count == 0:
+         cap.release()
+         return {"error": "Unable to read video"}
+
+     # Sample roughly 10 evenly spaced frames
+     frame_step = max(1, frame_count // 10)
+     model_scores, heuristic_scores = [], []
+
+     for i in range(0, frame_count, frame_step):
+         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+         ret, frame = cap.read()
+         if not ret:
+             continue
+
+         # --- Heuristic ---
+         h_score = aggregate_heuristics(frame)
+         heuristic_scores.append(h_score)
+
+         # --- Model ensemble prediction ---
+         # ensemble_predict_from_path runs the video pipeline; OpenCV's
+         # VideoCapture reads the JPEG as a one-frame "video".
+         temp_img_path = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False).name
+         cv2.imwrite(temp_img_path, frame)
+         preds = ensemble_predict_from_path(temp_img_path)
+         os.remove(temp_img_path)
+         label = preds["top"]["label"].lower()
+         if label == "unknown":
+             continue  # no usable prediction for this frame
+         is_fake_frame = label == "fake"
+         model_scores.append(float(preds["top"]["score"] if is_fake_frame else 1 - preds["top"]["score"]))
+
+     cap.release()
+     os.remove(video_path)
+
+     final_model_score = float(np.mean(model_scores) if model_scores else 0.0)
+     final_heuristic_score = float(np.mean(heuristic_scores) if heuristic_scores else 0.0)
+     final_score = 0.7 * final_model_score + 0.3 * final_heuristic_score
+     is_fake = bool(final_score > 0.5)
+
+     logger.info(f"✅ Video analyzed: score={final_score:.4f}, fake={is_fake}")
+
+     return {
+         "source": "video",
+         "model_score": round(final_model_score, 4),
+         "heuristic_score": round(final_heuristic_score, 4),
+         "final_score": round(final_score, 4),
+         "is_deepfake": is_fake
+     }
+
+ # ------------------------------
+ # 🖼️ Image Analysis Endpoint
+ # ------------------------------
+ @app.post("/predict/image")
+ async def analyze_image(file: UploadFile = File(...)):
+     logger.info(f"🖼️ Received image file: {file.filename}")
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
+             tmp.write(await file.read())
+             image_path = tmp.name
+
+         # 🔍 Run prediction
+         preds = predict_image(image_path)
+         os.remove(image_path)
+         if "error" in preds:
+             return {"error": preds["error"]}
+
+         model_score = preds.get("model_score", 0.0)
+         heuristic_score = preds.get("heuristic_score", 0.0)
+         final_score = preds["top"]["score"]
+         is_fake = preds["top"]["label"].lower() == "fake"
+
+         logger.info(f"✅ Image analyzed: score={final_score:.4f}, fake={is_fake}")
+
+         return {
+             "source": "image",
+             "model_score": round(model_score, 4),
+             "heuristic_score": round(heuristic_score, 4),
+             "final_score": round(final_score, 4),
+             "is_deepfake": is_fake
+         }
+
+     except Exception as e:
+         logger.exception("❌ Error during image analysis")
+         return {"error": str(e)}
+
+ # ------------------------------
+ # 🚀 Run Server
+ # ------------------------------
+ if __name__ == "__main__":
+     port = int(os.environ.get("PORT", 8000))
+     uvicorn.run(app, host="0.0.0.0", port=port)
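A minimal client sketch for the two endpoints (host, port, and file names are illustrative; swap /analyze for /predict/image with a JPEG to exercise the image path):

    # hypothetical client, not part of the commit
    import requests

    with open("clip.mp4", "rb") as f:
        resp = requests.post("http://localhost:7860/analyze",
                             files={"file": ("clip.mp4", f, "video/mp4")})
    print(resp.json())  # {"source": "video", ..., "is_deepfake": ...}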
model_helper.py ADDED
@@ -0,0 +1,179 @@
+ import torch, cv2, numpy as np
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
+ from PIL import Image
+ from facenet_pytorch import MTCNN
+ from temporal_model import TemporalConsistencyModel
+ import warnings, logging
+ import os
+ from dotenv import load_dotenv
+
+ warnings.filterwarnings("ignore")
+
+ # ---------- Logger Setup ----------
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(message)s",
+     handlers=[logging.StreamHandler()]
+ )
+ logger = logging.getLogger(__name__)
+
+ load_dotenv()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # ---------- Face Detector ----------
+ face_detector = MTCNN(keep_all=False, device=device)
+
+ # ---------- Temporal Model ----------
+ temporal_model = TemporalConsistencyModel(window=7, alpha=0.75)
+
+ # ---------- Model Definitions ----------
+ # Model IDs come from the environment; unset variables are skipped.
+ MODEL_PATHS = [m for m in (
+     os.getenv("VIDEO_MODEL_1"),
+     os.getenv("VIDEO_MODEL_2"),
+     os.getenv("VIDEO_MODEL_3"),
+ ) if m]
+
+ models, processors = [], []
+ for mid in MODEL_PATHS:
+     try:
+         proc = AutoImageProcessor.from_pretrained(mid)
+         model = AutoModelForImageClassification.from_pretrained(mid).to(device)
+         model.eval()
+         models.append(model)
+         processors.append(proc)
+         logger.info(f"✅ Loaded model: {mid}")
+     except Exception as e:
+         logger.warning(f"⚠️ Failed to load {mid}: {e}")
+
+ # ---------- Heuristic ----------
+ def heuristic_texture_analysis(frame):
+     # Ratio of global frequency energy to edge variance, squashed to 0..1
+     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+     freq = np.fft.fft2(gray)
+     freq_shift = np.fft.fftshift(freq)
+     mag = np.log(np.abs(freq_shift) + 1)
+     edge_var = np.var(cv2.Laplacian(gray, cv2.CV_64F))
+     texture_score = np.mean(mag) / (edge_var + 1e-5)
+     norm_score = np.clip(np.tanh(texture_score / 60), 0, 1)
+     return float(norm_score)
+
+ # ---------- Face Cropper (Fixed) ----------
+ def extract_face(frame):
+     # MTCNN expects RGB input; OpenCV frames are BGR
+     boxes, _ = face_detector.detect(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+     if boxes is not None and len(boxes) > 0:
+         x1, y1, x2, y2 = [int(max(0, b)) for b in boxes[0]]
+         face = frame[y1:y2, x1:x2]
+
+         if face is None or face.size == 0:
+             logger.warning("⚠️ Detected invalid face region; skipping frame.")
+             return None
+
+         return cv2.resize(face, (224, 224))
+     else:
+         logger.info("ℹ️ No face detected in this frame; skipping.")
+         return None
+
+ # ---------- Prediction ----------
+ def predict_frame(frame):
+     face_img = extract_face(frame)
+     if face_img is None:
+         return None  # skip frame gracefully
+
+     frame_img = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
+     preds = []
+
+     for model, proc in zip(models, processors):
+         try:
+             inputs = proc(images=frame_img, return_tensors="pt").to(device)
+             with torch.no_grad():
+                 logits = model(**inputs).logits
+             probs = torch.nn.functional.softmax(logits, dim=-1)[0].cpu().numpy()
+
+             id2label = model.config.id2label
+             label_idx = int(np.argmax(probs))
+
+             if str(label_idx) in id2label:
+                 label = id2label[str(label_idx)].lower()
+             elif label_idx in id2label:
+                 label = id2label[label_idx].lower()
+             else:
+                 label = "unknown"
+
+             is_fake = any(k in label for k in ["fake", "forged", "manipulated", "edited"])
+             confidence = float(probs[label_idx])
+
+             # Map confidence to "fakeness": invert when the top class is real
+             score = confidence if is_fake else 1 - confidence
+             preds.append(score)
+
+         except Exception as e:
+             logger.warning(f"⚠️ Model prediction failed for {model.__class__.__name__}: {e}")
+
+     if not preds:
+         logger.warning("⚠️ No valid model predictions; skipping frame.")
+         return None
+
+     # Weighted average (CNN: 0.4, ViT: 0.35, BEiT: 0.25), renormalized over
+     # however many models actually produced a score
+     weights = np.array([0.4, 0.35, 0.25])[:len(preds)]
+     weights /= weights.sum()
+     weighted_score = np.dot(preds, weights)
+     return float(np.clip(weighted_score, 0, 1))
+
+ # ---------- Main Pipeline ----------
+ def ensemble_predict_video(video_path, frame_interval=10):
+     cap = cv2.VideoCapture(video_path)
+     frame_preds, heuristics = [], []
+     frame_count = 0
+     temporal_model.history.clear()  # reset smoothing state between videos
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         if frame_count % frame_interval == 0:
+             model_score = predict_frame(frame)
+             if model_score is None:
+                 frame_count += 1
+                 continue
+
+             heuristic_score = heuristic_texture_analysis(frame)
+             combined_score = 0.8 * model_score + 0.2 * heuristic_score
+             temporal_score = temporal_model.update(combined_score)
+
+             frame_preds.append(temporal_score)
+             heuristics.append(heuristic_score)
+
+         frame_count += 1
+
+     cap.release()
+
+     if not frame_preds:
+         logger.error("❌ No valid frames processed. Returning unknown result.")
+         return {"top": {"label": "unknown", "score": 0.0}}
+
+     # The heuristic is already folded into each frame's combined score;
+     # its mean below is reported for diagnostics only.
+     model_score = float(np.mean(frame_preds))
+     heuristic_score = float(np.mean(heuristics))
+     final_score = float(np.clip(model_score, 0, 1))
+
+     logger.info(f"✅ Video processed | Final Score: {final_score:.4f}")
+
+     return {
+         "top": {
+             "label": "fake" if final_score > 0.55 else "real",
+             "score": round(final_score, 4)
+         },
+         "model_score": round(model_score, 4),
+         "heuristic_score": round(heuristic_score, 4),
+     }
+
+ # ---------- Compatibility Wrapper ----------
+ def ensemble_predict_from_path(video_path):
+     """Compatibility wrapper for main.py; also accepts a still image,
+     which OpenCV's VideoCapture reads as a one-frame video."""
+     return ensemble_predict_video(video_path)
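The frame-level ensemble renormalizes its base weights (0.4, 0.35, 0.25) over however many models actually produced a score. A worked example with invented per-model scores, assuming only two models loaded:

    import numpy as np

    preds = [0.90, 0.70]                          # per-model "fakeness" scores (invented)
    weights = np.array([0.4, 0.35, 0.25])[:len(preds)]
    weights /= weights.sum()                      # -> [0.5333, 0.4667]
    print(float(np.dot(preds, weights)))          # -> 0.8067, above the 0.55 "fake" threshold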
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ fastapi
+ uvicorn[standard]
+ pillow
+ opencv-python
+ numpy
+ torch
+ torchvision
+ timm
+ transformers
+ facenet-pytorch
+ scipy
+ python-multipart
+ python-dotenv
+ aiofiles
+ ffmpeg-python
+ imageio
+ matplotlib
+ scikit-image
+ retina-face
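Note that model_helper.py and image_model_core.py import python-dotenv, so it belongs in this list, and that the retina-face package pulls in TensorFlow, which grows the Docker image considerably. A local install is the usual:

    pip install -r requirements.txt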
temporal_model.py ADDED
@@ -0,0 +1,25 @@
+ import numpy as np
+
+ class TemporalConsistencyModel:
+     """
+     Simple temporal smoothing model to capture flicker and irregular changes.
+     Works as a moving average plus a penalty for inconsistent transitions.
+     """
+     def __init__(self, window=5, alpha=0.7):
+         self.window = window
+         self.alpha = alpha
+         self.history = []
+
+     def update(self, score):
+         self.history.append(score)
+         if len(self.history) > self.window:
+             self.history.pop(0)
+         smoothed = np.mean(self.history)
+         # Penalize high oscillations (flicker) across the window
+         flicker_penalty = np.std(self.history)
+         final = (self.alpha * smoothed) - (0.5 * flicker_penalty)
+         return float(np.clip(final, 0, 1))
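A small demonstration of the smoothing behavior (the score sequence is invented): a one-frame spike is damped by the moving average and the std-dev flicker penalty, so it cannot flip the video-level verdict on its own:

    # illustrative only, not part of the commit
    from temporal_model import TemporalConsistencyModel

    tm = TemporalConsistencyModel(window=5, alpha=0.7)
    for s in [0.20, 0.25, 0.90, 0.30, 0.28]:
        print(round(tm.update(s), 3))  # the 0.90 input yields roughly 0.16 after smoothing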