Skip to main content

Skin Lesion Detection WebRTC

Real-Time Inference using WebRTC with TURN and FastAPI Signaling

This guide explains how any frontend application (mobile/web) can connect to a WebRTC inference server for real-time object detection using a Deep Learning model. The server is implemented in FastAPI and supports peer-to-peer streaming via TURN.


🧠 Overview

  • WebRTC streams video from webcam or mobile in real time.
  • TURN server handles NAT traversal and connectivity.
  • FastAPI Signaling Server processes SDP offer/answer exchange.

🔗 Base API URL

https://api-dev.helfie.ai

All endpoints below assume this as the base URL.


🔗 Server Endpoints

Signaling (Offer/Answer)

POST {BASE_URL}/skin/webrtc/offer

Headers:

{
"api-key": "your-api-key"
}

api-key is mandatory for authenticating all requests.

Request Body:

{
"sdp": "<offer_sdp>",
"type": "offer",
"user_id": "your-user-id (optional)"
}

user_id is optional. If provided, it is used to associate the session with a known user. If not provided, the session proceeds anonymously.

Response:

{
"sdp": "<answer_sdp>",
"type": "answer"
}

Health Check

GET {BASE_URL}/skin/webrtc/health

Response:

{
"status": "WebRTC Skin API healthy"
}

🎯 Required .env Values (Client)

TURN_SERVER_URL=skinturn-helfie.eastus.cloudapp.azure.com
TURN_SERVER_PORT=3478
TURN_SERVER_PROTOCOL=turn

SERVER_URL={BASE_URL}/skin/webrtc/offer
CLIENT_FPS=30

API_KEY=your-api-key
USER_ID=your-user-id # Optional


🧪 What the Client Does

  1. Capture camera stream via WebRTC.

  2. Exchange SDP via FastAPI /offer endpoint:

    • Generate SDP offer
    • POST it along with api-key (and optionally user_id)
    • Receive SDP answer and apply it
  3. Send media stream to the server via WebRTC track.

  4. Receive predictions from server via WebRTC DataChannel:

{
"frame_id": 42,
"boxes": [
{ "class": "lesion", "box": [x, y, width, height] },
...
]
}
  5. Overlay detections using bounding boxes on each frame.

๐Ÿ› ๏ธ Client SDKs & Toolsโ€‹

🔵 Web (React/JS)

  • RTCPeerConnection, MediaStreamTrack
  • simple-peer for abstracted WebRTC
  • fetch() for POST to /offer

🔵 Android (Java/Kotlin)

  • WebRTC Android SDK
  • Retrofit or okhttp for signaling

🔵 iOS (Swift)

  • WebRTC.framework
  • URLSession for POST signaling

โš ๏ธ Important Notesโ€‹

  • Send the key in a header named api-key (with a hyphen), not api_key (with an underscore); the FastAPI server expects the hyphenated header name.
  • The server is multi-user and async-enabled via FastAPI and Uvicorn.
  • user_id is optional; when provided, it is used only for session tracking.
  • api-key is mandatory for authorization and must be valid.

๐Ÿ Python Client Exampleโ€‹

  • Ensure you have the following dependencies installed:
pip install aiohttp aiortc opencv-python av python-dotenv
  • demo.py

import asyncio
import json
import logging
import threading
import sys
import time
import aiohttp
import cv2
from aiortc import (
RTCPeerConnection,
RTCSessionDescription,
RTCConfiguration,
RTCIceServer,
VideoStreamTrack,
)
from av import VideoFrame
from fractions import Fraction
from dotenv import load_dotenv
import os

# Load KEY=VALUE pairs from a local .env file into the process environment so
# the settings below can be overridden without editing this script. Every
# setting falls back to the value that used to be hard-coded here.
load_dotenv()

# Optional: associates the session with a known user. None (unset or empty)
# means the session is anonymous. It must always be defined because run()
# references it when building the offer request.
USER_ID = os.getenv("USER_ID") or None
# Mandatory: sent as the "api-key" request header. Set API_KEY in .env or
# replace the placeholder fallback with an actual API key.
API_KEY = os.getenv("API_KEY") or "API_KEY"

TURN_PROTOCOL = os.getenv("TURN_SERVER_PROTOCOL") or "turn"
TURN_URL = os.getenv("TURN_SERVER_URL", "skinturn-helfie.eastus.cloudapp.azure.com")
TURN_PORT = os.getenv("TURN_SERVER_PORT", "3478")

# An explicitly empty TURN_SERVER_URL / TURN_SERVER_PORT in the environment is
# a configuration error; fail at startup rather than at connection time.
if TURN_URL and TURN_PORT:
    TURN_SERVER = f"{TURN_PROTOCOL}:{TURN_URL}:{TURN_PORT}"
else:
    raise ValueError("TURN server configuration is incomplete.")


# Signaling endpoint that receives the SDP offer and returns the SDP answer.
SERVER_URL = os.getenv("SERVER_URL") or "https://api-dev.helfie.ai/skin/webrtc/offer"
# Capture/send rate in frames per second; also defines the video time base.
CLIENT_FPS = int(os.getenv("CLIENT_FPS") or 30)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("webrtc-client")

class OpenCVCaptureTrack(VideoStreamTrack):
    """Video track that feeds frames from an OpenCV capture into WebRTC.

    Besides producing frames, the track keeps two pieces of client state:

    * ``sent_timestamps`` maps a frame id to the wall-clock time the frame
      was produced, so the caller can compute a round-trip time when the
      matching prediction arrives.
    * ``bboxes`` holds the most recent predictions; access it while holding
      ``bbox_lock`` because it is read from another thread.

    Args:
        cap: An opened ``cv2.VideoCapture`` (or any object whose ``read()``
            returns ``(ok, frame)``).
        frame_delay: Seconds to wait before grabbing each frame.
        max_pending: Maximum number of unanswered frame timestamps to keep.
            Older entries are discarded so that frames the server never
            answers cannot make ``sent_timestamps`` grow without bound.
    """

    def __init__(self, cap, frame_delay=1/CLIENT_FPS, max_pending=10 * CLIENT_FPS):
        super().__init__()
        self.cap = cap
        self.frame_delay = frame_delay
        self.max_pending = max_pending
        # Frame counter; used both as the frame id and as the frame's pts.
        self._pts = 0
        self.time_base = Fraction(1, CLIENT_FPS)
        self.sent_timestamps = {}
        self.bboxes = []
        self.bbox_lock = threading.Lock()
        # Id of the frame the current bboxes belong to; -1 means "none yet".
        self.last_bboxes_frame_id = -1

    async def recv(self):
        """Return the next captured frame as a ``VideoFrame``.

        The frame is resized to a width of 640 pixels (height scaled to keep
        the aspect ratio) and stamped with a monotonically increasing pts.

        Raises:
            asyncio.CancelledError: If the capture returns no frame.
        """
        await asyncio.sleep(self.frame_delay)
        ret, frame = self.cap.read()
        if not ret:
            raise asyncio.CancelledError("Webcam frame failed.")

        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))

        video_frame = VideoFrame.from_ndarray(frame, format="bgr24")
        frame_id = self._pts
        video_frame.pts = frame_id
        video_frame.time_base = self.time_base

        self.sent_timestamps[frame_id] = time.time()
        # Dicts preserve insertion order, so the first key is the oldest
        # frame. Evict from the front until the map fits the bound again.
        while len(self.sent_timestamps) > self.max_pending:
            self.sent_timestamps.pop(next(iter(self.sent_timestamps)))

        self._pts += 1
        return video_frame

    def update_bboxes(self, frame_id, boxes):
        """Store ``boxes`` if they belong to a newer frame than the current ones.

        Results for a frame id that is not greater than the last accepted id
        are ignored, so late or duplicate messages never replace newer
        detections.
        """
        with self.bbox_lock:
            if frame_id > self.last_bboxes_frame_id:
                self.last_bboxes_frame_id = frame_id
                self.bboxes = boxes

def display_loop(cap, track):
    """Show the webcam feed with the latest detections drawn on top.

    Runs a blocking OpenCV GUI loop until a frame cannot be read or the user
    presses "q" in the preview window.

    Args:
        cap: The ``cv2.VideoCapture`` to read preview frames from.
        track: Object exposing ``bboxes`` (list of ``{"class", "box"}`` dicts
            with ``box`` as ``[x, y, width, height]``) and ``bbox_lock``.

    NOTE(review): ``OpenCVCaptureTrack.recv`` reads from the same capture
    object, so each frame goes either to the preview or to the server, not
    both. Confirm that concurrent ``read()`` calls are acceptable here.
    """
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))

        # Copy the current predictions under the lock and draw afterwards, so
        # the thread that updates the boxes is never blocked by drawing calls.
        with track.bbox_lock:
            boxes = list(track.bboxes or ())

        for pred in boxes:
            try:
                x, y, w, h = [int(v) for v in pred["box"]]
                cls = pred["class"]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f"{cls}", (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            except Exception as exc:
                # Drawing is best effort: one malformed prediction must not
                # stop the preview, but it should remain diagnosable.
                logger.debug("[CLIENT] Skipping malformed prediction %r: %s", pred, exc)

        cv2.imshow("Webcam with Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            cap.release()
            cv2.destroyAllWindows()
            # When this function runs in a worker thread, SystemExit ends
            # only that thread; it does not stop the interpreter.
            sys.exit(0)

async def run():
    """Stream the webcam to the inference server and display its detections.

    Steps:
      1. Open the default webcam and start the preview thread.
      2. Create a peer connection with the video track and a "results"
         data channel.
      3. POST the SDP offer to ``SERVER_URL`` and apply the SDP answer.
      4. Stay alive until the preview thread ends or the task is
         interrupted, then close the peer connection and release the camera.

    Failures during the offer/answer exchange are logged and end the
    function; the connection and camera are released on every exit path.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        logger.error("[CLIENT] Could not open the webcam (device 0).")
        cap.release()
        return

    track = OpenCVCaptureTrack(cap)
    display_thread = threading.Thread(
        target=display_loop, args=(cap, track), daemon=True
    )
    display_thread.start()

    # NOTE(review): no TURN username/credential is configured here; confirm
    # that the TURN server accepts unauthenticated allocations.
    rtc = RTCConfiguration([
        RTCIceServer(urls=[TURN_SERVER])
    ])
    pc = RTCPeerConnection(configuration=rtc)

    try:
        pc.addTrack(track)

        channel = pc.createDataChannel("results")

        @channel.on("message")
        def on_message(message):
            """Handle one prediction message: log latency and store boxes."""
            try:
                data = json.loads(message)
                fid = data.get("frame_id")
                boxes = data.get("boxes", [])

                ts = track.sent_timestamps.pop(fid, None)
                if ts is not None:
                    rtt = (time.time() - ts) * 1000
                    logger.info(f"[LATENCY] Frame {fid} RTT: {rtt:.1f} ms")

                track.update_bboxes(fid, boxes)
            except Exception as e:
                logger.warning(f"[CLIENT] Failed to process message: {e}")

        offer = await pc.createOffer()
        await pc.setLocalDescription(offer)

        payload = {
            "sdp": pc.localDescription.sdp,
            "type": pc.localDescription.type,
        }
        # USER_ID is optional and may not be defined at module level at all;
        # look it up defensively and only send it when it has a value.
        user_id = globals().get("USER_ID")
        if user_id:
            payload["user_id"] = user_id

        try:
            async with aiohttp.ClientSession() as sess:
                async with sess.post(
                    SERVER_URL,
                    json=payload,
                    headers={"api-key": API_KEY},
                ) as resp:
                    if resp.status != 200:
                        text = await resp.text()
                        logger.error(f"[CLIENT] Server returned {resp.status}: {text}")
                        return

                    answer = await resp.json()
        except Exception as e:
            logger.error(f"[CLIENT] Error during offer/answer exchange: {e}")
            return

        await pc.setRemoteDescription(
            RTCSessionDescription(sdp=answer["sdp"], type=answer["type"])
        )

        # The preview thread ends when the user presses "q" or the camera
        # stops delivering frames; stop the client then instead of idling
        # forever.
        while display_thread.is_alive():
            await asyncio.sleep(1)
        logger.info("[CLIENT] Preview closed, shutting down...")
    except (KeyboardInterrupt, asyncio.CancelledError):
        logger.info("[CLIENT] Shutting down...")
    finally:
        # Runs on every exit path, including the early returns above.
        await pc.close()
        cap.release()

if __name__ == "__main__":
    # Script entry point: run the client until it finishes or is interrupted.
    try:
        asyncio.run(run())
    except KeyboardInterrupt:
        # Depending on the Python version, Ctrl+C may still propagate out of
        # asyncio.run() after the coroutine has cleaned up; exit quietly
        # instead of printing a traceback.
        pass