Skin Lesion Detection WebRTC
Real-Time Inference using WebRTC with TURN and FastAPI Signaling
This guide explains how any frontend application (mobile/web) can connect to a WebRTC inference server for real-time object detection using a Deep Learning model. The server is implemented in FastAPI and supports peer-to-peer streaming via TURN.
Overview
- WebRTC streams video from webcam or mobile in real time.
- TURN server handles NAT traversal and connectivity.
- FastAPI Signaling Server processes SDP offer/answer exchange.
Base API URL
https://api-dev.helfie.ai
All endpoints below assume this as the base URL.
Server Endpoints
Signaling (Offer/Answer)
POST {BASE_URL}/skin/webrtc/offer
Headers:
{
"api-key": "your-api-key"
}
`api-key` is mandatory for authenticating all requests.
Request Body:
{
"sdp": "<offer_sdp>",
"type": "offer",
"user_id": "your-user-id (optional)"
}
`user_id` is optional. If provided, it is used to associate the session with a known user. If not provided, the session proceeds anonymously.
Response:
{
"sdp": "<answer_sdp>",
"type": "answer"
}
Health Check
GET {BASE_URL}/skin/webrtc/health
Response:
{
"status": "WebRTC Skin API healthy"
}
Required .env Values (Client)
TURN_SERVER_URL=skinturn-helfie.eastus.cloudapp.azure.com
TURN_SERVER_PORT=3478
TURN_SERVER_PROTOCOL=turn
SERVER_URL={BASE_URL}/skin/webrtc/offer
CLIENT_FPS=30
API_KEY=your-api-key
USER_ID=your-user-id # Optional
What the Client Does
- Capture the camera stream via WebRTC.
- Exchange SDP via the FastAPI `/offer` endpoint:
  - Generate an SDP offer.
  - POST it along with `api-key` (and optionally `user_id`).
  - Receive the SDP answer and apply it.
- Send the media stream to the server via a WebRTC track.
- Receive predictions from the server via a WebRTC `DataChannel`:
{
"frame_id": 42,
"boxes": [
{ "class": "lesion", "box": [x, y, width, height] },
...
]
}
- Overlay detections using bounding boxes on each frame.
Client SDKs & Tools
Web (React/JS)
- `RTCPeerConnection`, `MediaStreamTrack`
- `simple-peer` for abstracted WebRTC
- `fetch()` for POST to `/offer`
Android (Java/Kotlin)
- WebRTC Android SDK
- `Retrofit` or `okhttp` for signaling
iOS (Swift)
- `WebRTC.framework`
- `URLSession` for POST signaling
Important Notes
- Use `api-key` (not `api_key`) in headers due to FastAPI naming.
- The server is multi-user and async-enabled via FastAPI and Uvicorn.
- `user_id` is optional for session tracking but not required.
- `api-key` is mandatory for authorization and must be valid.
Python Client Example
- Ensure you have the following dependencies installed:
pip install aiohttp aiortc opencv-python av python-dotenv
demo.py
import asyncio
import json
import logging
import threading
import sys
import time
import aiohttp
import cv2
from aiortc import (
RTCPeerConnection,
RTCSessionDescription,
RTCConfiguration,
RTCIceServer,
VideoStreamTrack,
)
from av import VideoFrame
from fractions import Fraction
from dotenv import load_dotenv
import os
load_dotenv()
# Client configuration.
# Every setting can be overridden through an environment variable (including
# values supplied by the .env file loaded above); the literals below are the
# demo defaults.

# Optional: None (or an empty value) means the session proceeds anonymously.
USER_ID = os.getenv("USER_ID") or None
API_KEY = os.getenv("API_KEY", "API_KEY")  # Replace with actual API key

TURN_PROTOCOL = os.getenv("TURN_SERVER_PROTOCOL", "turn")
TURN_URL = os.getenv("TURN_SERVER_URL", "skinturn-helfie.eastus.cloudapp.azure.com")
TURN_PORT = os.getenv("TURN_SERVER_PORT", "3478")

# An empty override (e.g. TURN_SERVER_URL=) must fail loudly rather than
# produce a malformed ICE server URL.
if TURN_URL and TURN_PORT:
    TURN_SERVER = f"{TURN_PROTOCOL}:{TURN_URL}:{TURN_PORT}"
else:
    raise ValueError("TURN server configuration is incomplete.")

SERVER_URL = os.getenv("SERVER_URL", "https://api-dev.helfie.ai/skin/webrtc/offer")

# CLIENT_FPS is used as a divisor and as a Fraction denominator further down,
# so it has to be a strictly positive integer.
CLIENT_FPS = int(os.getenv("CLIENT_FPS", "30"))
if CLIENT_FPS <= 0:
    raise ValueError("CLIENT_FPS must be a positive integer.")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("webrtc-client")
class OpenCVCaptureTrack(VideoStreamTrack):
    """Video track that feeds frames read from an OpenCV capture into WebRTC.

    Besides producing frames, the track records when each frame was sent
    (``sent_timestamps``) so the caller can compute round-trip latency, and
    holds the most recent detection boxes (``bboxes``) for a display thread.

    Args:
        cap: An opened ``cv2.VideoCapture``-like object exposing ``read()``.
        frame_delay: Seconds to wait before reading each frame.
    """

    # Upper bound on frames still waiting for a server reply. Frames the
    # server never answers would otherwise accumulate forever; 300 entries is
    # about 10 seconds of video at the default 30 FPS.
    MAX_PENDING_TIMESTAMPS = 300

    def __init__(self, cap, frame_delay=1/CLIENT_FPS):
        super().__init__()
        self.cap = cap
        self.frame_delay = frame_delay
        self._pts = 0  # frame counter, doubles as the frame id
        self.time_base = Fraction(1, CLIENT_FPS)
        self.sent_timestamps = {}  # frame_id -> wall-clock send time
        self.bboxes = []
        self.bbox_lock = threading.Lock()  # guards bboxes / last_bboxes_frame_id
        self.last_bboxes_frame_id = -1

    async def recv(self):
        """Read, resize and timestamp the next webcam frame.

        Returns:
            An ``av.VideoFrame`` whose ``pts`` is the sequential frame id.

        Raises:
            asyncio.CancelledError: If the capture fails to deliver a frame.
        """
        await asyncio.sleep(self.frame_delay)
        ret, frame = self.cap.read()
        if not ret:
            raise asyncio.CancelledError("Webcam frame failed.")

        # Normalise to 640 px wide, preserving the aspect ratio.
        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))
        video_frame = VideoFrame.from_ndarray(frame, format="bgr24")

        frame_id = self._pts
        video_frame.pts = frame_id
        video_frame.time_base = self.time_base

        self.sent_timestamps[frame_id] = time.time()
        # Dicts keep insertion order, so the first key is the oldest frame.
        while len(self.sent_timestamps) > self.MAX_PENDING_TIMESTAMPS:
            self.sent_timestamps.pop(next(iter(self.sent_timestamps)))

        self._pts += 1
        return video_frame

    def update_bboxes(self, frame_id, boxes):
        """Store ``boxes`` if they belong to a newer frame than the last update.

        Args:
            frame_id: Id of the frame the predictions refer to.
            boxes: Iterable of prediction dicts; ``None`` is stored as an
                empty list so readers can always iterate ``bboxes``.
        """
        with self.bbox_lock:
            if frame_id > self.last_bboxes_frame_id:
                self.last_bboxes_frame_id = frame_id
                self.bboxes = boxes or []
def display_loop(cap, track):
    """Show the webcam feed with the latest detection boxes drawn on top.

    Intended to run on a worker thread. The loop ends when the capture stops
    delivering frames or when the user presses ``q`` in the preview window.

    Args:
        cap: ``cv2.VideoCapture``-like object to read preview frames from.
        track: Object exposing ``bbox_lock`` and ``bboxes`` (the latest
            predictions, each a dict with ``"box"`` and ``"class"`` keys).
    """
    # Local import: only needed to signal the main thread when quitting.
    import _thread

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))

        # Snapshot under the lock and draw outside it, so the lock is never
        # held during the (comparatively slow) OpenCV drawing calls.
        with track.bbox_lock:
            predictions = list(track.bboxes or ())

        for pred in predictions:
            try:
                x, y, w, h = [int(v) for v in pred["box"]]
                cls = pred["class"]
            except (KeyError, TypeError, ValueError):
                # Malformed prediction: skip it, keep drawing the rest.
                continue
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, f"{cls}", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.imshow("Webcam with Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            cap.release()
            cv2.destroyAllWindows()
            # sys.exit() here would only terminate this worker thread and
            # leave the client running; interrupt the main thread instead so
            # the whole program shuts down.
            _thread.interrupt_main()
            return
async def run():
    """Stream the webcam to the inference server and display its predictions.

    Steps: open the webcam, start the preview thread, create the peer
    connection with a ``results`` data channel, exchange the SDP offer/answer
    over HTTP, then idle until cancelled. The peer connection and the camera
    are released on every exit path.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        logger.error("[CLIENT] Could not open webcam (device 0).")
        cap.release()
        return

    track = OpenCVCaptureTrack(cap)
    threading.Thread(target=display_loop, args=(cap, track), daemon=True).start()

    rtc = RTCConfiguration([
        RTCIceServer(urls=[TURN_SERVER])
    ])
    pc = RTCPeerConnection(configuration=rtc)

    try:
        pc.addTrack(track)
        channel = pc.createDataChannel("results")

        @channel.on("message")
        def on_message(message):
            """Log round-trip latency and publish the received boxes."""
            try:
                data = json.loads(message)
                fid = data.get("frame_id")
                boxes = data.get("boxes", [])
                ts = track.sent_timestamps.pop(fid, None)
                if ts is not None:
                    rtt = (time.time() - ts) * 1000
                    logger.info(f"[LATENCY] Frame {fid} RTT: {rtt:.1f} ms")
                track.update_bboxes(fid, boxes)
            except Exception as e:
                logger.warning(f"[CLIENT] Failed to process message: {e}")

        offer = await pc.createOffer()
        await pc.setLocalDescription(offer)

        payload = {
            "sdp": pc.localDescription.sdp,
            "type": pc.localDescription.type,
        }
        # user_id is optional and the module-level USER_ID constant may be
        # commented out, so look it up defensively (falling back to the
        # environment) and only send it when a value is actually set.
        user_id = globals().get("USER_ID") or os.getenv("USER_ID")
        if user_id:
            payload["user_id"] = user_id

        try:
            async with aiohttp.ClientSession() as sess:
                async with sess.post(
                    SERVER_URL,
                    json=payload,
                    headers={"api-key": API_KEY},
                ) as resp:
                    if resp.status != 200:
                        text = await resp.text()
                        logger.error(f"[CLIENT] Server returned {resp.status}: {text}")
                        return
                    answer = await resp.json()
        except Exception as e:
            logger.error(f"[CLIENT] Error during offer/answer exchange: {e}")
            return

        await pc.setRemoteDescription(
            RTCSessionDescription(sdp=answer["sdp"], type=answer["type"])
        )

        # Keep the connection alive until the task is cancelled.
        try:
            while True:
                await asyncio.sleep(1)
        except (KeyboardInterrupt, asyncio.CancelledError):
            logger.info("[CLIENT] Shutting down...")
    finally:
        # Runs on normal shutdown and on every early return / error above,
        # so the peer connection and camera are never leaked.
        await pc.close()
        cap.release()
if __name__ == "__main__":
    try:
        asyncio.run(run())
    except KeyboardInterrupt:
        # run() handles cancellation itself (it logs and releases the peer
        # connection and camera), so an interrupt that still surfaces here
        # needs no traceback.
        pass