Skin Lesion Detection WebRTC

Real-Time Inference using WebRTC with TURN and FastAPI Signaling

This guide explains how any frontend application (mobile/web) can connect to a WebRTC inference server for real-time object detection using a Deep Learning model. The server is implemented in FastAPI and supports peer-to-peer streaming via TURN.

🧠 Overview

WebRTC streams video from webcam or mobile in real time.
TURN server handles NAT traversal and connectivity.
FastAPI Signaling Server processes SDP offer/answer exchange.

🔗 Base API URL

https://api-dev.helfie.ai

All endpoints below assume this as the base URL.

🔗 Server Endpoints

Signaling (Offer/Answer)

POST {BASE_URL}/skin/webrtc/offer

Headers:

{
  "api-key": "your-api-key"
}

api-key is mandatory for authenticating all requests.

Request Body:

{
  "sdp": "<offer_sdp>",
  "type": "offer",
  "user_id": "your-user-id (optional)"
}

user_id is optional. If provided, it is used to associate the session with a known user. If not provided, the session proceeds anonymously.

Response:

{
  "sdp": "<answer_sdp>",
  "type": "answer"
}

Health Check

GET {BASE_URL}/skin/webrtc/health

Response:

{
  "status": "WebRTC Skin API healthy"
}

🎯 Required `.env` Values (Client)

TURN_SERVER_URL=skinturn-helfie.eastus.cloudapp.azure.com
TURN_SERVER_PORT=3478
TURN_SERVER_PROTOCOL=turn

SERVER_URL=https://api-dev.helfie.ai/skin/webrtc/offer
CLIENT_FPS=30

API_KEY=your-api-key  
USER_ID=your-user-id  # Optional

🧪 What the Client Does

Capture camera stream via WebRTC.
Exchange SDP via FastAPI /offer endpoint:
- Generate SDP offer
- POST it along with api-key (and optionally user_id)
- Receive SDP answer and apply it
Send media stream to the server via WebRTC track.
Receive predictions from server via WebRTC DataChannel:

{
  "frame_id": 42,
  "boxes": [
    { "class": "lesion", "box": [x, y, width, height] },
    ...
  ]
}

Overlay detections using bounding boxes on each frame.

🛠️ Client SDKs & Tools

🔵 Web (React/JS)

RTCPeerConnection, MediaStreamTrack
simple-peer for abstracted WebRTC
fetch() for POST to /offer

🔵 Android (Java/Kotlin)

WebRTC Android SDK
Retrofit or okhttp for signaling

🔵 iOS (Swift)

WebRTC.framework
URLSession for POST signaling

⚠️ Important Notes

Use api-key (not api_key) as the header name: FastAPI converts underscores in header parameter names to hyphens, so the server expects the hyphenated form.
The server is multi-user and async-enabled via FastAPI and Uvicorn.
user_id is optional; when provided, it associates the session with a known user.
api-key is mandatory for authorization and must be valid.

🐍 Python Client Example

Ensure you have the following dependencies installed:

pip install aiohttp aiortc opencv-python av python-dotenv

demo.py

import asyncio
import json
import logging
import threading
import sys
import time
import aiohttp
import cv2
from aiortc import (
    RTCPeerConnection,
    RTCSessionDescription,
    RTCConfiguration,
    RTCIceServer,
    VideoStreamTrack,
)
from av import VideoFrame
from fractions import Fraction
from dotenv import load_dotenv
import os

# Load KEY=VALUE pairs from a local .env file (if present) into the process
# environment so every setting below can be overridden without editing code.
load_dotenv()

# Optional user identifier sent with the offer; None means "anonymous".
# It must always be defined because run() references it.
USER_ID = os.getenv("USER_ID") or None
# API key sent in the "api-key" header. Set API_KEY in the environment/.env
# or replace the placeholder default with a real key.
API_KEY = os.getenv("API_KEY", "API_KEY")

# TURN server settings; defaults match the values documented for this service.
TURN_PROTOCOL = os.getenv("TURN_SERVER_PROTOCOL", "turn")
TURN_URL = os.getenv("TURN_SERVER_URL", "skinturn-helfie.eastus.cloudapp.azure.com")
TURN_PORT = os.getenv("TURN_SERVER_PORT", "3478")

# An empty override (e.g. "TURN_SERVER_URL=") would otherwise yield a
# malformed ICE URL, so fail early with a clear message.
if TURN_URL and TURN_PORT:
    TURN_SERVER = f"{TURN_PROTOCOL}:{TURN_URL}:{TURN_PORT}"
else:
    raise ValueError("TURN server configuration is incomplete.")


# Signaling endpoint that receives the SDP offer and returns the SDP answer.
SERVER_URL = os.getenv("SERVER_URL", "https://api-dev.helfie.ai/skin/webrtc/offer")
# Target capture rate; used for the per-frame delay and the frame time base.
CLIENT_FPS = int(os.getenv("CLIENT_FPS", "30"))
if CLIENT_FPS <= 0:
    # 1 / CLIENT_FPS is evaluated later; reject values that would break it.
    raise ValueError("CLIENT_FPS must be a positive integer.")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("webrtc-client")

class OpenCVCaptureTrack(VideoStreamTrack):
    """Video track that feeds frames read from an OpenCV capture to WebRTC.

    Each frame is resized to a width of 640 pixels (height scaled to keep the
    aspect ratio), tagged with a monotonically increasing frame id (used as
    the frame's ``pts``), and its send time is remembered so the caller can
    compute a round-trip time when a result for that frame id comes back.

    The track also stores the most recent detection boxes so a display thread
    can draw them; access to ``bboxes`` is guarded by ``bbox_lock``.
    """

    # Maximum number of send timestamps kept for latency measurement.
    # Entries are normally removed when a result for that frame id arrives;
    # frames that never get a result would otherwise accumulate forever.
    MAX_PENDING_TIMESTAMPS = 300

    def __init__(self, cap, frame_delay=1/CLIENT_FPS):
        """Create the track.

        Args:
            cap: An opened ``cv2.VideoCapture`` (or any object whose ``read()``
                returns ``(ok, frame)``).
            frame_delay: Seconds to sleep before reading each frame.
        """
        super().__init__()
        self.cap = cap
        self.frame_delay = frame_delay
        self._pts = 0  # next frame id / presentation timestamp
        self.time_base = Fraction(1, CLIENT_FPS)
        self.sent_timestamps = {}  # frame id -> time.time() when the frame was produced
        self.bboxes = []  # latest detection results, as received from the server
        self.bbox_lock = threading.Lock()
        self.last_bboxes_frame_id = -1  # frame id of the results currently in bboxes

    async def recv(self):
        """Return the next video frame.

        Returns:
            An ``av.VideoFrame`` in ``bgr24`` format whose ``pts`` is the
            frame id.

        Raises:
            asyncio.CancelledError: If the capture fails to deliver a frame.
        """
        await asyncio.sleep(self.frame_delay)
        # NOTE(review): cap.read() is a synchronous call made from this
        # coroutine, so the event loop waits while the frame is grabbed.
        ret, frame = self.cap.read()
        if not ret:
            raise asyncio.CancelledError("Webcam frame failed.")

        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))

        video_frame = VideoFrame.from_ndarray(frame, format="bgr24")
        frame_id = self._pts
        video_frame.pts = frame_id
        video_frame.time_base = self.time_base

        self.sent_timestamps[frame_id] = time.time()
        # Dicts keep insertion order, so the first key is the oldest frame id.
        # Drop the oldest entries once the cap is exceeded to bound memory use.
        while len(self.sent_timestamps) > self.MAX_PENDING_TIMESTAMPS:
            self.sent_timestamps.pop(next(iter(self.sent_timestamps)))
        self._pts += 1
        return video_frame

    def update_bboxes(self, frame_id, boxes):
        """Store ``boxes`` if they belong to a newer frame than the current ones.

        Args:
            frame_id: Frame id the results refer to.
            boxes: Detection results to draw for that frame.
        """
        with self.bbox_lock:
            # Ignore results that arrive out of order (older than what we show).
            if frame_id > self.last_bboxes_frame_id:
                self.last_bboxes_frame_id = frame_id
                self.bboxes = boxes

def display_loop(cap, track):
    """Show the webcam feed with the latest detections drawn on top.

    Runs until a frame cannot be read or the user presses 'q' in the window.
    On 'q' the capture is released, the windows are closed and the main
    thread is interrupted so the whole client shuts down.

    Args:
        cap: Capture object whose ``read()`` returns ``(ok, frame)``.
        track: Object exposing ``bbox_lock`` and ``bboxes`` (the latest
            predictions, each expected to have ``"box"`` = [x, y, w, h] and
            ``"class"``).
    """
    # Local import: only needed to signal the main thread when the user quits.
    import _thread

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (640, int(frame.shape[0] * 640 / frame.shape[1])))

        # Snapshot the predictions under the lock and draw outside it, so the
        # writer is never blocked by drawing. "or []" tolerates a None value.
        with track.bbox_lock:
            predictions = list(track.bboxes or [])

        for pred in predictions:
            try:
                x, y, w, h = [int(v) for v in pred["box"]]
                cls = pred["class"]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f"{cls}", (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            except (KeyError, TypeError, ValueError, OverflowError, cv2.error) as exc:
                # Best effort: skip a malformed prediction but leave a trace
                # instead of hiding it completely.
                logger.debug("[CLIENT] Skipping malformed prediction %r: %s", pred, exc)

        cv2.imshow("Webcam with Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            cap.release()
            cv2.destroyAllWindows()
            # sys.exit() here would only end this thread; interrupt the main
            # thread instead so the client actually stops.
            _thread.interrupt_main()
            return

def _offer_payload(description):
    """Build the JSON body for the offer request from a session description."""
    payload = {"sdp": description.sdp, "type": description.type}
    # user_id is optional: use the module constant when it is defined,
    # otherwise fall back to the environment, and omit the field if neither
    # provides a value.
    user_id = globals().get("USER_ID") or os.getenv("USER_ID")
    if user_id:
        payload["user_id"] = user_id
    return payload


async def _exchange_offer(description):
    """POST the SDP offer to SERVER_URL; return the answer dict, or None on failure."""
    try:
        async with aiohttp.ClientSession() as sess:
            async with sess.post(
                SERVER_URL,
                json=_offer_payload(description),
                headers={"api-key": API_KEY},
            ) as resp:
                if resp.status != 200:
                    text = await resp.text()
                    logger.error(f"[CLIENT] Server returned {resp.status}: {text}")
                    return None
                return await resp.json()
    except Exception as e:
        logger.error(f"[CLIENT] Error during offer/answer exchange: {e}")
        return None


async def run():
    """Stream the webcam to the inference server and show returned detections.

    Steps: open camera 0, start the display thread, create a peer connection
    with the configured TURN server, add the video track and a "results" data
    channel, exchange SDP offer/answer over HTTP, then idle until interrupted.
    The peer connection and the camera are always released on exit, including
    when signaling fails.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Fail fast instead of idling forever with no video source.
        logger.error("[CLIENT] Could not open webcam (device 0).")
        cap.release()
        return

    track = OpenCVCaptureTrack(cap)
    threading.Thread(target=display_loop, args=(cap, track), daemon=True).start()

    # NOTE(review): no username/credential is passed for the TURN server;
    # confirm the server accepts unauthenticated allocations.
    rtc = RTCConfiguration([
        RTCIceServer(urls=[TURN_SERVER])
    ])
    pc = RTCPeerConnection(configuration=rtc)

    try:
        pc.addTrack(track)

        channel = pc.createDataChannel("results")

        @channel.on("message")
        def on_message(message):
            """Handle one prediction message: log latency and store the boxes."""
            try:
                data = json.loads(message)
                fid = data.get("frame_id")
                boxes = data.get("boxes", [])

                # The send time is recorded by the track when the frame is produced.
                ts = track.sent_timestamps.pop(fid, None)
                if ts is not None:
                    rtt = (time.time() - ts) * 1000
                    logger.info(f"[LATENCY] Frame {fid} RTT: {rtt:.1f} ms")

                track.update_bboxes(fid, boxes)
            except Exception as e:
                logger.warning(f"[CLIENT] Failed to process message: {e}")

        offer = await pc.createOffer()
        await pc.setLocalDescription(offer)

        answer = await _exchange_offer(pc.localDescription)
        if answer is None:
            # Error already logged; the finally block releases resources.
            return

        await pc.setRemoteDescription(
            RTCSessionDescription(sdp=answer["sdp"], type=answer["type"])
        )

        # Keep the coroutine alive while media and results flow in the background.
        while True:
            await asyncio.sleep(1)
    except (KeyboardInterrupt, asyncio.CancelledError):
        logger.info("[CLIENT] Shutting down...")
    finally:
        await pc.close()
        cap.release()

if __name__ == "__main__":
    try:
        asyncio.run(run())
    except KeyboardInterrupt:
        # Ctrl+C surfaces here as KeyboardInterrupt; run() has already done
        # its cleanup, so exit quietly instead of printing a traceback.
        pass

Skin Lesion Detection WebRTC

Real-Time Inference using WebRTC with TURN and FastAPI Signaling

🧠 Overview

🔗 Base API URL

🔗 Server Endpoints

Signaling (Offer/Answer)

Health Check

🎯 Required .env Values (Client)

🧪 What the Client Does

🛠️ Client SDKs & Tools

🔵 Web (React/JS)

🔵 Android (Java/Kotlin)

🔵 iOS (Swift)

⚠️ Important Notes

🐍 Python Client Example