Projeck_ML/trainYolo.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "502f6493",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ultralytics import YOLO\n",
    "\n",
    "# Load the classification checkpoint that training starts from.\n",
    "model = YOLO(\"yolo26n-cls.pt\")\n",
    "\n",
    "# Train on the dataset referenced by `data` at 224x224 input size.\n",
    "model.train(\n",
    "    data=\"dataset1\",\n",
    "    epochs=50,\n",
    "    imgsz=224,\n",
    "    batch=16,\n",
    ")\n",
    "\n",
    "# Export the trained model to ONNX: the quantization step reads a .onnx file\n",
    "# that nothing else in this notebook creates. The returned path is printed so\n",
    "# the run directory used by later cells can be checked against it.\n",
    "onnx_path = model.export(format=\"onnx\", imgsz=224)\n",
    "print(f\"ONNX exported to: {onnx_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7eac4377",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "from onnxruntime.quantization import quantize_dynamic\n",
    "from onnxruntime.quantization import QuantType\n",
    "\n",
    "# Both model files live in the weights folder of a single training run;\n",
    "# change WEIGHTS_DIR here when a different run should be quantized.\n",
    "WEIGHTS_DIR = Path(\"runs/classify/train5/weights\")\n",
    "SOURCE_ONNX = WEIGHTS_DIR / \"best.onnx\"\n",
    "INT8_ONNX = WEIGHTS_DIR / \"best_int8.onnx\"\n",
    "\n",
    "# Fail early with an actionable message instead of a loader error raised\n",
    "# from inside onnxruntime when the input model has not been exported yet.\n",
    "if not SOURCE_ONNX.is_file():\n",
    "    raise FileNotFoundError(\n",
    "        f\"ONNX model not found: {SOURCE_ONNX}. Export the trained weights to ONNX first.\"\n",
    "    )\n",
    "\n",
    "# Dynamic quantization with signed 8-bit weights; the result is written to INT8_ONNX.\n",
    "quantize_dynamic(\n",
    "    model_input=SOURCE_ONNX.as_posix(),\n",
    "    model_output=INT8_ONNX.as_posix(),\n",
    "    weight_type=QuantType.QInt8,\n",
    ")\n",
    "\n",
    "print(\"INT8 DONE\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a6d5d0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque\n",
    "import cv2\n",
    "import numpy as np\n",
    "import onnxruntime as ort\n",
    "\n",
    "# =============================================\n",
    "# CONFIG\n",
    "# =============================================\n",
    "WEIGHTS_PATH = \"runs/classify/train5/weights/best_int8.onnx\"\n",
    "\n",
    "CONF_THRESH = 0.6   # minimum top-1 probability for a frame to count as a detection\n",
    "SMOOTH_N = 5        # number of recent confident frames used for the majority vote\n",
    "IMAGE_SIZE = 224    # side length of the square model input\n",
    "roi_size = 400      # side length (pixels) of the centered crop taken from each frame\n",
    "\n",
    "# Output index -> class label.\n",
    "# NOTE(review): assumed to match the class order used at training time - confirm.\n",
    "CLASS_NAMES = {\n",
    "    0: \"Can\",\n",
    "    1: \"Others\",\n",
    "    2: \"PET-Bottle\",\n",
    "}\n",
    "\n",
    "# Overlay color per label, in OpenCV BGR order.\n",
    "CLASS_COLORS = {\n",
    "    \"Can\":        (0, 200, 50),\n",
    "    \"Others\":     (0, 165, 255),\n",
    "    \"PET-Bottle\": (255, 50, 50),\n",
    "}\n",
    "\n",
    "# =============================================\n",
    "# LOAD ONNX MODEL\n",
    "# =============================================\n",
    "session = ort.InferenceSession(WEIGHTS_PATH, providers=[\"CPUExecutionProvider\"])\n",
    "\n",
    "input_name  = session.get_inputs()[0].name\n",
    "output_name = session.get_outputs()[0].name\n",
    "input_shape = session.get_inputs()[0].shape\n",
    "\n",
    "print(\"===================================\")\n",
    "print(\"ONNX MODEL LOADED\")\n",
    "print(\"===================================\")\n",
    "print(f\"Input  : {input_name}  shape={input_shape}\")\n",
    "print(f\"Output : {output_name}\")\n",
    "for i, name in CLASS_NAMES.items():\n",
    "    print(f\"  [{i}] {name}\")\n",
    "\n",
    "# =============================================\n",
    "# CHECK WHETHER THE MODEL ALREADY INCLUDES SOFTMAX\n",
    "# Run the model once on probe inputs and inspect the outputs.\n",
    "# =============================================\n",
    "dummy     = np.zeros((1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)\n",
    "dummy_out = session.run([output_name], {input_name: dummy})[0][0]\n",
    "output_sum = float(np.sum(dummy_out))\n",
    "\n",
    "# A second, different probe: raw logits are unlikely to sum to ~1 for two\n",
    "# unrelated inputs, whereas a softmax output does so for every input.\n",
    "probe     = np.ones_like(dummy)\n",
    "probe_out = session.run([output_name], {input_name: probe})[0][0]\n",
    "\n",
    "# Probabilities are non-negative and sum to ~1.0 -> softmax is inside the model.\n",
    "# Anything else -> the output is still raw logits.\n",
    "ALREADY_SOFTMAX = all(\n",
    "    float(np.min(out)) >= 0.0 and abs(float(np.sum(out)) - 1.0) < 0.1\n",
    "    for out in (dummy_out, probe_out)\n",
    ")\n",
    "print(f\"\\nOutput dummy sum = {output_sum:.4f}\")\n",
    "print(f\"Model sudah include softmax? -> {'YA' if ALREADY_SOFTMAX else 'TIDAK'}\\n\")\n",
    "\n",
    "# =============================================\n",
    "# SOFTMAX (hanya dipakai jika model belum include)\n",
    "# =============================================\n",
    "def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:\n",
    "    \"\"\"Return the softmax of ``x`` along ``axis``.\n",
    "\n",
    "    The maximum along ``axis`` is subtracted before exponentiating so that\n",
    "    large scores do not overflow. For a 1-D score vector the result equals\n",
    "    normalizing over the whole array; for batched input every slice along\n",
    "    ``axis`` is normalized independently.\n",
    "\n",
    "    Args:\n",
    "        x: Array of scores.\n",
    "        axis: Axis that holds the per-class scores (default: last axis).\n",
    "\n",
    "    Returns:\n",
    "        Array of the same shape as ``x`` whose values sum to 1 along ``axis``.\n",
    "    \"\"\"\n",
    "    shifted = x - np.max(x, axis=axis, keepdims=True)\n",
    "    e = np.exp(shifted)\n",
    "    return e / np.sum(e, axis=axis, keepdims=True)\n",
    "\n",
    "# =============================================\n",
    "# PREPROCESSING\n",
    "# =============================================\n",
    "def preprocess(bgr_img: np.ndarray, img_size: int = IMAGE_SIZE) -> np.ndarray:\n",
    "    \"\"\"Turn a BGR image into a model input tensor.\n",
    "\n",
    "    The image is resized to ``img_size`` x ``img_size``, converted from BGR to\n",
    "    RGB, scaled to float32 values in [0, 1], and rearranged from (H, W, C) to a\n",
    "    contiguous (1, C, H, W) array.\n",
    "    \"\"\"\n",
    "    target_size = (img_size, img_size)\n",
    "    resized = cv2.resize(bgr_img, target_size, interpolation=cv2.INTER_LINEAR)\n",
    "    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)\n",
    "    scaled = rgb.astype(np.float32) / 255.0\n",
    "    # Channels first, then a leading batch axis of size 1.\n",
    "    chw = np.transpose(scaled, (2, 0, 1))\n",
    "    batched = np.expand_dims(chw, axis=0)\n",
    "    return np.ascontiguousarray(batched)\n",
    "\n",
    "# =============================================\n",
    "# SMOOTHING BUFFER\n",
    "# =============================================\n",
    "# Class indices of the most recent confident frames (at most SMOOTH_N).\n",
    "buffer = deque(maxlen=SMOOTH_N)\n",
    "\n",
    "# =============================================\n",
    "# CAMERA\n",
    "# =============================================\n",
    "cap = cv2.VideoCapture(1, cv2.CAP_DSHOW)\n",
    "cap.set(cv2.CAP_PROP_FRAME_WIDTH,  1920)\n",
    "cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)\n",
    "\n",
    "# Read the frame size back and report it; it can differ from the request.\n",
    "actual_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)\n",
    "actual_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)\n",
    "print(f\"Resolusi kamera: {int(actual_w)} x {int(actual_h)}\\n\")\n",
    "\n",
    "frame_count = 0\n",
    "\n",
    "# =============================================\n",
    "# MAIN LOOP\n",
    "# =============================================\n",
    "# try/finally guarantees that the camera and the OpenCV windows are released\n",
    "# even when the loop ends through a kernel interrupt or an unexpected error.\n",
    "try:\n",
    "    while True:\n",
    "        ret, frame = cap.read()\n",
    "        if not ret:\n",
    "            print(\"Kamera gagal\")\n",
    "            break\n",
    "\n",
    "        frame = cv2.flip(frame, 1)  # mirror horizontally\n",
    "        h, w, _ = frame.shape\n",
    "\n",
    "        # Centered ROI, clamped to the frame: without clamping, a frame smaller\n",
    "        # than roi_size gives negative slice indices, which wrap around and\n",
    "        # silently crop the wrong region.\n",
    "        cx, cy = w // 2, h // 2\n",
    "        half = roi_size // 2\n",
    "        x1 = max(cx - half, 0)\n",
    "        y1 = max(cy - half, 0)\n",
    "        x2 = min(cx + half, w)\n",
    "        y2 = min(cy + half, h)\n",
    "\n",
    "        roi = frame[y1:y2, x1:x2]\n",
    "        if roi.size == 0:\n",
    "            continue\n",
    "\n",
    "        # =========================================\n",
    "        # INFERENCE\n",
    "        # =========================================\n",
    "        tensor     = preprocess(roi)\n",
    "        raw_output = session.run([output_name], {input_name: tensor})[0][0]  # (num_classes,)\n",
    "\n",
    "        # Apply softmax only when the model does not already include it.\n",
    "        probs = raw_output if ALREADY_SOFTMAX else softmax(raw_output)\n",
    "\n",
    "        idx  = int(np.argmax(probs))\n",
    "        conf = float(probs[idx])\n",
    "\n",
    "        # DEBUG: print every 30th frame.\n",
    "        frame_count += 1\n",
    "        if frame_count % 30 == 0:\n",
    "            print(f\"[DEBUG] raw  = {np.round(raw_output, 4)}\")\n",
    "            print(f\"        prob = {np.round(probs, 4)}\")\n",
    "            print(f\"        top1 = {CLASS_NAMES.get(idx,'?')}  conf={conf*100:.1f}%\")\n",
    "\n",
    "        # =========================================\n",
    "        # LABEL SMOOTHING\n",
    "        # =========================================\n",
    "        if conf >= CONF_THRESH:\n",
    "            buffer.append(idx)\n",
    "            # Majority vote over the recent confident frames.\n",
    "            stable_idx = max(set(buffer), key=buffer.count)\n",
    "            label = CLASS_NAMES.get(stable_idx, f\"class_{stable_idx}\")\n",
    "            color = CLASS_COLORS.get(label, (255, 255, 255))\n",
    "            # Show the probability of the class that is actually displayed; the\n",
    "            # current top-1 class can differ from the smoothed label.\n",
    "            stable_conf = float(probs[stable_idx])\n",
    "            text  = f\"{label} {stable_conf * 100:.1f}%\"\n",
    "        else:\n",
    "            label = \"Unknown\"\n",
    "            color = (120, 120, 120)\n",
    "            text  = f\"Low conf: {conf * 100:.1f}%\"  # show the value for debugging\n",
    "\n",
    "        # =========================================\n",
    "        # DISPLAY\n",
    "        # =========================================\n",
    "        display_frame = frame.copy()\n",
    "\n",
    "        cv2.rectangle(display_frame, (x1, y1), (x2, y2), color, 2)\n",
    "\n",
    "        # Keep the caption inside the image when the ROI touches the top edge.\n",
    "        cv2.putText(display_frame, text,\n",
    "                    (x1, max(y1 - 10, 20)),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)\n",
    "\n",
    "        cv2.putText(display_frame, \"[ESC] Quit\",\n",
    "                    (10, h - 15),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)\n",
    "\n",
    "        roi_display = cv2.resize(roi, (IMAGE_SIZE, IMAGE_SIZE))\n",
    "        cv2.putText(roi_display, text,\n",
    "                    (10, 25),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)\n",
    "\n",
    "        cv2.imshow(\"Camera\", display_frame)\n",
    "        cv2.imshow(\"ROI\",    roi_display)\n",
    "\n",
    "        # ESC (key code 27) ends the loop.\n",
    "        if cv2.waitKey(1) & 0xFF == 27:\n",
    "            break\n",
    "finally:\n",
    "    cap.release()\n",
    "    cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}