{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1f0c3d2",
   "metadata": {},
   "source": [
    "# Can / Others / PET-Bottle classifier: train, quantize, run live\n",
    "\n",
    "1. Train a YOLO classification model on `dataset1` and export it to ONNX.\n",
    "2. Quantize the ONNX weights to INT8 with ONNX Runtime.\n",
    "3. Classify a centered camera ROI in real time.\n",
    "\n",
    "Run the cells top to bottom. The quantization and camera cells reuse the paths produced earlier in the session and fall back to `runs/classify/train5/weights/` when the earlier cells were skipped."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "502f6493",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "from ultralytics import YOLO\n",
    "\n",
    "model = YOLO(\"yolo26n-cls.pt\")\n",
    "\n",
    "model.train(\n",
    "    data=\"dataset1\",\n",
    "    epochs=50,\n",
    "    imgsz=224,\n",
    "    batch=16,\n",
    ")\n",
    "\n",
    "# Training only writes .pt checkpoints, but the quantization cell reads an\n",
    "# ONNX file. export() returns the path of the file it wrote.\n",
    "onnx_fp32_path = Path(model.export(format=\"onnx\", imgsz=224))\n",
    "print(f\"ONNX export: {onnx_fp32_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7eac4377",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "from onnxruntime.quantization import QuantType, quantize_dynamic\n",
    "\n",
    "# Used only when the training/export cell was not run in this kernel session.\n",
    "FALLBACK_FP32_ONNX = Path(\"runs/classify/train5/weights/best.onnx\")\n",
    "\n",
    "fp32_path = Path(globals().get(\"onnx_fp32_path\", FALLBACK_FP32_ONNX))\n",
    "if not fp32_path.is_file():\n",
    "    raise FileNotFoundError(\n",
    "        f\"{fp32_path} not found. Run the training/export cell first, or point \"\n",
    "        \"FALLBACK_FP32_ONNX at an existing ONNX export.\"\n",
    "    )\n",
    "\n",
    "onnx_int8_path = fp32_path.with_name(f\"{fp32_path.stem}_int8.onnx\")\n",
    "\n",
    "# NOTE(review): the ONNX Runtime docs recommend static quantization for CNN\n",
    "# models and dynamic quantization for RNN/transformer models - confirm that\n",
    "# dynamic INT8 gives acceptable accuracy and speed for this classifier.\n",
    "quantize_dynamic(\n",
    "    model_input=str(fp32_path),\n",
    "    model_output=str(onnx_int8_path),\n",
    "    weight_type=QuantType.QInt8,\n",
    ")\n",
    "\n",
    "print(\"INT8 DONE\")\n",
    "print(f\"Quantized model: {onnx_int8_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5b47a90",
   "metadata": {},
   "source": [
    "## Live camera inference\n",
    "\n",
    "Configuration, helper functions, model loading and the camera loop live in separate cells so each can be re-run on its own. Press **ESC** in a preview window to stop the loop."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a6d5d0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from collections import Counter, deque\n",
    "\n",
    "import cv2\n",
    "import numpy as np\n",
    "import onnxruntime as ort\n",
    "\n",
    "# =============================================\n",
    "# CONFIG\n",
    "# =============================================\n",
    "# Prefer the model quantized in this session; otherwise use the last known run.\n",
    "WEIGHTS_PATH = str(\n",
    "    globals().get(\"onnx_int8_path\", \"runs/classify/train5/weights/best_int8.onnx\")\n",
    ")\n",
    "\n",
    "CONF_THRESH = 0.6          # minimum top-1 probability for a frame to cast a vote\n",
    "SMOOTH_N = 5               # number of recent votes kept for label smoothing\n",
    "IMAGE_SIZE = 224           # model input side length (same value as imgsz in training)\n",
    "ROI_SIZE = 400             # side length, in pixels, of the centered square ROI\n",
    "SOFTMAX_TOL = 1e-3         # allowed |sum - 1| when probing for a built-in softmax\n",
    "DEBUG_EVERY_N_FRAMES = 30  # print raw outputs once every N frames\n",
    "ESC_KEY = 27\n",
    "\n",
    "CAMERA_INDEX = 1\n",
    "FRAME_WIDTH = 1920\n",
    "FRAME_HEIGHT = 1080\n",
    "# DirectShow exists only on Windows; let OpenCV choose a backend elsewhere.\n",
    "CAMERA_BACKEND = cv2.CAP_DSHOW if sys.platform.startswith(\"win\") else cv2.CAP_ANY\n",
    "\n",
    "CLASS_NAMES = {\n",
    "    0: \"Can\",\n",
    "    1: \"Others\",\n",
    "    2: \"PET-Bottle\",\n",
    "}\n",
    "\n",
    "# Overlay colors in OpenCV channel order (B, G, R).\n",
    "CLASS_COLORS = {\n",
    "    \"Can\": (0, 200, 50),\n",
    "    \"Others\": (0, 165, 255),\n",
    "    \"PET-Bottle\": (255, 50, 50),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7c1e2a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def softmax(x: np.ndarray) -> np.ndarray:\n",
    "    \"\"\"Return the softmax of a 1-D score vector.\n",
    "\n",
    "    The maximum is subtracted before exponentiating so large scores do not\n",
    "    overflow; the shift does not change the result.\n",
    "    \"\"\"\n",
    "    exp_scores = np.exp(x - x.max())\n",
    "    return exp_scores / exp_scores.sum()\n",
    "\n",
    "\n",
    "def looks_like_probabilities(values: np.ndarray, tol: float = SOFTMAX_TOL) -> bool:\n",
    "    \"\"\"Return True when `values` already forms a probability distribution.\n",
    "\n",
    "    Checking only the sum can misfire on raw logits that happen to add up to\n",
    "    about 1, so every entry must also lie in [0, 1].\n",
    "    \"\"\"\n",
    "    in_range = bool(np.all(values >= 0.0) and np.all(values <= 1.0))\n",
    "    return in_range and abs(float(np.sum(values)) - 1.0) < tol\n",
    "\n",
    "\n",
    "def preprocess(bgr_img: np.ndarray, img_size: int = IMAGE_SIZE) -> np.ndarray:\n",
    "    \"\"\"Turn a BGR image into a float32 NCHW tensor for the ONNX session.\n",
    "\n",
    "    The image is resized to (img_size, img_size), converted to RGB and divided\n",
    "    by 255. Returns a contiguous array of shape (1, 3, img_size, img_size).\n",
    "    \"\"\"\n",
    "    img = cv2.resize(bgr_img, (img_size, img_size), interpolation=cv2.INTER_LINEAR)\n",
    "    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
    "    img = img.astype(np.float32) / 255.0\n",
    "    img = np.transpose(img, (2, 0, 1))[np.newaxis, ...]  # (1, 3, H, W)\n",
    "    return np.ascontiguousarray(img)\n",
    "\n",
    "\n",
    "def center_roi_box(frame_w: int, frame_h: int, roi_size: int = ROI_SIZE) -> tuple:\n",
    "    \"\"\"Return (x1, y1, x2, y2) of a centered square ROI clamped to the frame.\n",
    "\n",
    "    Without clamping, a frame smaller than `roi_size` yields negative start\n",
    "    indices, which NumPy treats as offsets from the end of the axis, so the\n",
    "    wrong region would be cropped.\n",
    "    \"\"\"\n",
    "    half = roi_size // 2\n",
    "    cx, cy = frame_w // 2, frame_h // 2\n",
    "    return (\n",
    "        max(cx - half, 0),\n",
    "        max(cy - half, 0),\n",
    "        min(cx + half, frame_w),\n",
    "        min(cy + half, frame_h),\n",
    "    )\n",
    "\n",
    "\n",
    "def majority_vote(votes) -> int:\n",
    "    \"\"\"Return the most frequent class index in `votes` (must be non-empty).\n",
    "\n",
    "    Ties go to the index that appears first in `votes`.\n",
    "    \"\"\"\n",
    "    return Counter(votes).most_common(1)[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3d9f0b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =============================================\n",
    "# LOAD ONNX MODEL\n",
    "# =============================================\n",
    "session = ort.InferenceSession(WEIGHTS_PATH, providers=[\"CPUExecutionProvider\"])\n",
    "\n",
    "input_name = session.get_inputs()[0].name\n",
    "output_name = session.get_outputs()[0].name\n",
    "input_shape = session.get_inputs()[0].shape\n",
    "\n",
    "print(\"===================================\")\n",
    "print(\"ONNX MODEL LOADED\")\n",
    "print(\"===================================\")\n",
    "print(f\"Input : {input_name} shape={input_shape}\")\n",
    "print(f\"Output : {output_name}\")\n",
    "for i, name in CLASS_NAMES.items():\n",
    "    print(f\" [{i}] {name}\")\n",
    "\n",
    "# =============================================\n",
    "# CHECK WHETHER THE MODEL ALREADY INCLUDES SOFTMAX\n",
    "# Run once with a dummy (all-zero) input.\n",
    "# =============================================\n",
    "dummy = np.zeros((1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)\n",
    "dummy_out = session.run([output_name], {input_name: dummy})[0][0]\n",
    "output_sum = float(np.sum(dummy_out))\n",
    "\n",
    "# Values in [0, 1] that sum to ~1.0 -> softmax is already inside the model.\n",
    "# Anything else -> the output is raw logits and softmax() is applied later.\n",
    "ALREADY_SOFTMAX = looks_like_probabilities(dummy_out, SOFTMAX_TOL)\n",
    "print(f\"\\nOutput dummy sum = {output_sum:.4f}\")\n",
    "print(f\"Model sudah include softmax? -> {'YA' if ALREADY_SOFTMAX else 'TIDAK'}\\n\")\n",
    "\n",
    "# A class-count mismatch would otherwise only surface as \"class_N\" labels.\n",
    "if len(dummy_out) != len(CLASS_NAMES):\n",
    "    print(\n",
    "        f\"WARNING: model outputs {len(dummy_out)} classes \"\n",
    "        f\"but CLASS_NAMES has {len(CLASS_NAMES)} entries\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4e8a1f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =============================================\n",
    "# CAMERA\n",
    "# =============================================\n",
    "cap = cv2.VideoCapture(CAMERA_INDEX, CAMERA_BACKEND)\n",
    "if not cap.isOpened():\n",
    "    cap.release()\n",
    "    raise RuntimeError(f\"Could not open camera index {CAMERA_INDEX}\")\n",
    "\n",
    "# Recent confident predictions used for label smoothing.\n",
    "vote_buffer = deque(maxlen=SMOOTH_N)\n",
    "frame_count = 0\n",
    "\n",
    "# try/finally guarantees the camera and windows are released even when the\n",
    "# loop is stopped by an exception or a kernel interrupt.\n",
    "try:\n",
    "    cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)\n",
    "    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)\n",
    "\n",
    "    actual_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)\n",
    "    actual_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)\n",
    "    print(f\"Resolusi kamera: {int(actual_w)} x {int(actual_h)}\\n\")\n",
    "\n",
    "    # =========================================\n",
    "    # MAIN LOOP\n",
    "    # =========================================\n",
    "    while True:\n",
    "        ret, frame = cap.read()\n",
    "        if not ret:\n",
    "            print(\"Kamera gagal\")\n",
    "            break\n",
    "\n",
    "        frame = cv2.flip(frame, 1)  # mirror horizontally\n",
    "        h, w = frame.shape[:2]\n",
    "\n",
    "        # Centered ROI\n",
    "        x1, y1, x2, y2 = center_roi_box(w, h, ROI_SIZE)\n",
    "        roi = frame[y1:y2, x1:x2]\n",
    "        if roi.size == 0:\n",
    "            # Still poll the keyboard so ESC can end the loop.\n",
    "            if cv2.waitKey(1) & 0xFF == ESC_KEY:\n",
    "                break\n",
    "            continue\n",
    "\n",
    "        # =====================================\n",
    "        # INFERENCE\n",
    "        # =====================================\n",
    "        tensor = preprocess(roi)\n",
    "        raw_output = session.run([output_name], {input_name: tensor})[0][0]  # (num_classes,)\n",
    "\n",
    "        # Apply softmax only when the model does not already include it.\n",
    "        probs = raw_output if ALREADY_SOFTMAX else softmax(raw_output)\n",
    "\n",
    "        idx = int(np.argmax(probs))\n",
    "        conf = float(probs[idx])\n",
    "\n",
    "        # DEBUG: print the raw values periodically.\n",
    "        frame_count += 1\n",
    "        if frame_count % DEBUG_EVERY_N_FRAMES == 0:\n",
    "            print(f\"[DEBUG] raw = {np.round(raw_output, 4)}\")\n",
    "            print(f\" prob = {np.round(probs, 4)}\")\n",
    "            print(f\" top1 = {CLASS_NAMES.get(idx,'?')} conf={conf*100:.1f}%\")\n",
    "\n",
    "        # =====================================\n",
    "        # LABEL SMOOTHING\n",
    "        # =====================================\n",
    "        if conf >= CONF_THRESH:\n",
    "            vote_buffer.append(idx)\n",
    "            stable_idx = majority_vote(vote_buffer)\n",
    "            label = CLASS_NAMES.get(stable_idx, f\"class_{stable_idx}\")\n",
    "            color = CLASS_COLORS.get(label, (255, 255, 255))\n",
    "            # Show the probability of the label actually displayed; the\n",
    "            # current top-1 may be a different class than the smoothed one.\n",
    "            text = f\"{label} {float(probs[stable_idx]) * 100:.1f}%\"\n",
    "        else:\n",
    "            label = \"Unknown\"\n",
    "            color = (120, 120, 120)\n",
    "            text = f\"Low conf: {conf * 100:.1f}%\"  # show the number for debugging\n",
    "\n",
    "        # =====================================\n",
    "        # DISPLAY\n",
    "        # =====================================\n",
    "        display_frame = frame.copy()\n",
    "\n",
    "        cv2.rectangle(display_frame, (x1, y1), (x2, y2), color, 2)\n",
    "\n",
    "        # Keep the caption inside the frame when the ROI touches the top edge.\n",
    "        cv2.putText(display_frame, text,\n",
    "                    (x1, max(y1 - 10, 25)),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)\n",
    "\n",
    "        cv2.putText(display_frame, \"[ESC] Quit\",\n",
    "                    (10, h - 15),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)\n",
    "\n",
    "        roi_display = cv2.resize(roi, (IMAGE_SIZE, IMAGE_SIZE))\n",
    "        cv2.putText(roi_display, text,\n",
    "                    (10, 25),\n",
    "                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)\n",
    "\n",
    "        cv2.imshow(\"Camera\", display_frame)\n",
    "        cv2.imshow(\"ROI\", roi_display)\n",
    "\n",
    "        if cv2.waitKey(1) & 0xFF == ESC_KEY:\n",
    "            break\n",
    "finally:\n",
    "    cap.release()\n",
    "    cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}