236 lines
8.2 KiB
Plaintext
236 lines
8.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "502f6493",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from ultralytics import YOLO\n",
|
|
"\n",
|
|
"# Start from the pretrained nano classification checkpoint.\n",
"model = YOLO(\"yolo26n-cls.pt\")\n",
"\n",
"# Fine-tune on the `dataset1` folder with 224x224 inputs.\n",
"model.train(\n",
"    data=\"dataset1\",\n",
"    epochs=50,\n",
"    imgsz=224,\n",
"    batch=16,\n",
")\n",
"\n",
"# Export the trained model to ONNX: the quantization cell reads a best.onnx\n",
"# file that no other step in this notebook creates.\n",
"# NOTE(review): the export is expected to land in this run's weights folder;\n",
"# confirm that folder matches the `train5` path used by the following cells.\n",
"onnx_path = model.export(format=\"onnx\", imgsz=224)\n",
"print(f\"ONNX model written to: {onnx_path}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7eac4377",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from onnxruntime.quantization import QuantType, quantize_dynamic\n",
"\n",
"# Source ONNX file and destination for its dynamically quantized copy.\n",
"FP32_ONNX = \"runs/classify/train5/weights/best.onnx\"\n",
"INT8_ONNX = \"runs/classify/train5/weights/best_int8.onnx\"\n",
"\n",
"# Dynamic quantization with signed 8-bit weights (QuantType.QInt8).\n",
"quantize_dynamic(\n",
"    model_input=FP32_ONNX,\n",
"    model_output=INT8_ONNX,\n",
"    weight_type=QuantType.QInt8,\n",
")\n",
"\n",
"print(\"INT8 DONE\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0a6d5d0d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from collections import deque\n",
|
|
"import cv2\n",
|
|
"import numpy as np\n",
|
|
"import onnxruntime as ort\n",
|
|
"\n",
|
|
"# =============================================\n",
"# CONFIGURATION\n",
"# =============================================\n",
"# Quantized ONNX classifier loaded for inference.\n",
"WEIGHTS_PATH = \"runs/classify/train5/weights/best_int8.onnx\"\n",
"\n",
"CONF_THRESH = 0.6  # top-1 confidence below this is reported as low confidence\n",
"SMOOTH_N = 5       # number of recent confident predictions kept for majority voting\n",
"IMAGE_SIZE = 224   # side length (pixels) of the square model input\n",
"roi_size = 400     # side length (pixels) of the centred crop taken from each frame\n",
"\n",
"# Class index -> display name.\n",
"CLASS_NAMES = dict(enumerate((\"Can\", \"Others\", \"PET-Bottle\")))\n",
"\n",
"# Display name -> colour tuple handed to the OpenCV drawing calls.\n",
"CLASS_COLORS = dict(\n",
"    zip(\n",
"        CLASS_NAMES.values(),\n",
"        ((0, 200, 50), (0, 165, 255), (255, 50, 50)),\n",
"    )\n",
")\n",
|
|
"\n",
|
|
"# =============================================\n",
"# LOAD ONNX MODEL\n",
"# =============================================\n",
"# Inference session over the file at WEIGHTS_PATH, restricted to the CPU provider.\n",
"session = ort.InferenceSession(WEIGHTS_PATH, providers=[\"CPUExecutionProvider\"])\n",
"\n",
"# Only the first declared input and the first declared output are used.\n",
"first_input = session.get_inputs()[0]\n",
"input_name, input_shape = first_input.name, first_input.shape\n",
"output_name = session.get_outputs()[0].name\n",
"\n",
"# Startup banner: tensor names followed by the index -> label table.\n",
"banner_rule = \"===================================\"\n",
"print(\n",
"    banner_rule,\n",
"    \"ONNX MODEL LOADED\",\n",
"    banner_rule,\n",
"    f\"Input : {input_name} shape={input_shape}\",\n",
"    f\"Output : {output_name}\",\n",
"    *(f\" [{i}] {name}\" for i, name in CLASS_NAMES.items()),\n",
"    sep=\"\\n\",\n",
")\n",
|
|
"\n",
|
|
"# =============================================\n",
"# CHECK WHETHER THE MODEL ALREADY INCLUDES SOFTMAX\n",
"# Run the model once on a dummy input\n",
"# =============================================\n",
"dummy = np.zeros((1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)\n",
"dummy_out = session.run([output_name], {input_name: dummy})[0][0]\n",
"output_sum = float(np.sum(dummy_out))\n",
"\n",
"# A softmax output is a probability vector: every entry lies in [0, 1] and the\n",
"# entries sum to 1 up to float rounding. Testing only that the sum is within\n",
"# 0.1 of 1.0 would also accept raw logits such as [2.0, -0.5, -0.45].\n",
"ALREADY_SOFTMAX = bool(\n",
"    np.all(dummy_out >= 0.0)\n",
"    and np.all(dummy_out <= 1.0)\n",
"    and abs(output_sum - 1.0) < 1e-3\n",
")\n",
"print(f\"\\nOutput dummy sum = {output_sum:.4f}\")\n",
"print(f\"Model sudah include softmax? -> {'YA' if ALREADY_SOFTMAX else 'TIDAK'}\\n\")\n",
|
|
"\n",
|
|
"# =============================================\n",
"# SOFTMAX (only applied when the model does not already include it)\n",
"# =============================================\n",
"def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:\n",
"    \"\"\"Return the numerically stable softmax of `x` along `axis`.\n",
"\n",
"    Args:\n",
"        x: Array of raw scores (logits) with at least one dimension.\n",
"        axis: Axis holding the class scores. With the default (last axis) a\n",
"            1-D vector is normalised as a whole and each row of a batched\n",
"            (N, C) array is normalised independently.\n",
"\n",
"    Returns:\n",
"        Array with the same shape as `x` whose values along `axis` sum to 1.\n",
"    \"\"\"\n",
"    x = np.asarray(x)\n",
"    # Subtracting the maximum does not change the result but keeps exp() from overflowing.\n",
"    e = np.exp(x - x.max(axis=axis, keepdims=True))\n",
"    return e / e.sum(axis=axis, keepdims=True)\n",
|
|
"\n",
|
|
"# =============================================\n",
"# PREPROCESSING\n",
"# =============================================\n",
"def preprocess(bgr_img: np.ndarray, img_size: int = IMAGE_SIZE) -> np.ndarray:\n",
"    \"\"\"Convert a BGR image into the float32 tensor fed to the ONNX session.\n",
"\n",
"    Steps: bilinear resize to `img_size` x `img_size`, BGR -> RGB, divide the\n",
"    pixel values by 255, move channels first and prepend a batch axis.\n",
"\n",
"    Args:\n",
"        bgr_img: Three-channel image in BGR channel order, shape (H, W, 3).\n",
"        img_size: Side length of the square output, in pixels.\n",
"\n",
"    Returns:\n",
"        C-contiguous float32 array of shape (1, 3, img_size, img_size).\n",
"    \"\"\"\n",
"    resized = cv2.resize(bgr_img, (img_size, img_size), interpolation=cv2.INTER_LINEAR)\n",
"    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)\n",
"    scaled = rgb.astype(np.float32) / 255.0\n",
"    chw = scaled.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)\n",
"    batched = np.expand_dims(chw, axis=0)  # (1, 3, H, W)\n",
"    return np.ascontiguousarray(batched)\n",
|
|
"\n",
|
|
"# =============================================\n",
"# SMOOTHING BUFFER\n",
"# =============================================\n",
"# Holds the class indices of the last SMOOTH_N confident predictions.\n",
"buffer = deque(maxlen=SMOOTH_N)\n",
"\n",
"# =============================================\n",
"# CAMERA\n",
"# =============================================\n",
"# Camera index 1 through the DirectShow backend, requesting 1920x1080.\n",
"cap = cv2.VideoCapture(1, cv2.CAP_DSHOW)\n",
"cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)\n",
"cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)\n",
"\n",
"# Read the resolution back from the capture and report it.\n",
"actual_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)\n",
"actual_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)\n",
"print(f\"Resolusi kamera: {int(actual_w)} x {int(actual_h)}\\n\")\n",
"\n",
"frame_count = 0\n",
"\n",
"# =============================================\n",
"# MAIN LOOP\n",
"# =============================================\n",
"# try/finally guarantees that the camera and the windows are released even when\n",
"# the loop is aborted by an exception or a kernel interrupt.\n",
"try:\n",
"    while True:\n",
"        ret, frame = cap.read()\n",
"        if not ret:\n",
"            print(\"Kamera gagal\")\n",
"            break\n",
"\n",
"        frame = cv2.flip(frame, 1)  # mirror horizontally\n",
"        h, w = frame.shape[:2]\n",
"\n",
"        # Centre ROI, clamped to the frame so that a frame smaller than roi_size\n",
"        # cannot produce negative slice indices (which would crop the wrong region).\n",
"        cx, cy = w // 2, h // 2\n",
"        half = roi_size // 2\n",
"        x1, y1 = max(cx - half, 0), max(cy - half, 0)\n",
"        x2, y2 = min(cx + half, w), min(cy + half, h)\n",
"\n",
"        roi = frame[y1:y2, x1:x2]\n",
"        if roi.size == 0:\n",
"            # Nothing to classify; still poll the keyboard so ESC keeps working.\n",
"            if cv2.waitKey(1) & 0xFF == 27:\n",
"                break\n",
"            continue\n",
"\n",
"        # =========================================\n",
"        # INFERENCE\n",
"        # =========================================\n",
"        tensor = preprocess(roi)\n",
"        # [0][0]: first requested output, first item of the batch.\n",
"        raw_output = session.run([output_name], {input_name: tensor})[0][0]\n",
"\n",
"        # Apply softmax only when the model does not already include it.\n",
"        probs = raw_output if ALREADY_SOFTMAX else softmax(raw_output)\n",
"\n",
"        idx = int(np.argmax(probs))\n",
"        conf = float(probs[idx])\n",
"\n",
"        # DEBUG: print every 30th frame to the terminal.\n",
"        frame_count += 1\n",
"        if frame_count % 30 == 0:\n",
"            print(f\"[DEBUG] raw = {np.round(raw_output, 4)}\")\n",
"            print(f\" prob = {np.round(probs, 4)}\")\n",
"            print(f\" top1 = {CLASS_NAMES.get(idx,'?')} conf={conf*100:.1f}%\")\n",
"\n",
"        # =========================================\n",
"        # LABEL SMOOTHING\n",
"        # =========================================\n",
"        if conf >= CONF_THRESH:\n",
"            # Majority vote over the recent confident predictions. The displayed\n",
"            # percentage is the current frame's top-1 confidence.\n",
"            buffer.append(idx)\n",
"            stable_idx = max(set(buffer), key=buffer.count)\n",
"            label = CLASS_NAMES.get(stable_idx, f\"class_{stable_idx}\")\n",
"            color = CLASS_COLORS.get(label, (255, 255, 255))\n",
"            text = f\"{label} {conf * 100:.1f}%\"\n",
"        else:\n",
"            label = \"Unknown\"\n",
"            color = (120, 120, 120)\n",
"            text = f\"Low conf: {conf * 100:.1f}%\"  # show the number for debugging\n",
"\n",
"        # =========================================\n",
"        # DISPLAY\n",
"        # =========================================\n",
"        display_frame = frame.copy()\n",
"\n",
"        cv2.rectangle(display_frame, (x1, y1), (x2, y2), color, 2)\n",
"\n",
"        # Keep the label inside the frame when the ROI touches the top edge.\n",
"        cv2.putText(display_frame, text,\n",
"                    (x1, max(y1 - 10, 20)),\n",
"                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)\n",
"\n",
"        cv2.putText(display_frame, \"[ESC] Quit\",\n",
"                    (10, h - 15),\n",
"                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)\n",
"\n",
"        roi_display = cv2.resize(roi, (IMAGE_SIZE, IMAGE_SIZE))\n",
"        cv2.putText(roi_display, text,\n",
"                    (10, 25),\n",
"                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)\n",
"\n",
"        cv2.imshow(\"Camera\", display_frame)\n",
"        cv2.imshow(\"ROI\", roi_display)\n",
"\n",
"        if cv2.waitKey(1) & 0xFF == 27:  # ESC\n",
"            break\n",
"finally:\n",
"    cap.release()\n",
"    cv2.destroyAllWindows()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|