Projeck_ML/TrainMobilenetV2.ipynb
2026-05-19 16:02:17 +07:00

1146 lines
51 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "f8d41c5b",
"metadata": {},
"source": [
"Training MobileNetV2 dengan bobot pretrained ImageNet (IMAGENET1K_V1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6194bec9",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# ✅ HARUS di paling atas, sebelum import torch\n",
"os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import DataLoader\n",
"from torchvision import datasets, transforms, models\n",
"import lightning as L\n",
"from torchmetrics import Accuracy, ConfusionMatrix\n",
"from lightning.pytorch.callbacks import EarlyStopping, TQDMProgressBar, Callback\n",
"from lightning.pytorch.loggers import TensorBoardLogger\n",
"from lightning.pytorch.profilers import SimpleProfiler\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib\n",
"matplotlib.use(\"Agg\")\n",
"import seaborn as sns\n",
"import logging\n",
"import random\n",
"import time\n",
"import numpy as np\n",
"\n",
"# =============================================\n",
"# SETTING KONFIGURASI DI SINI\n",
"# =============================================\n",
"\n",
"TRAIN_DIR = \"Train 6\"\n",
"VAL_DIR = \"Val 6\"\n",
"TEST_DIR = \"Test 6\"\n",
"\n",
"NUM_CLASSES = 3\n",
"BATCH_SIZE = 16\n",
"MAX_EPOCHS = 100\n",
"LR = 1e-5\n",
"IMG_SIZE = 224\n",
"NUM_WORKERS = 4\n",
"BASE_DIR = \"runs 2\"\n",
"SEED = 30\n",
"\n",
"EMA_DECAY = 0.999 # ← EMA decay factor (0.999 to 0.9999 is the commonly used range)\n",
"\n",
"# =============================================\n",
"\n",
"\n",
"# ── Logger setup ─────────────────────────────────────────\n",
"class _SectionFormatter(logging.Formatter):\n",
" GREY = \"\\x1b[38;5;245m\"\n",
" GREEN = \"\\x1b[32;1m\"\n",
" YELLOW = \"\\x1b[33;1m\"\n",
" CYAN = \"\\x1b[36;1m\"\n",
" BOLD = \"\\x1b[1m\"\n",
" RESET = \"\\x1b[0m\"\n",
"\n",
" LEVEL_COLORS = {\n",
" logging.DEBUG: GREY,\n",
" logging.INFO: \"\",\n",
" logging.WARNING: YELLOW,\n",
" logging.ERROR: \"\\x1b[31;1m\",\n",
" logging.CRITICAL: \"\\x1b[41;1m\",\n",
" }\n",
"\n",
" def format(self, record):\n",
" color = self.LEVEL_COLORS.get(record.levelno, \"\")\n",
" msg = super().format(record)\n",
" if color:\n",
" return f\"{color}{msg}{self.RESET}\"\n",
" return msg\n",
"\n",
"\n",
"def _setup_logger():\n",
" logger = logging.getLogger(\"train\")\n",
" logger.setLevel(logging.DEBUG)\n",
" logger.propagate = False\n",
" if not logger.handlers:\n",
" ch = logging.StreamHandler()\n",
" ch.setFormatter(_SectionFormatter(\"%(message)s\"))\n",
" logger.addHandler(ch)\n",
" return logger\n",
"\n",
"\n",
"log = _setup_logger()\n",
"\n",
"\n",
"def _box(title: str, width: int = 60) -> str:\n",
" inner = f\" {title} \"\n",
" pad = max(0, width - len(inner) - 2)\n",
" l, r = pad // 2, pad - pad // 2\n",
" bar = \"─\" * width\n",
" return f\"\\n┌{bar}┐\\n│{' ' * l}{inner}{' ' * r}│\\n└{bar}┘\"\n",
"\n",
"\n",
"def _sep(char: str = \"─\", width: int = 62) -> str:\n",
" return char * width\n",
"\n",
"\n",
"# ── Seed ─────────────────────────────────────────────────\n",
"log.info(_box(\"🌱 INISIALISASI SEED & LINGKUNGAN\"))\n",
"\n",
"random.seed(SEED)\n",
"np.random.seed(SEED)\n",
"torch.manual_seed(SEED)\n",
"torch.backends.cudnn.deterministic = True\n",
"torch.backends.cudnn.benchmark = False\n",
"L.seed_everything(SEED, workers=True)\n",
"\n",
"log.info(f\" Seed : {SEED}\")\n",
"log.info(f\" CUDA tersedia : {torch.cuda.is_available()}\")\n",
"if torch.cuda.is_available():\n",
" log.info(f\" GPU : {torch.cuda.get_device_name(0)}\")\n",
" log.info(f\" VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
"\n",
"\n",
"# ── Run dir ───────────────────────────────────────────────\n",
"def get_next_run_dir(base_dir=BASE_DIR, prefix=\"train\"):\n",
" os.makedirs(base_dir, exist_ok=True)\n",
" existing = [\n",
" d for d in os.listdir(base_dir)\n",
" if os.path.isdir(os.path.join(base_dir, d)) and d.startswith(prefix)\n",
" ]\n",
" numbers = []\n",
" for d in existing:\n",
" try:\n",
" numbers.append(int(d.replace(prefix, \"\")))\n",
" except Exception:\n",
" pass\n",
" next_num = max(numbers) + 1 if numbers else 1\n",
" run_dir = os.path.join(base_dir, f\"{prefix}{next_num}\")\n",
" os.makedirs(run_dir, exist_ok=True)\n",
" return run_dir\n",
"\n",
"\n",
"RUN_DIR = get_next_run_dir(BASE_DIR, prefix=\"train\")\n",
"WEIGHTS_DIR = os.path.join(RUN_DIR, \"weights\")\n",
"RESULTS_DIR = os.path.join(RUN_DIR, \"results\")\n",
"PROFILER_DIR = os.path.join(RUN_DIR, \"profiler\")\n",
"\n",
"for d in (WEIGHTS_DIR, RESULTS_DIR, PROFILER_DIR):\n",
" os.makedirs(d, exist_ok=True)\n",
"\n",
"log.info(f\"\\n Run dir : {os.path.abspath(RUN_DIR)}\")\n",
"\n",
"\n",
"# ── Transform ─────────────────────────────────────────────\n",
"train_transform = transforms.Compose([\n",
"\n",
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
"\n",
" transforms.RandomHorizontalFlip(p=0.5),\n",
"\n",
" transforms.RandomAffine(\n",
" degrees=3,\n",
" translate=(0.03, 0.03),\n",
" scale=(0.98, 1.02)\n",
" ),\n",
"\n",
" transforms.ColorJitter(\n",
" brightness=0.25,\n",
" contrast=0.2,\n",
" saturation=0.1\n",
" ),\n",
"\n",
" transforms.ToTensor(),\n",
"\n",
" transforms.Normalize(\n",
" [0.485, 0.456, 0.406],\n",
" [0.229, 0.224, 0.225]\n",
" ),\n",
"])\n",
"\n",
"val_test_transform = transforms.Compose([\n",
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
"])\n",
"\n",
"train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transform)\n",
"val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_test_transform)\n",
"test_dataset = datasets.ImageFolder(TEST_DIR, transform=val_test_transform)\n",
"\n",
"CLASS_NAMES = train_dataset.classes\n",
"\n",
"train_loader = DataLoader(\n",
" train_dataset, batch_size=BATCH_SIZE, shuffle=True,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"val_loader = DataLoader(\n",
" val_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"test_loader = DataLoader(\n",
" test_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"\n",
"log.info(_box(\"📂 DATASET\"))\n",
"log.info(f\" Kelas : {CLASS_NAMES}\")\n",
"log.info(f\" Train : {len(train_dataset):>6,} gambar ({len(train_loader)} batch)\")\n",
"log.info(f\" Validation : {len(val_dataset):>6,} gambar ({len(val_loader)} batch)\")\n",
"log.info(f\" Test : {len(test_dataset):>6,} gambar ({len(test_loader)} batch)\")\n",
"\n",
"log.info(_box(\"⚙️ KONFIGURASI TRAINING\"))\n",
"log.info(f\" Backbone : MobileNetV2 (ImageNet pretrained)\")\n",
"log.info(f\" Num classes : {NUM_CLASSES}\")\n",
"log.info(f\" Batch size : {BATCH_SIZE}\")\n",
"log.info(f\" Max epochs : {MAX_EPOCHS}\")\n",
"log.info(f\" Learning rate : {LR}\")\n",
"log.info(f\" Image size : {IMG_SIZE}×{IMG_SIZE}\")\n",
"log.info(f\" Seed : {SEED}\")\n",
"log.info(f\" EMA decay : {EMA_DECAY}\")\n",
"\n",
"\n",
"# ══════════════════════════════════════════════════════════\n",
"# EMA CALLBACK\n",
"# - Shadow weights diinisialisasi saat training dimulai\n",
"# - Update setiap akhir batch: shadow = decay*shadow + (1-decay)*param\n",
"# - Sebelum val/test: swap ke EMA weights\n",
"# - Setelah val/test : restore kembali ke weights asli\n",
"# - best.pt menyimpan EMA weights (bukan raw weights)\n",
"# ══════════════════════════════════════════════════════════\n",
"class EMACallback(Callback):\n",
" \"\"\"Exponential Moving Average of model weights.\"\"\"\n",
"\n",
" def __init__(self, decay: float = 0.9999):\n",
" super().__init__()\n",
" self.decay = decay\n",
" self.shadow: dict[str, torch.Tensor] = {} # EMA weights\n",
" self.backup: dict[str, torch.Tensor] = {} # raw weights (saat swap)\n",
"\n",
" # ── Init shadow dari bobot awal ──────────────────────\n",
" def on_train_start(self, trainer, pl_module):\n",
" self.shadow = {\n",
" name: param.data.clone().detach()\n",
" for name, param in pl_module.model.named_parameters()\n",
" if param.requires_grad\n",
" }\n",
" log.info(f\" [EMA] Shadow weights diinisialisasi (decay={self.decay})\")\n",
"\n",
" # ── Update shadow setiap akhir batch ─────────────────\n",
" def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):\n",
" d = self.decay\n",
" with torch.no_grad():\n",
" for name, param in pl_module.model.named_parameters():\n",
" if param.requires_grad and name in self.shadow:\n",
" self.shadow[name].mul_(d).add_((1.0 - d) * param.data)\n",
"\n",
" # ── Swap ke EMA sebelum validasi ─────────────────────\n",
" def on_validation_start(self, trainer, pl_module):\n",
" if trainer.sanity_checking:\n",
" return\n",
" self._apply_shadow(pl_module)\n",
"\n",
" def on_validation_end(self, trainer, pl_module):\n",
" if trainer.sanity_checking:\n",
" return\n",
" self._restore(pl_module)\n",
"\n",
" # ── Swap ke EMA sebelum test ──────────────────────────\n",
" def on_test_start(self, trainer, pl_module):\n",
" self._apply_shadow(pl_module)\n",
"\n",
" def on_test_end(self, trainer, pl_module):\n",
" self._restore(pl_module)\n",
"\n",
" # ── Helper ───────────────────────────────────────────\n",
" def _apply_shadow(self, pl_module):\n",
" \"\"\"Tukar param model dengan shadow EMA, simpan aslinya ke backup.\"\"\"\n",
" for name, param in pl_module.model.named_parameters():\n",
" if name in self.shadow:\n",
" self.backup[name] = param.data.clone()\n",
" param.data.copy_(self.shadow[name])\n",
"\n",
" def _restore(self, pl_module):\n",
" \"\"\"Kembalikan param model ke nilai asli (raw training weights).\"\"\"\n",
" for name, param in pl_module.model.named_parameters():\n",
" if name in self.backup:\n",
" param.data.copy_(self.backup[name])\n",
" self.backup.clear()\n",
"\n",
" def state_dict(self):\n",
" \"\"\"Untuk checkpoint Lightning (opsional).\"\"\"\n",
" return {\"shadow\": {k: v.cpu() for k, v in self.shadow.items()},\n",
" \"decay\": self.decay}\n",
"\n",
" def load_state_dict(self, state_dict):\n",
" self.decay = state_dict[\"decay\"]\n",
" self.shadow = state_dict[\"shadow\"]\n",
"\n",
"\n",
"# ══════════════════════════════════════════════════════════\n",
"# SAVE PT CALLBACK (BUG FIX: read train metrics from pl_module)\n",
"#\n",
"# Previous problem:\n",
"# metrics.get(\"train_loss\", tensor(0)) in on_validation_epoch_end\n",
"# often returned 0 because callback_metrics is not guaranteed to be\n",
"# updated yet when this hook is called at the start of an epoch.\n",
"#\n",
"# Current approach:\n",
"# SmartBinClassifier stores _train_loss_epoch and _train_acc_epoch\n",
"# in on_train_epoch_end, and the callback reads those attributes\n",
"# directly from pl_module.\n",
"#\n",
"# NOTE(review): Lightning runs the end-of-epoch validation loop BEFORE\n",
"# on_train_epoch_end, so the values read in on_validation_epoch_end\n",
"# belong to the previous epoch (0.0 on the first epoch); the logged\n",
"# train loss/acc and the gap look one epoch stale. TODO: confirm and fix.\n",
"# ══════════════════════════════════════════════════════════\n",
"class SavePTCallback(Callback):\n",
" def __init__(self, save_dir: str, ema_callback: EMACallback):\n",
" self.save_dir = save_dir\n",
" self.ema_callback = ema_callback\n",
" self.best_acc = 0.0\n",
" self._prev_val_loss = float(\"inf\")\n",
" os.makedirs(save_dir, exist_ok=True)\n",
"\n",
" def on_validation_epoch_end(self, trainer, pl_module):\n",
" if trainer.sanity_checking:\n",
" return\n",
"\n",
" metrics = trainer.callback_metrics\n",
"\n",
" # ✅ FIX: train metrics dibaca dari atribut module (sudah di-set\n",
" # di on_train_epoch_end, jadi dijamin tidak 0 palsu)\n",
" train_loss = pl_module._train_loss_epoch # ← dari modul\n",
" train_acc = pl_module._train_acc_epoch # ← dari modul\n",
"\n",
" val_loss = metrics.get(\"val_loss\", torch.tensor(0.0)).item()\n",
" val_acc = metrics.get(\"val_acc\", torch.tensor(0.0)).item()\n",
" epoch = trainer.current_epoch\n",
"\n",
" # ── Selalu simpan last.pt (pakai EMA weights karena kita sudah swap) ──\n",
" ckpt = {\n",
" \"epoch\" : epoch,\n",
" \"model_state\" : pl_module.model.state_dict(), # EMA weights (sudah swap)\n",
" \"val_acc\" : val_acc,\n",
" \"class_names\" : pl_module.class_names,\n",
" \"seed\" : SEED,\n",
" \"ema_decay\" : self.ema_callback.decay,\n",
" }\n",
" torch.save(ckpt, os.path.join(self.save_dir, \"last.pt\"))\n",
"\n",
" # ── Simpan best.pt jika val_acc meningkat ──\n",
" gap = val_loss - train_loss\n",
" if val_acc > self.best_acc:\n",
" self.best_acc = val_acc\n",
" torch.save(ckpt, os.path.join(self.save_dir, \"best.pt\"))\n",
" status = \"🏆 BEST\"\n",
" elif gap > 0.15:\n",
" status = \"⚠️ OVERFIT\"\n",
" elif val_loss < self._prev_val_loss:\n",
" status = \"📉 improving\"\n",
" else:\n",
" status = \"➡️ stagnant\"\n",
"\n",
" self._prev_val_loss = val_loss\n",
"\n",
" log.info(\n",
" f\" Epoch {epoch:>3}/{MAX_EPOCHS} │ \"\n",
" f\"loss {train_loss:.4f}→{val_loss:.4f} │ \"\n",
" f\"acc {train_acc:.4f}→{val_acc:.4f} │ \"\n",
" f\"gap {gap:+.4f} │ {status}\"\n",
" )\n",
"\n",
"\n",
"# ── Lightning Module ──────────────────────────────────────\n",
"class SmartBinClassifier(L.LightningModule):\n",
" def __init__(self, num_classes: int, lr: float, class_names: list, results_dir: str):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" self.lr = lr\n",
" self.class_names = class_names\n",
" self.results_dir = results_dir\n",
"\n",
" # ✅ FIX: atribut untuk menyimpan metrik train per-epoch\n",
" # Di-set di on_train_epoch_end → dijamin valid saat val callback jalan\n",
" self._train_loss_epoch: float = 0.0\n",
" self._train_acc_epoch: float = 0.0\n",
"\n",
" # ── Backbone ──\n",
" self.model = models.mobilenet_v2(weights=\"IMAGENET1K_V1\")\n",
"\n",
" for param in self.model.features.parameters():\n",
" param.requires_grad = False\n",
" for param in self.model.features[-7:].parameters():\n",
" param.requires_grad = True\n",
"\n",
" in_features = self.model.classifier[-1].in_features\n",
" self.model.classifier = nn.Sequential(\n",
" nn.Linear(in_features, 128, bias=False),\n",
" nn.BatchNorm1d(128),\n",
" nn.ReLU(inplace=True),\n",
" nn.Dropout(0.4),\n",
"\n",
" nn.Linear(128, 64, bias=False),\n",
" nn.BatchNorm1d(64),\n",
" nn.ReLU(inplace=True),\n",
" nn.Dropout(0.2),\n",
"\n",
" nn.Linear(64, NUM_CLASSES)\n",
" )\n",
"\n",
" self.criterion = nn.CrossEntropyLoss(label_smoothing=0.05)\n",
" self.train_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
" self.val_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
" self.test_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
" self.conf_mat = ConfusionMatrix(task=\"multiclass\", num_classes=num_classes)\n",
"\n",
" # Akumulator loss untuk hitung rata-rata per epoch secara manual\n",
" self._train_loss_sum: float = 0.0\n",
" self._train_loss_count: int = 0\n",
"\n",
" self.test_preds = []\n",
" self.test_labels = []\n",
"\n",
" def forward(self, x):\n",
" return self.model(x)\n",
"\n",
" def training_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" logits = self(x)\n",
" loss = self.criterion(logits, y)\n",
" preds = torch.argmax(logits, dim=1)\n",
" self.train_acc(preds, y)\n",
"\n",
" # ✅ Akumulasi loss manual agar on_train_epoch_end bisa hitung rata-rata\n",
" self._train_loss_sum += loss.detach().item()\n",
" self._train_loss_count += 1\n",
"\n",
" self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True, on_step=False)\n",
" self.log(\"train_acc\", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)\n",
" return loss\n",
"\n",
" def on_train_epoch_end(self):\n",
" # ✅ FIX: simpan metrik ke atribut — callback baca dari sini, bukan\n",
" # dari callback_metrics yang kadang belum ter-update\n",
" self._train_loss_epoch = (\n",
" self._train_loss_sum / self._train_loss_count\n",
" if self._train_loss_count > 0 else 0.0\n",
" )\n",
" self._train_acc_epoch = self.train_acc.compute().item()\n",
"\n",
" # Reset akumulator untuk epoch berikutnya\n",
" self._train_loss_sum = 0.0\n",
" self._train_loss_count = 0\n",
"\n",
" lr = self.optimizers().param_groups[0][\"lr\"]\n",
" self.log(\"learning_rate\", lr)\n",
"\n",
" def validation_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" logits = self(x)\n",
" loss = self.criterion(logits, y)\n",
" preds = torch.argmax(logits, dim=1)\n",
" self.val_acc(preds, y)\n",
" self.log(\"val_loss\", loss, prog_bar=True, on_epoch=True)\n",
" self.log(\"val_acc\", self.val_acc, prog_bar=True, on_epoch=True)\n",
"\n",
" def test_step(self, batch, batch_idx):\n",
" x, y = batch\n",
" logits = self(x)\n",
" preds = torch.argmax(logits, dim=1)\n",
" self.test_acc(preds, y)\n",
" self.log(\"test_acc\", self.test_acc, prog_bar=True)\n",
" self.test_preds.append(preds.cpu())\n",
" self.test_labels.append(y.cpu())\n",
"\n",
" def on_test_epoch_end(self):\n",
" all_preds = torch.cat(self.test_preds)\n",
" all_labels = torch.cat(self.test_labels)\n",
"\n",
" self.conf_mat = self.conf_mat.to(\"cpu\")\n",
" cm = self.conf_mat(all_preds, all_labels).numpy()\n",
"\n",
" per_class = cm.diagonal() / cm.sum(axis=1)\n",
" log.info(_box(\"📊 HASIL TEST\"))\n",
" for i, (name, acc) in enumerate(zip(self.class_names, per_class)):\n",
" log.info(f\" Akurasi [{name:>15}] : {acc:.4f} ({int(cm[i, i])}/{int(cm[i].sum())})\")\n",
"\n",
" for fig, fname in [\n",
" (self._plot_confusion_matrix(cm), \"confusion_matrix.png\"),\n",
" ]:\n",
" path = os.path.join(self.results_dir, fname)\n",
" fig.savefig(path, dpi=150, bbox_inches=\"tight\")\n",
" plt.close(fig)\n",
" log.info(f\" Disimpan → {path}\")\n",
"\n",
" fig2 = self._plot_confusion_matrix(cm)\n",
" self.logger.experiment.add_figure(\"Test/Confusion Matrix\", fig2, self.current_epoch)\n",
" plt.close(fig2)\n",
"\n",
" self.conf_mat.reset()\n",
" self.test_preds = []\n",
" self.test_labels = []\n",
"\n",
" def configure_optimizers(self):\n",
" optimizer = torch.optim.AdamW(\n",
" [\n",
" {\"params\": self.model.features[-7:].parameters(), \"lr\": 1e-5},\n",
" {\"params\": self.model.classifier.parameters(), \"lr\": 1e-3},\n",
" ],\n",
" weight_decay=1e-3,\n",
" )\n",
" scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
" optimizer, T_max=MAX_EPOCHS, eta_min=1e-6\n",
" )\n",
" return {\n",
" \"optimizer\" : optimizer,\n",
" \"lr_scheduler\": {\"scheduler\": scheduler, \"monitor\": \"val_loss\"},\n",
" }\n",
"\n",
" def _plot_confusion_matrix(self, cm):\n",
" cm_norm = cm.astype(\"float\") / cm.sum(axis=1, keepdims=True)\n",
" fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
" fig.suptitle(\"Confusion Matrix — MobileNetV2\", fontsize=14, fontweight=\"bold\")\n",
"\n",
" for ax, data, fmt, title in zip(\n",
" axes,\n",
" [cm, cm_norm],\n",
" [\"d\", \".2f\"],\n",
" [\"Count\", \"Normalized\"],\n",
" ):\n",
" sns.heatmap(\n",
" data, annot=True, fmt=fmt, cmap=\"Blues\",\n",
" xticklabels=self.class_names,\n",
" yticklabels=self.class_names,\n",
" ax=ax,\n",
" )\n",
" ax.set_title(title)\n",
" ax.set_xlabel(\"Predicted\")\n",
" ax.set_ylabel(\"Actual\")\n",
"\n",
" plt.tight_layout()\n",
" return fig\n",
"\n",
"\n",
"# ── Callbacks & logger ────────────────────────────────────\n",
"ema_callback = EMACallback(decay=EMA_DECAY)\n",
"save_pt_callback = SavePTCallback(save_dir=WEIGHTS_DIR, ema_callback=ema_callback)\n",
"early_stop_callback = EarlyStopping(monitor=\"val_loss\", patience=12, mode=\"min\")\n",
"progress_bar = TQDMProgressBar(refresh_rate=10)\n",
"\n",
"tb_logger = TensorBoardLogger(save_dir=\"runs\", name=os.path.basename(RUN_DIR), version=0)\n",
"profiler = SimpleProfiler(dirpath=PROFILER_DIR, filename=\"profiler_report\")\n",
"\n",
"# ── Training ──────────────────────────────────────────────\n",
"model = SmartBinClassifier(NUM_CLASSES, LR, CLASS_NAMES, RESULTS_DIR)\n",
"\n",
"trainer = L.Trainer(\n",
" max_epochs = MAX_EPOCHS,\n",
" callbacks = [\n",
" ema_callback, # ← EMA harus urutan pertama\n",
" save_pt_callback,\n",
" early_stop_callback,\n",
" progress_bar,\n",
" ],\n",
" accelerator = \"gpu\" if torch.cuda.is_available() else \"cpu\",\n",
" devices = 1,\n",
" enable_model_summary = True,\n",
" log_every_n_steps = 5,\n",
" logger = tb_logger,\n",
" profiler = profiler,\n",
" deterministic = True,\n",
")\n",
"\n",
"log.info(_box(\"🚀 MULAI TRAINING\"))\n",
"log.info(f\" {'Epoch':>5} │ {'train_loss':>10} {'val_loss':>10} │ {'train_acc':>9} {'val_acc':>9} │ {'gap':>7} │ Status\")\n",
"log.info(f\" {_sep('─', 80)}\")\n",
"\n",
"t0 = time.time()\n",
"trainer.fit(model, train_loader, val_loader)\n",
"elapsed = time.time() - t0\n",
"\n",
"log.info(f\"\\n Total waktu training : {elapsed / 60:.1f} menit ({elapsed:.0f} detik)\")\n",
"log.info(f\" Early stop epoch : {trainer.current_epoch}\")\n",
"log.info(f\" Best val_acc : {save_pt_callback.best_acc:.4f}\")\n",
"\n",
"# ── Test ──────────────────────────────────────────────────\n",
"log.info(_box(\"🧪 TESTING MODEL TERBAIK (EMA weights)\"))\n",
"\n",
"best_weights = torch.load(os.path.join(WEIGHTS_DIR, \"best.pt\"), weights_only=True)\n",
"model.model.load_state_dict(best_weights[\"model_state\"])\n",
"\n",
"log.info(f\" Loaded : weights/best.pt\")\n",
"log.info(f\" Epoch : {best_weights.get('epoch', '?')}\")\n",
"log.info(f\" Val acc : {best_weights.get('val_acc', 0):.4f}\")\n",
"log.info(f\" Seed : {best_weights.get('seed', 'N/A')}\")\n",
"log.info(f\" EMA dec : {best_weights.get('ema_decay', 'N/A')}\")\n",
"\n",
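"# When test is called, EMACallback.on_test_start swaps in the EMA shadow weights\n",
"# and on_test_end restores the parameters that were in the model before the swap.\n",
"# NOTE(review): _apply_shadow copies the shadow from the END of training over the\n",
"# parameters just loaded from best.pt, so this test appears to evaluate the last\n",
"# EMA weights rather than the best checkpoint. TODO: confirm (e.g. clear\n",
"# ema_callback.shadow before calling trainer.test).\n",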
"t1 = time.time()\n",
"trainer.test(model, test_loader, ckpt_path=None)\n",
"log.info(f\" Waktu test : {time.time() - t1:.1f} detik\")\n",
"\n",
"# ── Summary ───────────────────────────────────────────────\n",
"log.info(_box(\"✅ SELESAI — RINGKASAN\"))\n",
"abs_run = os.path.abspath(RUN_DIR)\n",
"log.info(f\" Run dir : {abs_run}\")\n",
"log.info(f\" ├── weights/best.pt → val_acc: {save_pt_callback.best_acc:.4f} (EMA weights)\")\n",
"log.info(f\" ├── weights/last.pt (EMA weights)\")\n",
"log.info(f\" ├── results/confusion_matrix.png\")\n",
"log.info(f\" ├── profiler/profiler_report.txt\")\n",
"log.info(f\" └── tensorboard/\")\n",
"log.info(f\"\\n Total waktu : {(time.time() - t0) / 60:.1f} menit\")\n",
"log.info(f\"\\n 👉 Buka TensorBoard:\")\n",
"log.info(f\" tensorboard --logdir=runs/\")\n",
"log.info(_sep(\"═\"))"
]
},
{
"cell_type": "markdown",
"id": "9bd8c266",
"metadata": {},
"source": [
"Training MobileNetV2 dari nol, tanpa bobot pretrained ImageNet (weights=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba1070e2",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# ✅ HARUS di paling atas, sebelum import torch\n",
"# Membuat CUDA error muncul di baris yang tepat, bukan di manual_seed\n",
"os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import DataLoader\n",
"from torchvision import datasets, transforms, models\n",
"import lightning as L\n",
"from torchmetrics import Accuracy, ConfusionMatrix\n",
"from lightning.pytorch.callbacks import EarlyStopping, TQDMProgressBar, Callback\n",
"from lightning.pytorch.loggers import TensorBoardLogger\n",
"from lightning.pytorch.profilers import SimpleProfiler\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib\n",
"matplotlib.use(\"Agg\")\n",
"import seaborn as sns\n",
"import logging\n",
"import random\n",
"import time\n",
"import numpy as np\n",
"\n",
"# =============================================\n",
"# SETTING KONFIGURASI DI SINI\n",
"# =============================================\n",
"\n",
"TRAIN_DIR = \"Train 6\"\n",
"VAL_DIR = \"Val 6\"\n",
"TEST_DIR = \"Test 6\"\n",
"\n",
"NUM_CLASSES = 3\n",
"BATCH_SIZE = 16\n",
"MAX_EPOCHS = 100\n",
"LR = 1e-5\n",
"IMG_SIZE = 224\n",
"NUM_WORKERS = 4\n",
"BASE_DIR = \"runs 1\"\n",
"SEED = 30 # ← Ganti angka ini untuk hasil berbeda\n",
"\n",
"# =============================================\n",
"\n",
"\n",
"# ── Logger setup ─────────────────────────────────────────\n",
"class _SectionFormatter(logging.Formatter):\n",
" \"\"\"Formatter dengan warna ANSI untuk terminal.\"\"\"\n",
" GREY = \"\\x1b[38;5;245m\"\n",
" GREEN = \"\\x1b[32;1m\"\n",
" YELLOW = \"\\x1b[33;1m\"\n",
" CYAN = \"\\x1b[36;1m\"\n",
" BOLD = \"\\x1b[1m\"\n",
" RESET = \"\\x1b[0m\"\n",
"\n",
" LEVEL_COLORS = {\n",
" logging.DEBUG: GREY,\n",
" logging.INFO: \"\", # default, pakai warna dari message\n",
" logging.WARNING: YELLOW,\n",
" logging.ERROR: \"\\x1b[31;1m\",\n",
" logging.CRITICAL: \"\\x1b[41;1m\",\n",
" }\n",
"\n",
" def format(self, record):\n",
" color = self.LEVEL_COLORS.get(record.levelno, \"\")\n",
" msg = super().format(record)\n",
" if color:\n",
" return f\"{color}{msg}{self.RESET}\"\n",
" return msg\n",
"\n",
"\n",
"def _setup_logger():\n",
" logger = logging.getLogger(\"train\")\n",
" logger.setLevel(logging.DEBUG)\n",
" logger.propagate = False\n",
" if not logger.handlers:\n",
" ch = logging.StreamHandler()\n",
" ch.setFormatter(_SectionFormatter(\"%(message)s\"))\n",
" logger.addHandler(ch)\n",
" return logger\n",
"\n",
"\n",
"log = _setup_logger()\n",
"\n",
"\n",
"def _box(title: str, width: int = 60) -> str:\n",
" \"\"\"Buat kotak ASCII sederhana.\"\"\"\n",
" inner = f\" {title} \"\n",
" pad = max(0, width - len(inner) - 2)\n",
" l, r = pad // 2, pad - pad // 2\n",
" bar = \"─\" * width\n",
" return f\"\\n┌{bar}┐\\n│{' ' * l}{inner}{' ' * r}│\\n└{bar}┘\"\n",
"\n",
"\n",
"def _sep(char: str = \"─\", width: int = 62) -> str:\n",
" return char * width\n",
"\n",
"\n",
"# ── Seed ─────────────────────────────────────────────────\n",
"log.info(_box(\"🌱 INISIALISASI SEED & LINGKUNGAN\"))\n",
"\n",
"random.seed(SEED)\n",
"np.random.seed(SEED)\n",
"\n",
"# ✅ torch.manual_seed sudah memanggil cuda.manual_seed_all() secara internal.\n",
"# JANGAN panggil torch.cuda.manual_seed / manual_seed_all secara manual\n",
"# setelah ini — bisa memicu AcceleratorError jika CUDA context corrupt.\n",
"torch.manual_seed(SEED)\n",
"torch.backends.cudnn.deterministic = True\n",
"torch.backends.cudnn.benchmark = False\n",
"L.seed_everything(SEED, workers=True)\n",
"\n",
"log.info(f\" Seed : {SEED}\")\n",
"log.info(f\" CUDA tersedia : {torch.cuda.is_available()}\")\n",
"if torch.cuda.is_available():\n",
" log.info(f\" GPU : {torch.cuda.get_device_name(0)}\")\n",
" log.info(f\" VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
"\n",
"\n",
"# ── Run dir ───────────────────────────────────────────────\n",
"def get_next_run_dir(base_dir=BASE_DIR, prefix=\"train\"):\n",
" os.makedirs(base_dir, exist_ok=True)\n",
" existing = [\n",
" d for d in os.listdir(base_dir)\n",
" if os.path.isdir(os.path.join(base_dir, d)) and d.startswith(prefix)\n",
" ]\n",
" numbers = []\n",
" for d in existing:\n",
" try:\n",
" numbers.append(int(d.replace(prefix, \"\")))\n",
" except Exception:\n",
" pass\n",
" next_num = max(numbers) + 1 if numbers else 1\n",
" run_dir = os.path.join(base_dir, f\"{prefix}{next_num}\")\n",
" os.makedirs(run_dir, exist_ok=True)\n",
" return run_dir\n",
"\n",
"\n",
"RUN_DIR = get_next_run_dir(BASE_DIR, prefix=\"train\")\n",
"WEIGHTS_DIR = os.path.join(RUN_DIR, \"weights\")\n",
"RESULTS_DIR = os.path.join(RUN_DIR, \"results\")\n",
"PROFILER_DIR = os.path.join(RUN_DIR, \"profiler\")\n",
"\n",
"for d in (WEIGHTS_DIR, RESULTS_DIR, PROFILER_DIR):\n",
" os.makedirs(d, exist_ok=True)\n",
"\n",
"log.info(f\"\\n Run dir : {os.path.abspath(RUN_DIR)}\")\n",
"\n",
"\n",
"# ── Transform ─────────────────────────────────────────────\n",
"train_transform = transforms.Compose([\n",
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
"\n",
" # Geometri\n",
" transforms.RandomHorizontalFlip(p=0.5),\n",
" transforms.RandomVerticalFlip(p=0.3),\n",
" transforms.RandomRotation(degrees=15),\n",
" transforms.RandomAffine(\n",
" degrees=0,\n",
" translate=(0.1, 0.1),\n",
" scale=(0.85, 1.15),\n",
" ),\n",
"\n",
" # Warna & Cahaya\n",
" transforms.ColorJitter(\n",
" brightness=0.3,\n",
" contrast=0.3,\n",
" saturation=0.2,\n",
" hue=0.05,\n",
" ),\n",
"\n",
" # Blur\n",
" transforms.RandomApply([\n",
" transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0))\n",
" ], p=0.3),\n",
"\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406],\n",
" [0.229, 0.224, 0.225]),\n",
" transforms.RandomErasing(\n",
" p=0.3,\n",
" scale=(0.02, 0.1),\n",
" ratio=(0.3, 3.0),\n",
" ),\n",
"])\n",
"\n",
"val_test_transform = transforms.Compose([\n",
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406],\n",
" [0.229, 0.224, 0.225]),\n",
"])\n",
"\n",
"train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transform)\n",
"val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_test_transform)\n",
"test_dataset = datasets.ImageFolder(TEST_DIR, transform=val_test_transform)\n",
"\n",
"CLASS_NAMES = train_dataset.classes\n",
"\n",
"train_loader = DataLoader(\n",
" train_dataset, batch_size=BATCH_SIZE, shuffle=True,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"val_loader = DataLoader(\n",
" val_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"test_loader = DataLoader(\n",
" test_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
" num_workers=NUM_WORKERS, pin_memory=True,\n",
" persistent_workers=True, prefetch_factor=2,\n",
")\n",
"\n",
"log.info(_box(\"📂 DATASET\"))\n",
"log.info(f\" Kelas : {CLASS_NAMES}\")\n",
"log.info(f\" Train : {len(train_dataset):>6,} gambar ({len(train_loader)} batch)\")\n",
"log.info(f\" Validation : {len(val_dataset):>6,} gambar ({len(val_loader)} batch)\")\n",
"log.info(f\" Test : {len(test_dataset):>6,} gambar ({len(test_loader)} batch)\")\n",
"\n",
"log.info(_box(\"⚙️ KONFIGURASI TRAINING\"))\n",
"log.info(f\" Backbone : MobileNetV2 (ImageNet pretrained)\")\n",
"log.info(f\" Num classes : {NUM_CLASSES}\")\n",
"log.info(f\" Batch size : {BATCH_SIZE}\")\n",
"log.info(f\" Max epochs : {MAX_EPOCHS}\")\n",
"log.info(f\" Learning rate : {LR}\")\n",
"log.info(f\" Image size : {IMG_SIZE}×{IMG_SIZE}\")\n",
"log.info(f\" Seed : {SEED}\")\n",
"\n",
"\n",
"# ── Callback: simpan .pt ──────────────────────────────────\n",
"class SavePTCallback(Callback):\n",
"    \"\"\"Save plain ``torch.save`` checkpoints and log one summary line per epoch.\n",
"\n",
"    After each validation epoch (the sanity check is skipped) ``last.pt`` is\n",
"    overwritten, and ``best.pt`` is overwritten whenever ``val_acc`` beats the\n",
"    best value seen so far. The first validated epoch always writes\n",
"    ``best.pt`` so the file exists even when accuracy never rises above 0.\n",
"\n",
"    Args:\n",
"        save_dir: Directory receiving ``last.pt`` and ``best.pt``; created\n",
"            if it does not exist.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, save_dir: str):\n",
"        super().__init__()\n",
"        self.save_dir = save_dir\n",
"        self.best_acc = 0.0\n",
"        # False until best.pt has been written at least once.\n",
"        self._has_best = False\n",
"        self._prev_val_loss = float(\"inf\")\n",
"        os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"    @staticmethod\n",
"    def _metric(metrics, key, default):\n",
"        \"\"\"Return ``metrics[key]`` as a float, or ``default`` when the key is absent.\"\"\"\n",
"        value = metrics.get(key)\n",
"        return default if value is None else float(value)\n",
"\n",
"    def on_validation_epoch_end(self, trainer, pl_module):\n",
"        \"\"\"Write ``last.pt`` (and ``best.pt`` on improvement), then log the epoch.\"\"\"\n",
"        if trainer.sanity_checking:\n",
"            return\n",
"\n",
"        metrics = trainer.callback_metrics\n",
"        val_acc = self._metric(metrics, \"val_acc\", 0.0)\n",
"        val_loss = self._metric(metrics, \"val_loss\", 0.0)\n",
"        # A missing train metric becomes NaN rather than 0.0: a fabricated 0.0\n",
"        # would make the gap equal val_loss and trigger a false OVERFIT status.\n",
"        # NOTE(review): epoch-level train metrics may not be reduced yet when\n",
"        # this hook runs, so the values shown can lag by one epoch - confirm.\n",
"        train_loss = self._metric(metrics, \"train_loss\", float(\"nan\"))\n",
"        train_acc = self._metric(metrics, \"train_acc\", float(\"nan\"))\n",
"        epoch = trainer.current_epoch\n",
"\n",
"        # Always refresh the most recent checkpoint.\n",
"        ckpt = {\n",
"            \"epoch\": epoch,\n",
"            \"model_state\": pl_module.model.state_dict(),\n",
"            \"val_acc\": val_acc,\n",
"            \"class_names\": pl_module.class_names,\n",
"            \"seed\": SEED,\n",
"        }\n",
"        torch.save(ckpt, os.path.join(self.save_dir, \"last.pt\"))\n",
"\n",
"        # Status and best checkpoint. Comparisons against a NaN gap are False,\n",
"        # so a missing train loss can never be reported as overfitting.\n",
"        gap = val_loss - train_loss\n",
"        if val_acc > self.best_acc or not self._has_best:\n",
"            self.best_acc = val_acc\n",
"            self._has_best = True\n",
"            torch.save(ckpt, os.path.join(self.save_dir, \"best.pt\"))\n",
"            status = \"🏆 BEST\"\n",
"        elif gap > 0.15:\n",
"            status = \"⚠️ OVERFIT\"\n",
"        elif val_loss < self._prev_val_loss:\n",
"            status = \"📉 improving\"\n",
"        else:\n",
"            status = \"➡️ stagnant\"\n",
"\n",
"        self._prev_val_loss = val_loss\n",
"\n",
"        # Single aligned summary line per epoch.\n",
"        log.info(\n",
"            f\" Epoch {epoch:>3}/{MAX_EPOCHS} │ \"\n",
"            f\"loss {train_loss:.4f}→{val_loss:.4f} │ \"\n",
"            f\"acc {train_acc:.4f}→{val_acc:.4f} │ \"\n",
"            f\"gap {gap:+.4f} │ {status}\"\n",
"        )\n",
"\n",
"\n",
"# ── Lightning Module ──────────────────────────────────────\n",
"class SmartBinClassifier(L.LightningModule):\n",
"    \"\"\"MobileNetV2 image classifier with train/val/test accuracy tracking.\n",
"\n",
"    The torchvision MobileNetV2 classifier head is replaced by\n",
"    Dropout -> Linear(256) -> ReLU -> Dropout -> Linear(num_classes).\n",
"    At the end of testing the per-class accuracy is logged and a confusion\n",
"    matrix figure is saved to ``results_dir`` and sent to the logger when\n",
"    the logger can accept figures.\n",
"\n",
"    Args:\n",
"        num_classes: Number of target classes.\n",
"        lr: Stored as ``self.lr`` and in the saved hyperparameters.\n",
"        class_names: Labels used in log lines and as heatmap tick labels.\n",
"        results_dir: Directory that receives ``confusion_matrix.png``.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, num_classes: int, lr: float, class_names: list, results_dir: str):\n",
"        super().__init__()\n",
"        self.save_hyperparameters()\n",
"        self.lr = lr\n",
"        self.class_names = class_names\n",
"        self.results_dir = results_dir\n",
"\n",
"        # Backbone.\n",
"        # NOTE(review): weights=None requests no pretrained weights; confirm\n",
"        # this is intended if ImageNet-pretrained weights were expected.\n",
"        self.model = models.mobilenet_v2(weights=None)\n",
"\n",
"        # Keep the whole feature extractor trainable (made explicit here).\n",
"        for param in self.model.features.parameters():\n",
"            param.requires_grad = True\n",
"\n",
"        # Replace the stock classifier with a small two-layer head.\n",
"        in_features = self.model.classifier[1].in_features\n",
"        self.model.classifier = nn.Sequential(\n",
"            nn.Dropout(p=0.3),\n",
"            nn.Linear(in_features, 256),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(p=0.2),\n",
"            nn.Linear(256, num_classes),\n",
"        )\n",
"\n",
"        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.05)\n",
"        self.train_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.val_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.test_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.conf_mat = ConfusionMatrix(task=\"multiclass\", num_classes=num_classes)\n",
"\n",
"        # Per-batch test outputs, concatenated in on_test_epoch_end.\n",
"        self.test_preds = []\n",
"        self.test_labels = []\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return the class logits produced by the wrapped model for ``x``.\"\"\"\n",
"        return self.model(x)\n",
"\n",
"    def training_step(self, batch, batch_idx):\n",
"        \"\"\"Compute the loss for one batch and log epoch-level loss/accuracy.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        loss = self.criterion(logits, y)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.train_acc(preds, y)\n",
"        self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True, on_step=False)\n",
"        self.log(\"train_acc\", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)\n",
"        return loss\n",
"\n",
"    def validation_step(self, batch, batch_idx):\n",
"        \"\"\"Log validation loss and accuracy for one batch.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        loss = self.criterion(logits, y)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.val_acc(preds, y)\n",
"        self.log(\"val_loss\", loss, prog_bar=True, on_epoch=True)\n",
"        self.log(\"val_acc\", self.val_acc, prog_bar=True, on_epoch=True)\n",
"\n",
"    def on_train_epoch_end(self):\n",
"        \"\"\"Log the learning rate of the first optimizer parameter group.\"\"\"\n",
"        lr = self.optimizers().param_groups[0][\"lr\"]\n",
"        self.log(\"learning_rate\", lr)\n",
"\n",
"    def test_step(self, batch, batch_idx):\n",
"        \"\"\"Update test accuracy and keep predictions/labels on the CPU.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.test_acc(preds, y)\n",
"        self.log(\"test_acc\", self.test_acc, prog_bar=True)\n",
"        self.test_preds.append(preds.cpu())\n",
"        self.test_labels.append(y.cpu())\n",
"\n",
"    def on_test_epoch_end(self):\n",
"        \"\"\"Log per-class accuracy, then save and publish the confusion matrix.\"\"\"\n",
"        all_preds = torch.cat(self.test_preds)\n",
"        all_labels = torch.cat(self.test_labels)\n",
"\n",
"        # The collected tensors live on the CPU, so move the metric there too.\n",
"        self.conf_mat = self.conf_mat.to(\"cpu\")\n",
"        cm = self.conf_mat(all_preds, all_labels).numpy()\n",
"\n",
"        # Per-class accuracy. Row sums are clipped to 1 so that a class with\n",
"        # no test samples reports 0.0 instead of dividing by zero (NaN).\n",
"        row_totals = cm.sum(axis=1)\n",
"        per_class = cm.diagonal() / row_totals.clip(min=1)\n",
"        log.info(_box(\"📊 HASIL TEST\"))\n",
"        for i, (name, acc) in enumerate(zip(self.class_names, per_class)):\n",
"            log.info(f\" Akurasi [{name:>15}] : {acc:.4f} ({int(cm[i, i])}/{int(cm[i].sum())})\")\n",
"\n",
"        # Render the figure once and reuse it for the PNG and for the logger.\n",
"        fig = self._plot_confusion_matrix(cm)\n",
"        path = os.path.join(self.results_dir, \"confusion_matrix.png\")\n",
"        fig.savefig(path, dpi=150, bbox_inches=\"tight\")\n",
"        log.info(f\" Disimpan → {path}\")\n",
"\n",
"        # Only publish to the logger when it exposes add_figure; this skips a\n",
"        # missing logger or one that cannot take figures.\n",
"        experiment = getattr(self.logger, \"experiment\", None)\n",
"        if hasattr(experiment, \"add_figure\"):\n",
"            experiment.add_figure(\"Test/Confusion Matrix\", fig, self.current_epoch)\n",
"        plt.close(fig)\n",
"\n",
"        # Reset the accumulated state so a second test run starts clean.\n",
"        self.conf_mat.reset()\n",
"        self.test_preds = []\n",
"        self.test_labels = []\n",
"\n",
"    def configure_optimizers(self):\n",
"        \"\"\"Return AdamW with per-group learning rates and a cosine LR schedule.\"\"\"\n",
"        # NOTE(review): the learning rates are fixed per parameter group here;\n",
"        # self.lr is not used by the optimizer - confirm this is intended.\n",
"        optimizer = torch.optim.AdamW(\n",
"            [\n",
"                {\"params\": self.model.features.parameters(), \"lr\": 1e-4},\n",
"                {\"params\": self.model.classifier.parameters(), \"lr\": 1e-3},\n",
"            ],\n",
"            weight_decay=1e-3,\n",
"        )\n",
"        # NOTE(review): T_max is fixed at 30; confirm it matches the intended\n",
"        # number of training epochs.\n",
"        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
"            optimizer, T_max=30, eta_min=1e-6\n",
"        )\n",
"        return {\n",
"            \"optimizer\": optimizer,\n",
"            \"lr_scheduler\": {\"scheduler\": scheduler, \"monitor\": \"val_loss\"},\n",
"        }\n",
"\n",
"    def _plot_confusion_matrix(self, cm):\n",
"        \"\"\"Return a figure with the raw and the row-normalised confusion matrix.\n",
"\n",
"        Args:\n",
"            cm: Square array of counts; each row is divided by its own sum for\n",
"                the normalised panel.\n",
"        \"\"\"\n",
"        # Clip empty rows to 1 so they normalise to 0.0 instead of NaN.\n",
"        row_totals = cm.sum(axis=1, keepdims=True)\n",
"        cm_norm = cm.astype(\"float\") / row_totals.clip(min=1)\n",
"        fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
"        fig.suptitle(\"Confusion Matrix — MobileNetV2\", fontsize=14, fontweight=\"bold\")\n",
"\n",
"        for ax, data, fmt, title in zip(\n",
"            axes,\n",
"            [cm, cm_norm],\n",
"            [\"d\", \".2f\"],\n",
"            [\"Count\", \"Normalized\"],\n",
"        ):\n",
"            sns.heatmap(\n",
"                data, annot=True, fmt=fmt, cmap=\"Blues\",\n",
"                xticklabels=self.class_names,\n",
"                yticklabels=self.class_names,\n",
"                ax=ax,\n",
"            )\n",
"            ax.set_title(title)\n",
"            ax.set_xlabel(\"Predicted\")\n",
"            ax.set_ylabel(\"Actual\")\n",
"\n",
"        plt.tight_layout()\n",
"        return fig\n",
"\n",
"\n",
"# ── Callbacks & logger ────────────────────────────────────\n",
"# Callbacks, logger and profiler used by the Trainer below.\n",
"save_pt_callback = SavePTCallback(save_dir=WEIGHTS_DIR)\n",
"early_stop_callback = EarlyStopping(monitor=\"val_loss\", patience=12, mode=\"min\")\n",
"progress_bar = TQDMProgressBar(refresh_rate=10)\n",
"\n",
"tb_logger = TensorBoardLogger(save_dir=\"runs\", name=os.path.basename(RUN_DIR), version=0)\n",
"profiler = SimpleProfiler(dirpath=PROFILER_DIR, filename=\"profiler_report\")\n",
"\n",
"# ── Training ──────────────────────────────────────────────\n",
"model = SmartBinClassifier(NUM_CLASSES, LR, CLASS_NAMES, RESULTS_DIR)\n",
"\n",
"trainer = L.Trainer(\n",
"    max_epochs=MAX_EPOCHS,\n",
"    callbacks=[save_pt_callback, early_stop_callback, progress_bar],\n",
"    accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n",
"    devices=1,\n",
"    enable_model_summary=True,\n",
"    log_every_n_steps=5,\n",
"    logger=tb_logger,\n",
"    profiler=profiler,\n",
"    deterministic=True,\n",
")\n",
"\n",
"# Header row for the per-epoch lines emitted by SavePTCallback.\n",
"log.info(_box(\"🚀 MULAI TRAINING\"))\n",
"log.info(f\" {'Epoch':>5} │ {'train_loss':>10} {'val_loss':>10} │ {'train_acc':>9} {'val_acc':>9} │ {'gap':>7} │ Status\")\n",
"log.info(f\" {_sep('─', 80)}\")\n",
"\n",
"t0 = time.time()\n",
"trainer.fit(model, train_loader, val_loader)\n",
"elapsed = time.time() - t0\n",
"\n",
"log.info(f\"\\n Total waktu training : {elapsed / 60:.1f} menit ({elapsed:.0f} detik)\")\n",
"log.info(f\" Early stop epoch : {trainer.current_epoch}\")\n",
"log.info(f\" Best val_acc : {save_pt_callback.best_acc:.4f}\")\n",
"\n",
"# ── Test ──────────────────────────────────────────────────\n",
"log.info(_box(\"🧪 TESTING MODEL TERBAIK\"))\n",
"\n",
"# best.pt is written by SavePTCallback only under its own save condition,\n",
"# so check for the file instead of crashing in torch.load when it is absent.\n",
"best_path = os.path.join(WEIGHTS_DIR, \"best.pt\")\n",
"if os.path.isfile(best_path):\n",
"    best_weights = torch.load(best_path, weights_only=True)\n",
"    model.model.load_state_dict(best_weights[\"model_state\"])\n",
"\n",
"    log.info(f\" Loaded : weights/best.pt\")\n",
"    log.info(f\" Epoch : {best_weights.get('epoch', '?')}\")\n",
"    log.info(f\" Val acc : {best_weights.get('val_acc', 0):.4f}\")\n",
"    log.info(f\" Seed : {best_weights.get('seed', 'N/A')}\")\n",
"else:\n",
"    # Fall back to the weights currently held in memory by the model.\n",
"    log.warning(f\" best.pt tidak ditemukan ({best_path}) — test memakai bobot terakhir di memori\")\n",
"\n",
"t1 = time.time()\n",
"# ckpt_path=None: test the weights already loaded into the model object.\n",
"trainer.test(model, test_loader, ckpt_path=None)\n",
"log.info(f\" Waktu test : {time.time() - t1:.1f} detik\")\n",
"\n",
"# ── Summary ───────────────────────────────────────────────\n",
"log.info(_box(\"✅ SELESAI — RINGKASAN\"))\n",
"abs_run = os.path.abspath(RUN_DIR)\n",
"log.info(f\" Run dir : {abs_run}\")\n",
"log.info(f\" ├── weights/best.pt → val_acc: {save_pt_callback.best_acc:.4f}\")\n",
"log.info(f\" ├── weights/last.pt\")\n",
"log.info(f\" ├── results/confusion_matrix.png\")\n",
"log.info(f\" ├── profiler/profiler_report.txt\")\n",
"log.info(f\" └── tensorboard/\")\n",
"log.info(f\"\\n Total waktu : {(time.time() - t0) / 60:.1f} menit\")\n",
"log.info(f\"\\n 👉 Buka TensorBoard:\")\n",
"log.info(f\" tensorboard --logdir=runs/\")\n",
"log.info(_sep(\"═\"))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}