1146 lines
51 KiB
Plaintext
1146 lines
51 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f8d41c5b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Training dengan bobot pretrained ImageNet (IMAGENET1K_V1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6194bec9",
|
||
"metadata": {
|
||
"vscode": {
|
||
"languageId": "plaintext"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"\n",
|
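||
"# Set before importing torch so it is in place when CUDA initializes. CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous so a CUDA error is reported at the call that caused it; this slows training, so remove it once debugging is done.\n",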
|
||
"os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
|
||
"\n",
|
||
"import torch\n",
|
||
"import torch.nn as nn\n",
|
||
"from torch.utils.data import DataLoader\n",
|
||
"from torchvision import datasets, transforms, models\n",
|
||
"import lightning as L\n",
|
||
"from torchmetrics import Accuracy, ConfusionMatrix\n",
|
||
"from lightning.pytorch.callbacks import EarlyStopping, TQDMProgressBar, Callback\n",
|
||
"from lightning.pytorch.loggers import TensorBoardLogger\n",
|
||
"from lightning.pytorch.profilers import SimpleProfiler\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import matplotlib\n",
|
||
"matplotlib.use(\"Agg\")\n",
|
||
"import seaborn as sns\n",
|
||
"import logging\n",
|
||
"import random\n",
|
||
"import time\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"# =============================================\n",
|
||
"# SETTING KONFIGURASI DI SINI\n",
|
||
"# =============================================\n",
|
||
"\n",
|
||
"TRAIN_DIR = \"Train 6\"\n",
|
||
"VAL_DIR = \"Val 6\"\n",
|
||
"TEST_DIR = \"Test 6\"\n",
|
||
"\n",
|
||
"NUM_CLASSES = 3\n",
|
||
"BATCH_SIZE = 16\n",
|
||
"MAX_EPOCHS = 100\n",
|
||
"LR = 1e-5\n",
|
||
"IMG_SIZE = 224\n",
|
||
"NUM_WORKERS = 4\n",
|
||
"BASE_DIR = \"runs 2\"\n",
|
||
"SEED = 30\n",
|
||
"\n",
|
||
"EMA_DECAY = 0.999 # ← Decay untuk EMA (0.999–0.9999 umum dipakai)\n",
|
||
"\n",
|
||
"# =============================================\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Logger setup ─────────────────────────────────────────\n",
|
||
"class _SectionFormatter(logging.Formatter):\n",
|
||
" GREY = \"\\x1b[38;5;245m\"\n",
|
||
" GREEN = \"\\x1b[32;1m\"\n",
|
||
" YELLOW = \"\\x1b[33;1m\"\n",
|
||
" CYAN = \"\\x1b[36;1m\"\n",
|
||
" BOLD = \"\\x1b[1m\"\n",
|
||
" RESET = \"\\x1b[0m\"\n",
|
||
"\n",
|
||
" LEVEL_COLORS = {\n",
|
||
" logging.DEBUG: GREY,\n",
|
||
" logging.INFO: \"\",\n",
|
||
" logging.WARNING: YELLOW,\n",
|
||
" logging.ERROR: \"\\x1b[31;1m\",\n",
|
||
" logging.CRITICAL: \"\\x1b[41;1m\",\n",
|
||
" }\n",
|
||
"\n",
|
||
" def format(self, record):\n",
|
||
" color = self.LEVEL_COLORS.get(record.levelno, \"\")\n",
|
||
" msg = super().format(record)\n",
|
||
" if color:\n",
|
||
" return f\"{color}{msg}{self.RESET}\"\n",
|
||
" return msg\n",
|
||
"\n",
|
||
"\n",
|
||
"def _setup_logger():\n",
|
||
" logger = logging.getLogger(\"train\")\n",
|
||
" logger.setLevel(logging.DEBUG)\n",
|
||
" logger.propagate = False\n",
|
||
" if not logger.handlers:\n",
|
||
" ch = logging.StreamHandler()\n",
|
||
" ch.setFormatter(_SectionFormatter(\"%(message)s\"))\n",
|
||
" logger.addHandler(ch)\n",
|
||
" return logger\n",
|
||
"\n",
|
||
"\n",
|
||
"log = _setup_logger()\n",
|
||
"\n",
|
||
"\n",
|
||
"def _box(title: str, width: int = 60) -> str:\n",
|
||
" inner = f\" {title} \"\n",
|
||
" pad = max(0, width - len(inner) - 2)\n",
|
||
" l, r = pad // 2, pad - pad // 2\n",
|
||
" bar = \"─\" * width\n",
|
||
" return f\"\\n┌{bar}┐\\n│{' ' * l}{inner}{' ' * r}│\\n└{bar}┘\"\n",
|
||
"\n",
|
||
"\n",
|
||
"def _sep(char: str = \"─\", width: int = 62) -> str:\n",
|
||
" return char * width\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Seed ─────────────────────────────────────────────────\n",
|
||
"log.info(_box(\"🌱 INISIALISASI SEED & LINGKUNGAN\"))\n",
|
||
"\n",
|
||
"random.seed(SEED)\n",
|
||
"np.random.seed(SEED)\n",
|
||
"torch.manual_seed(SEED)\n",
|
||
"torch.backends.cudnn.deterministic = True\n",
|
||
"torch.backends.cudnn.benchmark = False\n",
|
||
"L.seed_everything(SEED, workers=True)\n",
|
||
"\n",
|
||
"log.info(f\" Seed : {SEED}\")\n",
|
||
"log.info(f\" CUDA tersedia : {torch.cuda.is_available()}\")\n",
|
||
"if torch.cuda.is_available():\n",
|
||
" log.info(f\" GPU : {torch.cuda.get_device_name(0)}\")\n",
|
||
" log.info(f\" VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Run dir ───────────────────────────────────────────────\n",
|
||
"def get_next_run_dir(base_dir=BASE_DIR, prefix=\"train\"):\n",
|
||
" os.makedirs(base_dir, exist_ok=True)\n",
|
||
" existing = [\n",
|
||
" d for d in os.listdir(base_dir)\n",
|
||
" if os.path.isdir(os.path.join(base_dir, d)) and d.startswith(prefix)\n",
|
||
" ]\n",
|
||
" numbers = []\n",
|
||
" for d in existing:\n",
|
||
" try:\n",
|
||
" numbers.append(int(d.replace(prefix, \"\")))\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" next_num = max(numbers) + 1 if numbers else 1\n",
|
||
" run_dir = os.path.join(base_dir, f\"{prefix}{next_num}\")\n",
|
||
" os.makedirs(run_dir, exist_ok=True)\n",
|
||
" return run_dir\n",
|
||
"\n",
|
||
"\n",
|
||
"RUN_DIR = get_next_run_dir(BASE_DIR, prefix=\"train\")\n",
|
||
"WEIGHTS_DIR = os.path.join(RUN_DIR, \"weights\")\n",
|
||
"RESULTS_DIR = os.path.join(RUN_DIR, \"results\")\n",
|
||
"PROFILER_DIR = os.path.join(RUN_DIR, \"profiler\")\n",
|
||
"\n",
|
||
"for d in (WEIGHTS_DIR, RESULTS_DIR, PROFILER_DIR):\n",
|
||
" os.makedirs(d, exist_ok=True)\n",
|
||
"\n",
|
||
"log.info(f\"\\n Run dir : {os.path.abspath(RUN_DIR)}\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Transform ─────────────────────────────────────────────\n",
|
||
"train_transform = transforms.Compose([\n",
|
||
"\n",
|
||
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
|
||
"\n",
|
||
" transforms.RandomHorizontalFlip(p=0.5),\n",
|
||
"\n",
|
||
" transforms.RandomAffine(\n",
|
||
" degrees=3,\n",
|
||
" translate=(0.03, 0.03),\n",
|
||
" scale=(0.98, 1.02)\n",
|
||
" ),\n",
|
||
"\n",
|
||
" transforms.ColorJitter(\n",
|
||
" brightness=0.25,\n",
|
||
" contrast=0.2,\n",
|
||
" saturation=0.1\n",
|
||
" ),\n",
|
||
"\n",
|
||
" transforms.ToTensor(),\n",
|
||
"\n",
|
||
" transforms.Normalize(\n",
|
||
" [0.485, 0.456, 0.406],\n",
|
||
" [0.229, 0.224, 0.225]\n",
|
||
" ),\n",
|
||
"])\n",
|
||
"\n",
|
||
"val_test_transform = transforms.Compose([\n",
|
||
" transforms.Resize((IMG_SIZE, IMG_SIZE)),\n",
|
||
" transforms.ToTensor(),\n",
|
||
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
|
||
"])\n",
|
||
"\n",
|
||
"train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transform)\n",
|
||
"val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_test_transform)\n",
|
||
"test_dataset = datasets.ImageFolder(TEST_DIR, transform=val_test_transform)\n",
|
||
"\n",
|
||
"CLASS_NAMES = train_dataset.classes\n",
|
||
"\n",
|
||
"train_loader = DataLoader(\n",
|
||
" train_dataset, batch_size=BATCH_SIZE, shuffle=True,\n",
|
||
" num_workers=NUM_WORKERS, pin_memory=True,\n",
|
||
" persistent_workers=True, prefetch_factor=2,\n",
|
||
")\n",
|
||
"val_loader = DataLoader(\n",
|
||
" val_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
|
||
" num_workers=NUM_WORKERS, pin_memory=True,\n",
|
||
" persistent_workers=True, prefetch_factor=2,\n",
|
||
")\n",
|
||
"test_loader = DataLoader(\n",
|
||
" test_dataset, batch_size=BATCH_SIZE, shuffle=False,\n",
|
||
" num_workers=NUM_WORKERS, pin_memory=True,\n",
|
||
" persistent_workers=True, prefetch_factor=2,\n",
|
||
")\n",
|
||
"\n",
|
||
"log.info(_box(\"📂 DATASET\"))\n",
|
||
"log.info(f\" Kelas : {CLASS_NAMES}\")\n",
|
||
"log.info(f\" Train : {len(train_dataset):>6,} gambar ({len(train_loader)} batch)\")\n",
|
||
"log.info(f\" Validation : {len(val_dataset):>6,} gambar ({len(val_loader)} batch)\")\n",
|
||
"log.info(f\" Test : {len(test_dataset):>6,} gambar ({len(test_loader)} batch)\")\n",
|
||
"\n",
|
||
"log.info(_box(\"⚙️ KONFIGURASI TRAINING\"))\n",
|
||
"log.info(f\" Backbone : MobileNetV2 (ImageNet pretrained)\")\n",
|
||
"log.info(f\" Num classes : {NUM_CLASSES}\")\n",
|
||
"log.info(f\" Batch size : {BATCH_SIZE}\")\n",
|
||
"log.info(f\" Max epochs : {MAX_EPOCHS}\")\n",
|
||
"log.info(f\" Learning rate : {LR}\")\n",
|
||
"log.info(f\" Image size : {IMG_SIZE}×{IMG_SIZE}\")\n",
|
||
"log.info(f\" Seed : {SEED}\")\n",
|
||
"log.info(f\" EMA decay : {EMA_DECAY}\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ══════════════════════════════════════════════════════════\n",
|
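||
"# EMA CALLBACK\n",
|
||
"# - Shadow weights are initialised from the trainable (requires_grad) parameters when training starts\n",
|
||
"# - Updated at the end of every training batch: shadow = decay*shadow + (1-decay)*param\n",
|
||
"# - Before validation (skipped during the sanity check) and before test: the EMA weights are swapped into the model\n",
|
||
"# - After validation/test: the raw training weights are restored\n",
|
||
"# - best.pt/last.pt are saved while the EMA weights are swapped in: they hold EMA values for the trainable parameters only; frozen parameters and buffers (e.g. BatchNorm running statistics) are not averaged\n",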
|
||
"# ══════════════════════════════════════════════════════════\n",
|
||
"class EMACallback(Callback):\n",
|
||
" \"\"\"Exponential Moving Average of model weights.\"\"\"\n",
|
||
"\n",
|
||
" def __init__(self, decay: float = 0.9999):\n",
|
||
" super().__init__()\n",
|
||
" self.decay = decay\n",
|
||
" self.shadow: dict[str, torch.Tensor] = {} # EMA weights\n",
|
||
" self.backup: dict[str, torch.Tensor] = {} # raw weights (saat swap)\n",
|
||
"\n",
|
||
" # ── Init shadow dari bobot awal ──────────────────────\n",
|
||
" def on_train_start(self, trainer, pl_module):\n",
|
||
" self.shadow = {\n",
|
||
" name: param.data.clone().detach()\n",
|
||
" for name, param in pl_module.model.named_parameters()\n",
|
||
" if param.requires_grad\n",
|
||
" }\n",
|
||
" log.info(f\" [EMA] Shadow weights diinisialisasi (decay={self.decay})\")\n",
|
||
"\n",
|
||
" # ── Update shadow setiap akhir batch ─────────────────\n",
|
||
" def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):\n",
|
||
" d = self.decay\n",
|
||
" with torch.no_grad():\n",
|
||
" for name, param in pl_module.model.named_parameters():\n",
|
||
" if param.requires_grad and name in self.shadow:\n",
|
||
" self.shadow[name].mul_(d).add_((1.0 - d) * param.data)\n",
|
||
"\n",
|
||
" # ── Swap ke EMA sebelum validasi ─────────────────────\n",
|
||
" def on_validation_start(self, trainer, pl_module):\n",
|
||
" if trainer.sanity_checking:\n",
|
||
" return\n",
|
||
" self._apply_shadow(pl_module)\n",
|
||
"\n",
|
||
" def on_validation_end(self, trainer, pl_module):\n",
|
||
" if trainer.sanity_checking:\n",
|
||
" return\n",
|
||
" self._restore(pl_module)\n",
|
||
"\n",
|
||
" # ── Swap ke EMA sebelum test ──────────────────────────\n",
|
||
" def on_test_start(self, trainer, pl_module):\n",
|
||
" self._apply_shadow(pl_module)\n",
|
||
"\n",
|
||
" def on_test_end(self, trainer, pl_module):\n",
|
||
" self._restore(pl_module)\n",
|
||
"\n",
|
||
" # ── Helper ───────────────────────────────────────────\n",
|
||
" def _apply_shadow(self, pl_module):\n",
|
||
" \"\"\"Tukar param model dengan shadow EMA, simpan aslinya ke backup.\"\"\"\n",
|
||
" for name, param in pl_module.model.named_parameters():\n",
|
||
" if name in self.shadow:\n",
|
||
" self.backup[name] = param.data.clone()\n",
|
||
" param.data.copy_(self.shadow[name])\n",
|
||
"\n",
|
||
" def _restore(self, pl_module):\n",
|
||
" \"\"\"Kembalikan param model ke nilai asli (raw training weights).\"\"\"\n",
|
||
" for name, param in pl_module.model.named_parameters():\n",
|
||
" if name in self.backup:\n",
|
||
" param.data.copy_(self.backup[name])\n",
|
||
" self.backup.clear()\n",
|
||
"\n",
|
||
" def state_dict(self):\n",
|
||
" \"\"\"Untuk checkpoint Lightning (opsional).\"\"\"\n",
|
||
" return {\"shadow\": {k: v.cpu() for k, v in self.shadow.items()},\n",
|
||
" \"decay\": self.decay}\n",
|
||
"\n",
|
||
" def load_state_dict(self, state_dict):\n",
|
||
" self.decay = state_dict[\"decay\"]\n",
|
||
" self.shadow = state_dict[\"shadow\"]\n",
|
||
"\n",
|
||
"\n",
|
||
"# ══════════════════════════════════════════════════════════\n",
|
||
"# SAVE PT CALLBACK (reads the train metrics from pl_module)\n",
|
||
"#\n",
|
||
"# Old problem:\n",
|
||
"# metrics.get(\"train_loss\", tensor(0)) in on_validation_epoch_end\n",
|
||
"# often returned 0 because callback_metrics does not necessarily hold the\n",
|
||
"# epoch-level train metrics yet when this hook runs.\n",
|
||
"#\n",
|
||
"# Current approach:\n",
|
||
"# SmartBinClassifier stores _train_loss_epoch and _train_acc_epoch in on_train_epoch_end,\n",
|
||
"# and this callback reads those attributes directly from pl_module.\n",
|
||
"# NOTE(review): Lightning runs the validation loop inside the training epoch, before on_train_epoch_end, so the values read here belong to the previous epoch (0.0 during the first epoch) - verify the hook order.\n",
|
||
"# ══════════════════════════════════════════════════════════\n",
|
||
"class SavePTCallback(Callback):\n",
|
||
" def __init__(self, save_dir: str, ema_callback: EMACallback):\n",
|
||
" self.save_dir = save_dir\n",
|
||
" self.ema_callback = ema_callback\n",
|
||
" self.best_acc = 0.0\n",
|
||
" self._prev_val_loss = float(\"inf\")\n",
|
||
" os.makedirs(save_dir, exist_ok=True)\n",
|
||
"\n",
|
||
" def on_validation_epoch_end(self, trainer, pl_module):\n",
|
||
" if trainer.sanity_checking:\n",
|
||
" return\n",
|
||
"\n",
|
||
" metrics = trainer.callback_metrics\n",
|
||
"\n",
|
||
" # ✅ FIX: train metrics dibaca dari atribut module (sudah di-set\n",
|
||
" # di on_train_epoch_end, jadi dijamin tidak 0 palsu)\n",
|
||
" train_loss = pl_module._train_loss_epoch # ← dari modul\n",
|
||
" train_acc = pl_module._train_acc_epoch # ← dari modul\n",
|
||
"\n",
|
||
" val_loss = metrics.get(\"val_loss\", torch.tensor(0.0)).item()\n",
|
||
" val_acc = metrics.get(\"val_acc\", torch.tensor(0.0)).item()\n",
|
||
" epoch = trainer.current_epoch\n",
|
||
"\n",
|
||
" # ── Selalu simpan last.pt (pakai EMA weights karena kita sudah swap) ──\n",
|
||
" ckpt = {\n",
|
||
" \"epoch\" : epoch,\n",
|
||
" \"model_state\" : pl_module.model.state_dict(), # EMA weights (sudah swap)\n",
|
||
" \"val_acc\" : val_acc,\n",
|
||
" \"class_names\" : pl_module.class_names,\n",
|
||
" \"seed\" : SEED,\n",
|
||
" \"ema_decay\" : self.ema_callback.decay,\n",
|
||
" }\n",
|
||
" torch.save(ckpt, os.path.join(self.save_dir, \"last.pt\"))\n",
|
||
"\n",
|
||
" # ── Simpan best.pt jika val_acc meningkat ──\n",
|
||
" gap = val_loss - train_loss\n",
|
||
" if val_acc > self.best_acc:\n",
|
||
" self.best_acc = val_acc\n",
|
||
" torch.save(ckpt, os.path.join(self.save_dir, \"best.pt\"))\n",
|
||
" status = \"🏆 BEST\"\n",
|
||
" elif gap > 0.15:\n",
|
||
" status = \"⚠️ OVERFIT\"\n",
|
||
" elif val_loss < self._prev_val_loss:\n",
|
||
" status = \"📉 improving\"\n",
|
||
" else:\n",
|
||
" status = \"➡️ stagnant\"\n",
|
||
"\n",
|
||
" self._prev_val_loss = val_loss\n",
|
||
"\n",
|
||
" log.info(\n",
|
||
" f\" Epoch {epoch:>3}/{MAX_EPOCHS} │ \"\n",
|
||
" f\"loss {train_loss:.4f}→{val_loss:.4f} │ \"\n",
|
||
" f\"acc {train_acc:.4f}→{val_acc:.4f} │ \"\n",
|
||
" f\"gap {gap:+.4f} │ {status}\"\n",
|
||
" )\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Lightning Module ──────────────────────────────────────\n",
|
||
"class SmartBinClassifier(L.LightningModule):\n",
|
||
" def __init__(self, num_classes: int, lr: float, class_names: list, results_dir: str):\n",
|
||
" super().__init__()\n",
|
||
" self.save_hyperparameters()\n",
|
||
" self.lr = lr\n",
|
||
" self.class_names = class_names\n",
|
||
" self.results_dir = results_dir\n",
|
||
"\n",
|
||
" # ✅ FIX: atribut untuk menyimpan metrik train per-epoch\n",
|
||
" # Di-set di on_train_epoch_end → dijamin valid saat val callback jalan\n",
|
||
" self._train_loss_epoch: float = 0.0\n",
|
||
" self._train_acc_epoch: float = 0.0\n",
|
||
"\n",
|
||
" # ── Backbone ──\n",
|
||
" self.model = models.mobilenet_v2(weights=\"IMAGENET1K_V1\")\n",
|
||
"\n",
|
||
" for param in self.model.features.parameters():\n",
|
||
" param.requires_grad = False\n",
|
||
" for param in self.model.features[-7:].parameters():\n",
|
||
" param.requires_grad = True\n",
|
||
"\n",
|
||
" in_features = self.model.classifier[-1].in_features\n",
|
||
" self.model.classifier = nn.Sequential(\n",
|
||
" nn.Linear(in_features, 128, bias=False),\n",
|
||
" nn.BatchNorm1d(128),\n",
|
||
" nn.ReLU(inplace=True),\n",
|
||
" nn.Dropout(0.4),\n",
|
||
"\n",
|
||
" nn.Linear(128, 64, bias=False),\n",
|
||
" nn.BatchNorm1d(64),\n",
|
||
" nn.ReLU(inplace=True),\n",
|
||
" nn.Dropout(0.2),\n",
|
||
"\n",
|
||
" nn.Linear(64, NUM_CLASSES)\n",
|
||
" )\n",
|
||
"\n",
|
||
" self.criterion = nn.CrossEntropyLoss(label_smoothing=0.05)\n",
|
||
" self.train_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
|
||
" self.val_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
|
||
" self.test_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
|
||
" self.conf_mat = ConfusionMatrix(task=\"multiclass\", num_classes=num_classes)\n",
|
||
"\n",
|
||
" # Akumulator loss untuk hitung rata-rata per epoch secara manual\n",
|
||
" self._train_loss_sum: float = 0.0\n",
|
||
" self._train_loss_count: int = 0\n",
|
||
"\n",
|
||
" self.test_preds = []\n",
|
||
" self.test_labels = []\n",
|
||
"\n",
|
||
" def forward(self, x):\n",
|
||
" return self.model(x)\n",
|
||
"\n",
|
||
" def training_step(self, batch, batch_idx):\n",
|
||
" x, y = batch\n",
|
||
" logits = self(x)\n",
|
||
" loss = self.criterion(logits, y)\n",
|
||
" preds = torch.argmax(logits, dim=1)\n",
|
||
" self.train_acc(preds, y)\n",
|
||
"\n",
|
||
" # ✅ Akumulasi loss manual agar on_train_epoch_end bisa hitung rata-rata\n",
|
||
" self._train_loss_sum += loss.detach().item()\n",
|
||
" self._train_loss_count += 1\n",
|
||
"\n",
|
||
" self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True, on_step=False)\n",
|
||
" self.log(\"train_acc\", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)\n",
|
||
" return loss\n",
|
||
"\n",
|
||
" def on_train_epoch_end(self):\n",
|
||
" # ✅ FIX: simpan metrik ke atribut — callback baca dari sini, bukan\n",
|
||
" # dari callback_metrics yang kadang belum ter-update\n",
|
||
" self._train_loss_epoch = (\n",
|
||
" self._train_loss_sum / self._train_loss_count\n",
|
||
" if self._train_loss_count > 0 else 0.0\n",
|
||
" )\n",
|
||
" self._train_acc_epoch = self.train_acc.compute().item()\n",
|
||
"\n",
|
||
" # Reset akumulator untuk epoch berikutnya\n",
|
||
" self._train_loss_sum = 0.0\n",
|
||
" self._train_loss_count = 0\n",
|
||
"\n",
|
||
" lr = self.optimizers().param_groups[0][\"lr\"]\n",
|
||
" self.log(\"learning_rate\", lr)\n",
|
||
"\n",
|
||
" def validation_step(self, batch, batch_idx):\n",
|
||
" x, y = batch\n",
|
||
" logits = self(x)\n",
|
||
" loss = self.criterion(logits, y)\n",
|
||
" preds = torch.argmax(logits, dim=1)\n",
|
||
" self.val_acc(preds, y)\n",
|
||
" self.log(\"val_loss\", loss, prog_bar=True, on_epoch=True)\n",
|
||
" self.log(\"val_acc\", self.val_acc, prog_bar=True, on_epoch=True)\n",
|
||
"\n",
|
||
" def test_step(self, batch, batch_idx):\n",
|
||
" x, y = batch\n",
|
||
" logits = self(x)\n",
|
||
" preds = torch.argmax(logits, dim=1)\n",
|
||
" self.test_acc(preds, y)\n",
|
||
" self.log(\"test_acc\", self.test_acc, prog_bar=True)\n",
|
||
" self.test_preds.append(preds.cpu())\n",
|
||
" self.test_labels.append(y.cpu())\n",
|
||
"\n",
|
||
" def on_test_epoch_end(self):\n",
|
||
" all_preds = torch.cat(self.test_preds)\n",
|
||
" all_labels = torch.cat(self.test_labels)\n",
|
||
"\n",
|
||
" self.conf_mat = self.conf_mat.to(\"cpu\")\n",
|
||
" cm = self.conf_mat(all_preds, all_labels).numpy()\n",
|
||
"\n",
|
||
" per_class = cm.diagonal() / cm.sum(axis=1)\n",
|
||
" log.info(_box(\"📊 HASIL TEST\"))\n",
|
||
" for i, (name, acc) in enumerate(zip(self.class_names, per_class)):\n",
|
||
" log.info(f\" Akurasi [{name:>15}] : {acc:.4f} ({int(cm[i, i])}/{int(cm[i].sum())})\")\n",
|
||
"\n",
|
||
" for fig, fname in [\n",
|
||
" (self._plot_confusion_matrix(cm), \"confusion_matrix.png\"),\n",
|
||
" ]:\n",
|
||
" path = os.path.join(self.results_dir, fname)\n",
|
||
" fig.savefig(path, dpi=150, bbox_inches=\"tight\")\n",
|
||
" plt.close(fig)\n",
|
||
" log.info(f\" Disimpan → {path}\")\n",
|
||
"\n",
|
||
" fig2 = self._plot_confusion_matrix(cm)\n",
|
||
" self.logger.experiment.add_figure(\"Test/Confusion Matrix\", fig2, self.current_epoch)\n",
|
||
" plt.close(fig2)\n",
|
||
"\n",
|
||
" self.conf_mat.reset()\n",
|
||
" self.test_preds = []\n",
|
||
" self.test_labels = []\n",
|
||
"\n",
|
||
" def configure_optimizers(self):\n",
|
||
" optimizer = torch.optim.AdamW(\n",
|
||
" [\n",
|
||
" {\"params\": self.model.features[-7:].parameters(), \"lr\": 1e-5},\n",
|
||
" {\"params\": self.model.classifier.parameters(), \"lr\": 1e-3},\n",
|
||
" ],\n",
|
||
" weight_decay=1e-3,\n",
|
||
" )\n",
|
||
" scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
|
||
" optimizer, T_max=MAX_EPOCHS, eta_min=1e-6\n",
|
||
" )\n",
|
||
" return {\n",
|
||
" \"optimizer\" : optimizer,\n",
|
||
" \"lr_scheduler\": {\"scheduler\": scheduler, \"monitor\": \"val_loss\"},\n",
|
||
" }\n",
|
||
"\n",
|
||
" def _plot_confusion_matrix(self, cm):\n",
|
||
" cm_norm = cm.astype(\"float\") / cm.sum(axis=1, keepdims=True)\n",
|
||
" fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
|
||
" fig.suptitle(\"Confusion Matrix — MobileNetV2\", fontsize=14, fontweight=\"bold\")\n",
|
||
"\n",
|
||
" for ax, data, fmt, title in zip(\n",
|
||
" axes,\n",
|
||
" [cm, cm_norm],\n",
|
||
" [\"d\", \".2f\"],\n",
|
||
" [\"Count\", \"Normalized\"],\n",
|
||
" ):\n",
|
||
" sns.heatmap(\n",
|
||
" data, annot=True, fmt=fmt, cmap=\"Blues\",\n",
|
||
" xticklabels=self.class_names,\n",
|
||
" yticklabels=self.class_names,\n",
|
||
" ax=ax,\n",
|
||
" )\n",
|
||
" ax.set_title(title)\n",
|
||
" ax.set_xlabel(\"Predicted\")\n",
|
||
" ax.set_ylabel(\"Actual\")\n",
|
||
"\n",
|
||
" plt.tight_layout()\n",
|
||
" return fig\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Callbacks & logger ────────────────────────────────────\n",
|
||
"ema_callback = EMACallback(decay=EMA_DECAY)\n",
|
||
"save_pt_callback = SavePTCallback(save_dir=WEIGHTS_DIR, ema_callback=ema_callback)\n",
|
||
"early_stop_callback = EarlyStopping(monitor=\"val_loss\", patience=12, mode=\"min\")\n",
|
||
"progress_bar = TQDMProgressBar(refresh_rate=10)\n",
|
||
"\n",
|
||
"tb_logger = TensorBoardLogger(save_dir=\"runs\", name=os.path.basename(RUN_DIR), version=0)\n",
|
||
"profiler = SimpleProfiler(dirpath=PROFILER_DIR, filename=\"profiler_report\")\n",
|
||
"\n",
|
||
"# ── Training ──────────────────────────────────────────────\n",
|
||
"model = SmartBinClassifier(NUM_CLASSES, LR, CLASS_NAMES, RESULTS_DIR)\n",
|
||
"\n",
|
||
"trainer = L.Trainer(\n",
|
||
" max_epochs = MAX_EPOCHS,\n",
|
||
" callbacks = [\n",
|
||
" ema_callback, # ← EMA harus urutan pertama\n",
|
||
" save_pt_callback,\n",
|
||
" early_stop_callback,\n",
|
||
" progress_bar,\n",
|
||
" ],\n",
|
||
" accelerator = \"gpu\" if torch.cuda.is_available() else \"cpu\",\n",
|
||
" devices = 1,\n",
|
||
" enable_model_summary = True,\n",
|
||
" log_every_n_steps = 5,\n",
|
||
" logger = tb_logger,\n",
|
||
" profiler = profiler,\n",
|
||
" deterministic = True,\n",
|
||
")\n",
|
||
"\n",
|
||
"log.info(_box(\"🚀 MULAI TRAINING\"))\n",
|
||
"log.info(f\" {'Epoch':>5} │ {'train_loss':>10} {'val_loss':>10} │ {'train_acc':>9} {'val_acc':>9} │ {'gap':>7} │ Status\")\n",
|
||
"log.info(f\" {_sep('─', 80)}\")\n",
|
||
"\n",
|
||
"t0 = time.time()\n",
|
||
"trainer.fit(model, train_loader, val_loader)\n",
|
||
"elapsed = time.time() - t0\n",
|
||
"\n",
|
||
"log.info(f\"\\n Total waktu training : {elapsed / 60:.1f} menit ({elapsed:.0f} detik)\")\n",
|
||
"log.info(f\" Early stop epoch : {trainer.current_epoch}\")\n",
|
||
"log.info(f\" Best val_acc : {save_pt_callback.best_acc:.4f}\")\n",
|
||
"\n",
|
||
"# ── Test ──────────────────────────────────────────────────\n",
|
||
"log.info(_box(\"🧪 TESTING MODEL TERBAIK (EMA weights)\"))\n",
|
||
"\n",
|
||
"best_weights = torch.load(os.path.join(WEIGHTS_DIR, \"best.pt\"), weights_only=True)\n",
|
||
"model.model.load_state_dict(best_weights[\"model_state\"])\n",
|
||
"\n",
|
||
"log.info(f\" Loaded : weights/best.pt\")\n",
|
||
"log.info(f\" Epoch : {best_weights.get('epoch', '?')}\")\n",
|
||
"log.info(f\" Val acc : {best_weights.get('val_acc', 0):.4f}\")\n",
|
||
"log.info(f\" Seed : {best_weights.get('seed', 'N/A')}\")\n",
|
||
"log.info(f\" EMA dec : {best_weights.get('ema_decay', 'N/A')}\")\n",
|
||
"\n",
|
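||
"# NOTE(review): trainer.test() fires EMACallback.on_test_start, which copies the final EMA shadow over every\n",
|
||
"# trainable parameter and so replaces the best.pt values loaded above; on_test_end then restores those best.pt values.\n",
|
||
"# The test therefore scores the last EMA shadow rather than best.pt - run the test without ema_callback if best.pt is what should be evaluated.\n",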
|
||
"t1 = time.time()\n",
|
||
"trainer.test(model, test_loader, ckpt_path=None)\n",
|
||
"log.info(f\" Waktu test : {time.time() - t1:.1f} detik\")\n",
|
||
"\n",
|
||
"# ── Summary ───────────────────────────────────────────────\n",
|
||
"log.info(_box(\"✅ SELESAI — RINGKASAN\"))\n",
|
||
"abs_run = os.path.abspath(RUN_DIR)\n",
|
||
"log.info(f\" Run dir : {abs_run}\")\n",
|
||
"log.info(f\" ├── weights/best.pt → val_acc: {save_pt_callback.best_acc:.4f} (EMA weights)\")\n",
|
||
"log.info(f\" ├── weights/last.pt (EMA weights)\")\n",
|
||
"log.info(f\" ├── results/confusion_matrix.png\")\n",
|
||
"log.info(f\" ├── profiler/profiler_report.txt\")\n",
|
||
"log.info(f\" └── tensorboard/\")\n",
|
||
"log.info(f\"\\n Total waktu : {(time.time() - t0) / 60:.1f} menit\")\n",
|
||
"log.info(f\"\\n 👉 Buka TensorBoard:\")\n",
|
||
"log.info(f\" tensorboard --logdir=runs/\")\n",
|
||
"log.info(_sep(\"═\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9bd8c266",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Training tanpa bobot pretrained ImageNet (IMAGENET1K_V1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ba1070e2",
|
||
"metadata": {
|
||
"vscode": {
|
||
"languageId": "plaintext"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"\n",
|
||
"# ✅ HARUS di paling atas, sebelum import torch\n",
|
||
"# Membuat CUDA error muncul di baris yang tepat, bukan di manual_seed\n",
|
||
"os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
|
||
"\n",
|
||
"import torch\n",
|
||
"import torch.nn as nn\n",
|
||
"from torch.utils.data import DataLoader\n",
|
||
"from torchvision import datasets, transforms, models\n",
|
||
"import lightning as L\n",
|
||
"from torchmetrics import Accuracy, ConfusionMatrix\n",
|
||
"from lightning.pytorch.callbacks import EarlyStopping, TQDMProgressBar, Callback\n",
|
||
"from lightning.pytorch.loggers import TensorBoardLogger\n",
|
||
"from lightning.pytorch.profilers import SimpleProfiler\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import matplotlib\n",
|
||
"matplotlib.use(\"Agg\")\n",
|
||
"import seaborn as sns\n",
|
||
"import logging\n",
|
||
"import random\n",
|
||
"import time\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"# =============================================\n",
|
||
"# SETTING KONFIGURASI DI SINI\n",
|
||
"# =============================================\n",
|
||
"\n",
|
||
"TRAIN_DIR = \"Train 6\"\n",
|
||
"VAL_DIR = \"Val 6\"\n",
|
||
"TEST_DIR = \"Test 6\"\n",
|
||
"\n",
|
||
"NUM_CLASSES = 3\n",
|
||
"BATCH_SIZE = 16\n",
|
||
"MAX_EPOCHS = 100\n",
|
||
"LR = 1e-5\n",
|
||
"IMG_SIZE = 224\n",
|
||
"NUM_WORKERS = 4\n",
|
||
"BASE_DIR = \"runs 1\"\n",
|
||
"SEED = 30 # ← Ganti angka ini untuk hasil berbeda\n",
|
||
"\n",
|
||
"# =============================================\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Logger setup ─────────────────────────────────────────\n",
|
||
"class _SectionFormatter(logging.Formatter):\n",
|
||
" \"\"\"Formatter dengan warna ANSI untuk terminal.\"\"\"\n",
|
||
" GREY = \"\\x1b[38;5;245m\"\n",
|
||
" GREEN = \"\\x1b[32;1m\"\n",
|
||
" YELLOW = \"\\x1b[33;1m\"\n",
|
||
" CYAN = \"\\x1b[36;1m\"\n",
|
||
" BOLD = \"\\x1b[1m\"\n",
|
||
" RESET = \"\\x1b[0m\"\n",
|
||
"\n",
|
||
" LEVEL_COLORS = {\n",
|
||
" logging.DEBUG: GREY,\n",
|
||
" logging.INFO: \"\", # default, pakai warna dari message\n",
|
||
" logging.WARNING: YELLOW,\n",
|
||
" logging.ERROR: \"\\x1b[31;1m\",\n",
|
||
" logging.CRITICAL: \"\\x1b[41;1m\",\n",
|
||
" }\n",
|
||
"\n",
|
||
" def format(self, record):\n",
|
||
" color = self.LEVEL_COLORS.get(record.levelno, \"\")\n",
|
||
" msg = super().format(record)\n",
|
||
" if color:\n",
|
||
" return f\"{color}{msg}{self.RESET}\"\n",
|
||
" return msg\n",
|
||
"\n",
|
||
"\n",
|
||
"def _setup_logger():\n",
|
||
" logger = logging.getLogger(\"train\")\n",
|
||
" logger.setLevel(logging.DEBUG)\n",
|
||
" logger.propagate = False\n",
|
||
" if not logger.handlers:\n",
|
||
" ch = logging.StreamHandler()\n",
|
||
" ch.setFormatter(_SectionFormatter(\"%(message)s\"))\n",
|
||
" logger.addHandler(ch)\n",
|
||
" return logger\n",
|
||
"\n",
|
||
"\n",
|
||
"log = _setup_logger()\n",
|
||
"\n",
|
||
"\n",
|
||
"def _box(title: str, width: int = 60) -> str:\n",
|
||
" \"\"\"Buat kotak ASCII sederhana.\"\"\"\n",
|
||
" inner = f\" {title} \"\n",
|
||
" pad = max(0, width - len(inner) - 2)\n",
|
||
" l, r = pad // 2, pad - pad // 2\n",
|
||
" bar = \"─\" * width\n",
|
||
" return f\"\\n┌{bar}┐\\n│{' ' * l}{inner}{' ' * r}│\\n└{bar}┘\"\n",
|
||
"\n",
|
||
"\n",
|
||
"def _sep(char: str = \"─\", width: int = 62) -> str:\n",
|
||
" return char * width\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Seed ─────────────────────────────────────────────────\n",
|
||
"log.info(_box(\"🌱 INISIALISASI SEED & LINGKUNGAN\"))\n",
|
||
"\n",
|
||
"random.seed(SEED)\n",
|
||
"np.random.seed(SEED)\n",
|
||
"\n",
|
||
"# torch.manual_seed already seeds the CUDA generators as well, and L.seed_everything below seeds\n",
|
||
"# random, numpy and torch again, so no separate torch.cuda.manual_seed / manual_seed_all call is needed here.\n",
|
||
"# NOTE(review): the original note says such a call can raise AcceleratorError when the CUDA context is corrupt - unverified.\n",
|
||
"torch.manual_seed(SEED)\n",
|
||
"torch.backends.cudnn.deterministic = True\n",
|
||
"torch.backends.cudnn.benchmark = False\n",
|
||
"L.seed_everything(SEED, workers=True)\n",
|
||
"\n",
|
||
"log.info(f\" Seed : {SEED}\")\n",
|
||
"log.info(f\" CUDA tersedia : {torch.cuda.is_available()}\")\n",
|
||
"if torch.cuda.is_available():\n",
|
||
" log.info(f\" GPU : {torch.cuda.get_device_name(0)}\")\n",
|
||
" log.info(f\" VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Run dir ───────────────────────────────────────────────\n",
|
||
"def get_next_run_dir(base_dir=BASE_DIR, prefix=\"train\"):\n",
|
||
" os.makedirs(base_dir, exist_ok=True)\n",
|
||
" existing = [\n",
|
||
" d for d in os.listdir(base_dir)\n",
|
||
" if os.path.isdir(os.path.join(base_dir, d)) and d.startswith(prefix)\n",
|
||
" ]\n",
|
||
" numbers = []\n",
|
||
" for d in existing:\n",
|
||
" try:\n",
|
||
" numbers.append(int(d.replace(prefix, \"\")))\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" next_num = max(numbers) + 1 if numbers else 1\n",
|
||
" run_dir = os.path.join(base_dir, f\"{prefix}{next_num}\")\n",
|
||
" os.makedirs(run_dir, exist_ok=True)\n",
|
||
" return run_dir\n",
|
||
"\n",
|
||
"\n",
|
||
"RUN_DIR = get_next_run_dir(BASE_DIR, prefix=\"train\")\n",
|
||
"WEIGHTS_DIR = os.path.join(RUN_DIR, \"weights\")\n",
|
||
"RESULTS_DIR = os.path.join(RUN_DIR, \"results\")\n",
|
||
"PROFILER_DIR = os.path.join(RUN_DIR, \"profiler\")\n",
|
||
"\n",
|
||
"for d in (WEIGHTS_DIR, RESULTS_DIR, PROFILER_DIR):\n",
|
||
" os.makedirs(d, exist_ok=True)\n",
|
||
"\n",
|
||
"log.info(f\"\\n Run dir : {os.path.abspath(RUN_DIR)}\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Transform ─────────────────────────────────────────────\n",
|
||
"# Pieces shared by both pipelines: the target size and the per-channel\n",
"# mean / std handed to Normalize.\n",
"_target_size = (IMG_SIZE, IMG_SIZE)\n",
"_norm_mean = [0.485, 0.456, 0.406]\n",
"_norm_std = [0.229, 0.224, 0.225]\n",
"\n",
"# Geometric augmentation: flips, small rotation, shift and zoom.\n",
"_geometry_aug = [\n",
"    transforms.RandomHorizontalFlip(p=0.5),\n",
"    transforms.RandomVerticalFlip(p=0.3),\n",
"    transforms.RandomRotation(degrees=15),\n",
"    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.85, 1.15)),\n",
"]\n",
"\n",
"# Colour / lighting jitter, then a Gaussian blur applied with probability 0.3.\n",
"_appearance_aug = [\n",
"    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.05),\n",
"    transforms.RandomApply(\n",
"        [transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0))],\n",
"        p=0.3,\n",
"    ),\n",
"]\n",
"\n",
"# Training pipeline: resize -> augmentation -> tensor -> normalise -> erasing.\n",
"train_transform = transforms.Compose(\n",
"    [transforms.Resize(_target_size)]\n",
"    + _geometry_aug\n",
"    + _appearance_aug\n",
"    + [\n",
"        transforms.ToTensor(),\n",
"        transforms.Normalize(_norm_mean, _norm_std),\n",
"        # Random erasing is the last step and runs on the normalised tensor.\n",
"        transforms.RandomErasing(p=0.3, scale=(0.02, 0.1), ratio=(0.3, 3.0)),\n",
"    ]\n",
")\n",
"\n",
"# Validation / test pipeline: deterministic, no augmentation.\n",
"val_test_transform = transforms.Compose(\n",
"    [\n",
"        transforms.Resize(_target_size),\n",
"        transforms.ToTensor(),\n",
"        transforms.Normalize(_norm_mean, _norm_std),\n",
"    ]\n",
")\n",
|
||
"\n",
|
||
"# One ImageFolder per split; only the training split gets augmentation.\n",
"train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transform)\n",
"val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_test_transform)\n",
"test_dataset = datasets.ImageFolder(TEST_DIR, transform=val_test_transform)\n",
"\n",
"# Class labels are taken from the training split's folder names.\n",
"CLASS_NAMES = train_dataset.classes\n",
"\n",
"# Options shared by all three loaders. DataLoader rejects persistent_workers\n",
"# and prefetch_factor when num_workers is 0, so they are added only when\n",
"# worker processes are used; pinned memory is requested only when CUDA exists.\n",
"_loader_kwargs = dict(\n",
"    batch_size=BATCH_SIZE,\n",
"    num_workers=NUM_WORKERS,\n",
"    pin_memory=torch.cuda.is_available(),\n",
")\n",
"if NUM_WORKERS > 0:\n",
"    _loader_kwargs.update(persistent_workers=True, prefetch_factor=2)\n",
"\n",
"# Only the training loader shuffles.\n",
"train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)\n",
"val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)\n",
"test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)\n",
|
||
"\n",
|
||
"# Dataset overview: class labels plus image and batch counts per split.\n",
"log.info(_box(\"📂 DATASET\"))\n",
"log.info(f\" Kelas : {CLASS_NAMES}\")\n",
"log.info(f\" Train : {len(train_dataset):>6,} gambar ({len(train_loader)} batch)\")\n",
"log.info(f\" Validation : {len(val_dataset):>6,} gambar ({len(val_loader)} batch)\")\n",
"log.info(f\" Test : {len(test_dataset):>6,} gambar ({len(test_loader)} batch)\")\n",
"\n",
"# Training configuration as set by the constants at the top of the cell.\n",
"log.info(_box(\"⚙️ KONFIGURASI TRAINING\"))\n",
"# The classifier in this cell is created with mobilenet_v2(weights=None), so\n",
"# the message must not claim that ImageNet weights are loaded.\n",
"log.info(\" Backbone : MobileNetV2 (weights=None, not pretrained)\")\n",
"log.info(f\" Num classes : {NUM_CLASSES}\")\n",
"log.info(f\" Batch size : {BATCH_SIZE}\")\n",
"log.info(f\" Max epochs : {MAX_EPOCHS}\")\n",
"# NOTE(review): LR is passed to SmartBinClassifier, but configure_optimizers\n",
"# sets fixed per-group rates (1e-4 features / 1e-3 classifier) instead.\n",
"log.info(f\" Learning rate : {LR}\")\n",
"log.info(f\" Image size : {IMG_SIZE}×{IMG_SIZE}\")\n",
"log.info(f\" Seed : {SEED}\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Callback: simpan .pt ──────────────────────────────────\n",
|
||
"class SavePTCallback(Callback):\n",
"    \"\"\"Write plain ``.pt`` checkpoints and log one summary line per epoch.\n",
"\n",
"    After every validation epoch (the sanity check is skipped) ``last.pt`` is\n",
"    overwritten, and ``best.pt`` is written whenever ``val_acc`` beats the best\n",
"    value seen so far.\n",
"\n",
"    Args:\n",
"        save_dir: Folder for ``last.pt`` / ``best.pt``; created if missing.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, save_dir: str):\n",
"        super().__init__()\n",
"        self.save_dir = save_dir\n",
"        # Start below any reachable accuracy so the first validated epoch always\n",
"        # writes best.pt, even when its val_acc is exactly 0.0. With a 0.0 start\n",
"        # and a strict \">\" such a run never produced best.pt at all.\n",
"        self.best_acc = float(\"-inf\")\n",
"        self._prev_val_loss = float(\"inf\")\n",
"        os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"    @staticmethod\n",
"    def _metric(metrics, key, default):\n",
"        \"\"\"Return ``metrics[key]`` as a float, or ``default`` when the key is absent.\"\"\"\n",
"        value = metrics.get(key)\n",
"        return default if value is None else float(value)\n",
"\n",
"    def on_validation_epoch_end(self, trainer, pl_module):\n",
"        \"\"\"Save ``last.pt`` / ``best.pt`` and log the epoch summary line.\"\"\"\n",
"        if trainer.sanity_checking:\n",
"            return\n",
"\n",
"        metrics = trainer.callback_metrics\n",
"        val_acc = self._metric(metrics, \"val_acc\", 0.0)\n",
"        val_loss = self._metric(metrics, \"val_loss\", 0.0)\n",
"        # A missing train metric is reported as nan instead of 0.0: a fake 0.0\n",
"        # made the gap equal val_loss and could flag a bogus overfit.\n",
"        # NOTE(review): the train_* epoch values may not be published yet when\n",
"        # this hook fires (presumably they lag by one epoch) - confirm.\n",
"        train_loss = self._metric(metrics, \"train_loss\", float(\"nan\"))\n",
"        train_acc = self._metric(metrics, \"train_acc\", float(\"nan\"))\n",
"        epoch = trainer.current_epoch\n",
"\n",
"        # ── Always save the latest weights ──\n",
"        ckpt = {\n",
"            \"epoch\": epoch,\n",
"            \"model_state\": pl_module.model.state_dict(),\n",
"            \"val_acc\": val_acc,\n",
"            \"class_names\": pl_module.class_names,\n",
"            \"seed\": SEED,\n",
"        }\n",
"        torch.save(ckpt, os.path.join(self.save_dir, \"last.pt\"))\n",
"\n",
"        # ── Status & best ──\n",
"        # Priority: new best accuracy > large loss gap > falling val loss.\n",
"        gap = val_loss - train_loss\n",
"        if val_acc > self.best_acc:\n",
"            self.best_acc = val_acc\n",
"            torch.save(ckpt, os.path.join(self.save_dir, \"best.pt\"))\n",
"            status = \"🏆 BEST\"\n",
"        elif gap > 0.15:\n",
"            status = \"⚠️ OVERFIT\"\n",
"        elif val_loss < self._prev_val_loss:\n",
"            status = \"📉 improving\"\n",
"        else:\n",
"            status = \"➡️ stagnant\"\n",
"\n",
"        self._prev_val_loss = val_loss\n",
"\n",
"        # ── One tidy log line per epoch ──\n",
"        log.info(\n",
"            f\" Epoch {epoch:>3}/{MAX_EPOCHS} │ \"\n",
"            f\"loss {train_loss:.4f}→{val_loss:.4f} │ \"\n",
"            f\"acc {train_acc:.4f}→{val_acc:.4f} │ \"\n",
"            f\"gap {gap:+.4f} │ {status}\"\n",
"        )\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Lightning Module ──────────────────────────────────────\n",
|
||
"class SmartBinClassifier(L.LightningModule):\n",
"    \"\"\"MobileNetV2 image classifier with a custom two-layer head.\n",
"\n",
"    Args:\n",
"        num_classes: Number of output classes.\n",
"        lr: Stored on the module and in the saved hyper-parameters. The\n",
"            optimizer built in ``configure_optimizers`` does not read it; it\n",
"            uses fixed per-group learning rates.\n",
"        class_names: Class labels used in logs and on the confusion-matrix axes.\n",
"        results_dir: Folder where the test confusion-matrix image is written.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, num_classes: int, lr: float, class_names: list, results_dir: str):\n",
"        super().__init__()\n",
"        self.save_hyperparameters()\n",
"        self.lr = lr\n",
"        self.class_names = class_names\n",
"        self.results_dir = results_dir\n",
"\n",
"        # ── Backbone ──\n",
"        # weights=None: this call loads no pretrained weights.\n",
"        self.model = models.mobilenet_v2(weights=None)\n",
"\n",
"        # Every backbone parameter stays trainable (nothing is frozen).\n",
"        for param in self.model.features.parameters():\n",
"            param.requires_grad = True\n",
"\n",
"        # Replace the stock classifier with a small MLP head.\n",
"        in_features = self.model.classifier[1].in_features\n",
"        self.model.classifier = nn.Sequential(\n",
"            nn.Dropout(p=0.3),\n",
"            nn.Linear(in_features, 256),\n",
"            nn.ReLU(),\n",
"            nn.Dropout(p=0.2),\n",
"            nn.Linear(256, num_classes),\n",
"        )\n",
"\n",
"        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.05)\n",
"        self.train_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.val_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.test_acc = Accuracy(task=\"multiclass\", num_classes=num_classes)\n",
"        self.conf_mat = ConfusionMatrix(task=\"multiclass\", num_classes=num_classes)\n",
"\n",
"        # Per-batch test outputs, concatenated in on_test_epoch_end.\n",
"        self.test_preds = []\n",
"        self.test_labels = []\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return the wrapped model's output for the batch ``x``.\"\"\"\n",
"        return self.model(x)\n",
"\n",
"    def training_step(self, batch, batch_idx):\n",
"        \"\"\"Compute the loss for one training batch and log epoch-level loss / accuracy.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        loss = self.criterion(logits, y)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.train_acc(preds, y)\n",
"        self.log(\"train_loss\", loss, prog_bar=True, on_epoch=True, on_step=False)\n",
"        self.log(\"train_acc\", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)\n",
"        return loss\n",
"\n",
"    def validation_step(self, batch, batch_idx):\n",
"        \"\"\"Log ``val_loss`` and ``val_acc`` for one validation batch.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        loss = self.criterion(logits, y)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.val_acc(preds, y)\n",
"        self.log(\"val_loss\", loss, prog_bar=True, on_epoch=True)\n",
"        self.log(\"val_acc\", self.val_acc, prog_bar=True, on_epoch=True)\n",
"\n",
"    def on_train_epoch_end(self):\n",
"        \"\"\"Log the learning rate of the first optimizer parameter group.\"\"\"\n",
"        lr = self.optimizers().param_groups[0][\"lr\"]\n",
"        self.log(\"learning_rate\", lr)\n",
"\n",
"    def test_step(self, batch, batch_idx):\n",
"        \"\"\"Update test accuracy and keep predictions / labels for the confusion matrix.\"\"\"\n",
"        x, y = batch\n",
"        logits = self(x)\n",
"        preds = torch.argmax(logits, dim=1)\n",
"        self.test_acc(preds, y)\n",
"        self.log(\"test_acc\", self.test_acc, prog_bar=True)\n",
"        self.test_preds.append(preds.cpu())\n",
"        self.test_labels.append(y.cpu())\n",
"\n",
"    def on_test_epoch_end(self):\n",
"        \"\"\"Log per-class accuracy, then save and publish the confusion matrix.\"\"\"\n",
"        all_preds = torch.cat(self.test_preds)\n",
"        all_labels = torch.cat(self.test_labels)\n",
"\n",
"        # The collected predictions were moved to the CPU, so the metric is too.\n",
"        self.conf_mat = self.conf_mat.to(\"cpu\")\n",
"        cm = self.conf_mat(all_preds, all_labels).numpy()\n",
"\n",
"        # ── Per-class accuracy ──\n",
"        # A class without test samples has a zero row sum; report 0.0 for it\n",
"        # instead of dividing by zero (nan plus a RuntimeWarning).\n",
"        support = cm.sum(axis=1)\n",
"        per_class = np.divide(\n",
"            cm.diagonal(),\n",
"            support,\n",
"            out=np.zeros(len(support), dtype=float),\n",
"            where=support > 0,\n",
"        )\n",
"        log.info(_box(\"📊 HASIL TEST\"))\n",
"        for i, (name, acc) in enumerate(zip(self.class_names, per_class)):\n",
"            log.info(f\" Akurasi [{name:>15}] : {acc:.4f} ({int(cm[i, i])}/{int(cm[i].sum())})\")\n",
"\n",
"        # ── Save the confusion matrix ──\n",
"        # One figure serves both the PNG file and the TensorBoard image.\n",
"        fig = self._plot_confusion_matrix(cm)\n",
"        path = os.path.join(self.results_dir, \"confusion_matrix.png\")\n",
"        fig.savefig(path, dpi=150, bbox_inches=\"tight\")\n",
"        log.info(f\" Disimpan → {path}\")\n",
"\n",
"        # Publish to the logger only when it offers add_figure; this avoids an\n",
"        # AttributeError when the trainer runs without a TensorBoard logger.\n",
"        experiment = getattr(self.logger, \"experiment\", None)\n",
"        if hasattr(experiment, \"add_figure\"):\n",
"            experiment.add_figure(\"Test/Confusion Matrix\", fig, self.current_epoch)\n",
"        plt.close(fig)\n",
"\n",
"        # Reset state so a second test run starts clean.\n",
"        self.conf_mat.reset()\n",
"        self.test_preds = []\n",
"        self.test_labels = []\n",
"\n",
"    def configure_optimizers(self):\n",
"        \"\"\"Build AdamW with separate backbone / head learning rates and a cosine schedule.\"\"\"\n",
"        optimizer = torch.optim.AdamW(\n",
"            [\n",
"                {\"params\": self.model.features.parameters(), \"lr\": 1e-4},\n",
"                {\"params\": self.model.classifier.parameters(), \"lr\": 1e-3},\n",
"            ],\n",
"            weight_decay=1e-3,\n",
"        )\n",
"        # NOTE(review): T_max=30 is shorter than the configured MAX_EPOCHS;\n",
"        # confirm the schedule beyond epoch 30 is what is wanted.\n",
"        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
"            optimizer, T_max=30, eta_min=1e-6\n",
"        )\n",
"        # No \"monitor\" entry: CosineAnnealingLR steps without reading a metric.\n",
"        return {\n",
"            \"optimizer\": optimizer,\n",
"            \"lr_scheduler\": {\"scheduler\": scheduler},\n",
"        }\n",
"\n",
"    def _plot_confusion_matrix(self, cm):\n",
"        \"\"\"Return a figure with the count and row-normalised confusion matrices.\n",
"\n",
"        Args:\n",
"            cm: Square array of counts, rows = actual class, columns = predicted.\n",
"\n",
"        Returns:\n",
"            The matplotlib figure; the caller is responsible for closing it.\n",
"        \"\"\"\n",
"        # Rows that sum to zero stay all-zero instead of becoming nan.\n",
"        row_totals = cm.sum(axis=1, keepdims=True)\n",
"        cm_norm = np.divide(\n",
"            cm.astype(\"float\"),\n",
"            row_totals,\n",
"            out=np.zeros(cm.shape, dtype=float),\n",
"            where=row_totals > 0,\n",
"        )\n",
"        fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
"        fig.suptitle(\"Confusion Matrix — MobileNetV2\", fontsize=14, fontweight=\"bold\")\n",
"\n",
"        for ax, data, fmt, title in zip(\n",
"            axes,\n",
"            [cm, cm_norm],\n",
"            [\"d\", \".2f\"],\n",
"            [\"Count\", \"Normalized\"],\n",
"        ):\n",
"            sns.heatmap(\n",
"                data, annot=True, fmt=fmt, cmap=\"Blues\",\n",
"                xticklabels=self.class_names,\n",
"                yticklabels=self.class_names,\n",
"                ax=ax,\n",
"            )\n",
"            ax.set_title(title)\n",
"            ax.set_xlabel(\"Predicted\")\n",
"            ax.set_ylabel(\"Actual\")\n",
"\n",
"        plt.tight_layout()\n",
"        return fig\n",
|
||
"\n",
|
||
"\n",
|
||
"# ── Callbacks & logger ────────────────────────────────────\n",
|
||
"# TensorBoard logs are grouped under the run folder's base name.\n",
"run_name = os.path.basename(RUN_DIR)\n",
"tb_logger = TensorBoardLogger(save_dir=\"runs\", name=run_name, version=0)\n",
"profiler = SimpleProfiler(dirpath=PROFILER_DIR, filename=\"profiler_report\")\n",
"\n",
"# Checkpoint writer, early stopping on val_loss, and the progress bar.\n",
"save_pt_callback = SavePTCallback(save_dir=WEIGHTS_DIR)\n",
"early_stop_callback = EarlyStopping(monitor=\"val_loss\", mode=\"min\", patience=12)\n",
"progress_bar = TQDMProgressBar(refresh_rate=10)\n",
"\n",
"# ── Training ──────────────────────────────────────────────\n",
"model = SmartBinClassifier(NUM_CLASSES, LR, CLASS_NAMES, RESULTS_DIR)\n",
"\n",
"# Single-device trainer; uses the GPU when CUDA is available.\n",
"trainer_options = dict(\n",
"    max_epochs=MAX_EPOCHS,\n",
"    callbacks=[save_pt_callback, early_stop_callback, progress_bar],\n",
"    accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n",
"    devices=1,\n",
"    enable_model_summary=True,\n",
"    log_every_n_steps=5,\n",
"    logger=tb_logger,\n",
"    profiler=profiler,\n",
"    deterministic=True,\n",
")\n",
"trainer = L.Trainer(**trainer_options)\n",
"\n",
"# Column header for the per-epoch lines written by SavePTCallback.\n",
"log.info(_box(\"🚀 MULAI TRAINING\"))\n",
"table_header = f\" {'Epoch':>5} │ {'train_loss':>10} {'val_loss':>10} │ {'train_acc':>9} {'val_acc':>9} │ {'gap':>7} │ Status\"\n",
"log.info(table_header)\n",
"log.info(f\" {_sep('─', 80)}\")\n",
"\n",
"# t0 is read again by the final summary, so it keeps its name.\n",
"t0 = time.time()\n",
"trainer.fit(model, train_loader, val_loader)\n",
"elapsed = time.time() - t0\n",
"\n",
"log.info(f\"\\n Total waktu training : {elapsed / 60:.1f} menit ({elapsed:.0f} detik)\")\n",
"log.info(f\" Early stop epoch : {trainer.current_epoch}\")\n",
"log.info(f\" Best val_acc : {save_pt_callback.best_acc:.4f}\")\n",
|
||
"\n",
|
||
"# ── Test ──────────────────────────────────────────────────\n",
|
||
"log.info(_box(\"🧪 TESTING MODEL TERBAIK\"))\n",
"\n",
"# Restore the weights stored in best.pt into the in-memory model.\n",
"best_pt_path = os.path.join(WEIGHTS_DIR, \"best.pt\")\n",
"best_weights = torch.load(best_pt_path, weights_only=True)\n",
"model.model.load_state_dict(best_weights[\"model_state\"])\n",
"\n",
"# Report what was loaded, with fallbacks for keys the file may lack.\n",
"ckpt_epoch = best_weights.get(\"epoch\", \"?\")\n",
"ckpt_val_acc = best_weights.get(\"val_acc\", 0)\n",
"ckpt_seed = best_weights.get(\"seed\", \"N/A\")\n",
"log.info(f\" Loaded : weights/best.pt\")\n",
"log.info(f\" Epoch : {ckpt_epoch}\")\n",
"log.info(f\" Val acc : {ckpt_val_acc:.4f}\")\n",
"log.info(f\" Seed : {ckpt_seed}\")\n",
"\n",
"# ckpt_path=None: test the weights just loaded, not a Lightning checkpoint.\n",
"t1 = time.time()\n",
"trainer.test(model, test_loader, ckpt_path=None)\n",
"test_seconds = time.time() - t1\n",
"log.info(f\" Waktu test : {test_seconds:.1f} detik\")\n",
|
||
"\n",
|
||
"# ── Summary ───────────────────────────────────────────────\n",
|
||
"log.info(_box(\"✅ SELESAI — RINGKASAN\"))\n",
"abs_run = os.path.abspath(RUN_DIR)\n",
"\n",
"# Tree view of the artefacts listed for this run.\n",
"artefact_lines = [\n",
"    f\" Run dir : {abs_run}\",\n",
"    f\" ├── weights/best.pt → val_acc: {save_pt_callback.best_acc:.4f}\",\n",
"    f\" ├── weights/last.pt\",\n",
"    f\" ├── results/confusion_matrix.png\",\n",
"    f\" ├── profiler/profiler_report.txt\",\n",
"    f\" └── tensorboard/\",\n",
"]\n",
"for artefact_line in artefact_lines:\n",
"    log.info(artefact_line)\n",
"\n",
"# Wall-clock time since training started (t0), test phase included.\n",
"total_minutes = (time.time() - t0) / 60\n",
"log.info(f\"\\n Total waktu : {total_minutes:.1f} menit\")\n",
"\n",
"# Hint for opening the TensorBoard logs.\n",
"for hint_line in (f\"\\n 👉 Buka TensorBoard:\", f\" tensorboard --logdir=runs/\"):\n",
"    log.info(hint_line)\n",
"log.info(_sep(\"═\"))"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"language_info": {
|
||
"name": "python"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|