@@ -42,14 +42,14 @@ def main():
         help="number of epochs in each learning loop iteration (default : 1)",
     )
     parser.add_argument(
-        "--lr", type=float, default=1e-3, help="base model learning rate (default : 0.001)"
+        "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)"
     )
     parser.add_argument("--alpha", type=float, default=0.9, help="alpha in RMSprop (default : 0.9)")
     parser.add_argument(
         "--batch-size", type=int, default=32, help="base model batch size (default : 32)"
     )
     parser.add_argument(
-        "--loops", type=int, default=1, help="number of loop iterations (default : 1)"
+        "--loops", type=int, default=2, help="number of loop iterations (default : 2)"
     )
     parser.add_argument(
-        "--segment_size", type=int or float, default=0.01, help="segment size (default : 0.01)"
+        "--segment_size", type=float, default=0.01, help="segment size (default : 0.01)"
@@ -84,14 +84,14 @@ def main():
     ### Building the Learning Part
     # Build necessary components for BasicNN
     cls = LeNet5(num_classes=10)
-    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
+    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)
     optimizer = RMSprop(cls.parameters(), lr=args.lr, alpha=args.alpha)
     use_cuda = not args.no_cuda and torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")
     scheduler = lr_scheduler.OneCycleLR(
         optimizer,
         max_lr=args.lr,
-        pct_start=0.2,
+        pct_start=0.15,
         epochs=args.loops,
         steps_per_epoch=int(1 / args.segment_size),
     )
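The `pct_start` change shortens the warm-up from 20% to 15% of the cycle. A standalone sketch (not from the diff; a throwaway parameter stands in for LeNet5) of the schedule these settings produce:

```python
import torch
from torch.optim import RMSprop, lr_scheduler

# One dummy parameter is enough to drive the scheduler.
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = RMSprop(params, lr=3e-4, alpha=0.9)
scheduler = lr_scheduler.OneCycleLR(
    optimizer, max_lr=3e-4, pct_start=0.15, epochs=2, steps_per_epoch=100
)

lrs = []
for _ in range(2 * 100):
    optimizer.step()  # step the optimizer first to avoid a scheduler warning
    scheduler.step()
    lrs.append(scheduler.get_last_lr()[0])

# LR warms up to max_lr=3e-4 about 15% of the way through the 200 steps,
# then cosine-anneals for the remainder.
print(lrs.index(max(lrs)), max(lrs))
```

The same 200-step budget is what the notebook cell below pins explicitly via `total_steps=200`.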
@@ -178,10 +178,10 @@
    "outputs": [],
    "source": [
     "cls = LeNet5(num_classes=10)\n",
-    "loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)\n",
-    "optimizer = RMSprop(cls.parameters(), lr=0.001, alpha=0.9)\n",
+    "loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)\n",
+    "optimizer = RMSprop(cls.parameters(), lr=0.0003, alpha=0.9)\n",
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
-    "scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, pct_start=0.1, total_steps=100)\n",
+    "scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=0.0003, pct_start=0.15, total_steps=200)\n",
     "\n",
     "base_model = BasicNN(\n",
     "    cls,\n",
@@ -434,7 +434,7 @@
    "log_dir = ABLLogger.get_current_instance().log_dir\n",
    "weights_dir = osp.join(log_dir, \"weights\")\n",
    "\n",
-   "bridge.train(train_data, loops=1, segment_size=0.01, save_interval=1, save_dir=weights_dir)\n",
+   "bridge.train(train_data, loops=2, segment_size=0.01, save_interval=1, save_dir=weights_dir)\n",
    "bridge.test(test_data)"
   ]
  }
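These notebook hunks mirror the script-side retuning: label smoothing 0.1 → 0.2, lr 0.001 → 0.0003, `pct_start` → 0.15, and `loops` 1 → 2. The notebook's explicit `total_steps=200` equals the script's derived `2 loops × int(1 / 0.01) = 200` steps, so the two entry points stay in sync.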
@@ -455,7 +455,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.8.13" | |||
"version": "3.8.18" | |||
}, | |||
"orig_nbformat": 4, | |||
"vscode": { | |||
@@ -1,34 +1,28 @@
-import numpy as np
 import torch
 from torch import nn


 class LeNet5(nn.Module):
-    def __init__(self, num_classes=10, image_size=(28, 28)):
+    def __init__(self, num_classes=10, image_size=(28, 28, 1)):
         super(LeNet5, self).__init__()
-        self.conv1 = nn.Sequential(
-            nn.Conv2d(1, 6, 3, padding=1),
-            nn.ReLU(),
-            nn.MaxPool2d(kernel_size=2, stride=2),
+        self.size = 16 * ((image_size[0] // 2 - 6) // 2) * ((image_size[1] // 2 - 6) // 2)
+        self.encoder = nn.Sequential(
+            nn.Conv2d(1, 6, 5),
+            nn.MaxPool2d(2, 2),  # 6 24 24 -> 6 12 12
+            nn.ReLU(True),
+            nn.Conv2d(6, 16, 5),  # 6 12 12 -> 16 8 8
+            nn.MaxPool2d(2, 2),  # 16 8 8 -> 16 4 4
+            nn.ReLU(True),
         )
-        self.conv2 = nn.Sequential(
-            nn.Conv2d(6, 16, 3), nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2)
+        self.classifier = nn.Sequential(
+            nn.Linear(self.size, 120),
+            nn.ReLU(),
+            nn.Linear(120, 84),
+            nn.ReLU(),
+            nn.Linear(84, num_classes),
         )
-        self.conv3 = nn.Sequential(nn.Conv2d(16, 16, 3), nn.ReLU())
-        feature_map_size = (np.array(image_size) // 2 - 2) // 2 - 2
-        num_features = 16 * feature_map_size[0] * feature_map_size[1]
-        self.fc1 = nn.Sequential(nn.Linear(num_features, 120), nn.ReLU())
-        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
-        self.fc3 = nn.Linear(84, num_classes)

     def forward(self, x):
-        x = self.conv1(x)
-        x = self.conv2(x)
-        x = self.conv3(x)
-        x = torch.flatten(x, 1)
-        x = self.fc1(x)
-        x = self.fc2(x)
-        x = self.fc3(x)
+        x = self.encoder(x)
+        x = x.view(-1, self.size)
+        x = self.classifier(x)
         return x
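Since `self.size` is now derived from `image_size` rather than computed with NumPy, a quick smoke test confirms the formula agrees with the new encoder on the default 28×28 input. A sketch (not part of the diff; the `models.nn` import path is a placeholder for wherever this file actually lives):

```python
import torch

from models.nn import LeNet5  # placeholder path; adjust to this file's module

model = LeNet5(num_classes=10)
# (28 // 2 - 6) // 2 == 4, so the flattened feature size is 16 * 4 * 4.
assert model.size == 256

out = model(torch.randn(8, 1, 28, 28))  # a batch of 8 grayscale digits
assert out.shape == (8, 10)
print("ok")
```

One design note: the size formula bakes in the encoder's two 5×5 convolutions and 2×2 poolings, so it tracks `image_size` correctly, but only for inputs of roughly 16×16 and larger, where both stages still produce positive spatial dimensions.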