In [None]:
!git clone https://github.com/tarun-bisht/lcnn.git > /dev/null
import sys
sys.path.insert(0, 'lcnn/')

Cloning into 'lcnn'...
remote: Enumerating objects: 97, done.[K
remote: Counting objects: 100% (97/97), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 97 (delta 44), reused 69 (delta 22), pack-reused 0[K
Unpacking objects: 100% (97/97), 43.42 KiB | 1.40 MiB/s, done.


In [None]:
import torch
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as F
import torchvision.transforms as ttf
import torchvision.datasets as tds
from tqdm.auto import tqdm
import os
import gc
from functools import partial

In [None]:
from lcnn.models.conv_spectral_norm import convspectralnorm_wrapper
from lcnn.regularized_loss import CELoss
from lcnn.models.layers import ConvBNBlock

In [None]:
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
def mnist_dataset(batch_size, dataset_path="./temp", val=False):
    labelmap = {idx: idx for idx in range(10)}
    train_dataset = tds.MNIST(root=dataset_path, train=True, download=True, transform=ttf.Compose([ttf.ToTensor()]))
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    if val:
        val_dataset = tds.MNIST(root=dataset_path, train=False, download=True, transform=ttf.Compose([ttf.ToTensor()]))
        val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
        return train_dataloader, val_dataloader, labelmap
    return train_dataloader, labelmap

In [None]:
class Net(nn.Module):
    def __init__(self, activation):
        super(Net, self).__init__()
        self.conv1 = ConvBNBlock(1, 10, kernel_size=5, bias=False, init_lipschitz=10)
        self.conv2 = ConvBNBlock(10, 20, kernel_size=5, bias=False, init_lipschitz=10)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.act1 = activation()
        self.act2 = activation()
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = self.act1(x)
        x = F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)
        x = self.act2(x)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

In [None]:
def train(network, train_loader, criterion, optimizer, val_loader, model_save_path, device):
    network.train()
    best_val_loss = torch.inf
    train_loss = 0
    for data, target in tqdm(train_loader):
        data, target = data.to(device).requires_grad_(), target.to(device)
        optimizer.zero_grad()
        network.zero_grad()
        loss = criterion.compute_loss(data, target)
        train_loss += criterion.metadata["raw_loss"]
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader.dataset)
    val_loss, val_acc = test(network, val_loader, device)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(network.state_dict(), os.path.join(model_save_path, "best_model.pt"))
    print("Train Loss", train_loss, "\t", "Val Loss: ", val_loss, "\t", "Val Accuracy: ", val_acc)

In [None]:
def test(network, test_loader, device):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            output = network(data)
            test_loss += F.cross_entropy(output, target).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
        test_loss /= len(test_loader.dataset)
        test_acc = correct / len(test_loader.dataset)
        print("Test Loss: ", test_loss, "\t", "Test Accuracy: ", test_acc.item())
        return test_loss, test_acc.item()

In [None]:
class GradNormRegularizedLoss(CELoss):
    # norm of gradient of out w.r.t. input
    # usually out are logits or log-probabilites
    def __init__(self,
                model: nn.Module,
                reg_constant: float = 1e-3):
        super().__init__(model)
        self.reg_constant = reg_constant

    def compute_loss(self, input, target):
        out = self.model(input)
        raw_loss = self.loss_criterion(out, target)
        grad_x = torch.autograd.grad(raw_loss, input,
                                    only_inputs=True,
                                    create_graph=True)[0]
        gradnorm = 1e5 * grad_x.pow(2).sum() / input.size(0)

        self.metadata = {
            'raw_loss': raw_loss.item(),
            'gradnorm': gradnorm.item()
        }
        return raw_loss + self.reg_constant * gradnorm

In [None]:
class CurvatureRegularizedLoss(CELoss):
    # curvature regularization by computing the proxy
    def __init__(self,
                model: nn.Module,
                reg_constants: tuple[float, float] = (1e-2, 1e-2)):
        super().__init__(model)
        self.reg_constants = reg_constants

    @torch.no_grad()
    def _apply_hooks_and_init(self, device):
        # add forward hooks
        self.fwd_handles = []
        for m in self.model.modules():
            if isinstance(m, ConvBNBlock):
                handle_f = m.register_forward_hook(self._extract_lipschitz)
                self.fwd_handles.append(handle_f)
            elif isinstance(m, ParametricSoftplus):
                handle_f = m.register_forward_hook(self._extract_curvature)
                self.fwd_handles.append(handle_f)
            elif isinstance(m, ParametricSwish):
                handle_f = m.register_forward_hook(self._extract_curvature)
                self.fwd_handles.append(handle_f)

        self.modules_visited = []
        self.betas = []
        self.gammas = []
        self.index = 0

    @torch.no_grad()
    def _remove_hooks(self):
        for f in self.fwd_handles:
            f.remove()

    def _extract_lipschitz_residual(self, module, input, output):
        temp1, temp2 = 1, 1

        if type(module.f1) == nn.Conv2d:
            temp1 = module.f1.log_lipschitz
            self.modules_visited.append(module.f1)

        if type(module.f2) == nn.Conv2d:
            temp2 = module.f2.log_lipschitz
            self.modules_visited.append(module.f2)

        self.gammas.append(torch.maximum(temp1, temp2))

    def _extract_lipschitz(self, module, input, output):
        if module not in self.modules_visited:
            self.gammas.append(module.log_lipschitz)

    def _extract_curvature(self, module, input, output):
        self.betas.append(module.log_beta.exp())

    def compute_loss(self, input, target):
        self._apply_hooks_and_init(input.device)
        out = self.model(input)
        self._remove_hooks()

        beta_term = 0.
        for b in self.betas:
            beta_term += b

        if len(self.gammas) > 0:
            gamma_term = 0.
            for g in self.gammas:
                gamma_term += g.abs()

        raw_loss = self.loss_criterion(out, target)
        self.metadata = {
            'raw_loss': raw_loss.item(),
            'beta_term': beta_term.item(),
            'gamma_term': gamma_term.item()
        }

        if len(self.gammas) > 0:
            self.metadata.update({'gamma term:': gamma_term.item()})
            return raw_loss + self.reg_constants[0] * beta_term + self.reg_constants[1] * gamma_term
        else:
            return raw_loss + self.reg_constants[0] * beta_term

In [None]:
class CurvatureAndGradientRegularizedLoss(CELoss):
    # curvature regularization by computing the proxy
    # also include gradient norm regularization

    def __init__(self,
                model: nn.Module,
                reg_constants: tuple[float, float, float] = (1e-2, 1e-2, 1e-2)):
        super().__init__(model)
        self.reg_constants = reg_constants

    @torch.no_grad()
    def _apply_hooks_and_init(self, device):
        # add forward hooks
        self.fwd_handles = []
        for m in self.model.modules():
            if isinstance(m, ConvBNBlock):
                handle_f = m.register_forward_hook(self._extract_lipschitz)
                self.fwd_handles.append(handle_f)
            elif isinstance(m, ParametricSoftplus):
                handle_f = m.register_forward_hook(self._extract_curvature)
                self.fwd_handles.append(handle_f)
            elif isinstance(m, ParametricSwish):
                handle_f = m.register_forward_hook(self._extract_curvature)
                self.fwd_handles.append(handle_f)

        self.modules_visited = []
        self.betas = []
        self.gammas = []
        self.index = 0

    @torch.no_grad()
    def _remove_hooks(self):
        for f in self.fwd_handles:
            f.remove()

    def _extract_lipschitz_residual(self, module, input, output):
        temp1, temp2 = 1, 1

        if type(module.f1) == nn.Conv2d:
            temp1 = module.f1.log_lipschitz
            self.modules_visited.append(module.f1)

        if type(module.f2) == nn.Conv2d:
            temp2 = module.f2.log_lipschitz
            self.modules_visited.append(module.f2)

        self.gammas.append(torch.maximum(temp1, temp2))

    def _extract_lipschitz(self, module, input, output):
        if module not in self.modules_visited:
            self.gammas.append(module.log_lipschitz)

    def _extract_curvature(self, module, input, output):
        self.betas.append(module.log_beta.exp())

    def compute_loss(self, input, target):
        self._apply_hooks_and_init(input.device)
        out = self.model(input)
        self._remove_hooks()

        beta_term = 0.
        for b in self.betas:
            beta_term += b
        if len(self.gammas) > 0:
            gamma_term = 0.
            for g in self.gammas:
                gamma_term += g.abs()

        raw_loss = self.loss_criterion(out, target)
        loss_grad = torch.autograd.grad(outputs=raw_loss,
                                        inputs=input,
                                        create_graph=True)[0]
        gradnorm = 1e5 * loss_grad.pow(2).sum() / input.size(0)

        self.metadata = {
            'raw_loss': raw_loss.item(),
            'beta_term': beta_term.item() if type(beta_term) == torch.Tensor else beta_term,
            'gamma_term': gamma_term.item() if type(gamma_term) == torch.Tensor else gamma_term,
            'gradnorm': gradnorm.item()
        }

        if len(self.gammas) > 0:
            return raw_loss + self.reg_constants[0] * beta_term  \
                            + self.reg_constants[1] * gamma_term \
                            + self.reg_constants[2] * gradnorm
        else:
            return raw_loss + self.reg_constants[0] * beta_term \
                            + self.reg_constants[2] * gradnorm

In [None]:
def get_criterion(model, name):
    # Various losses as a dictionary
    loss_dict = {
    'gnorm': partial(GradNormRegularizedLoss, reg_constant=1e-3),
    'curvature': partial(CurvatureRegularizedLoss, reg_constants=(1e-2, 1e-2)),
    'curvature_and_gnorm': partial(CurvatureAndGradientRegularizedLoss, reg_constants=(1e-2, 1e-2, 1e-3))
    }

    loss_fun = loss_dict.get(name, CELoss)
    penalty = loss_fun(model)
    return penalty

In [None]:
def get_optimizer(lcnn, lr, model: torch.nn.Module):
    # Initialize optimizer and scheduler
    if lcnn:
        # Get beta and gamma parameters if working with LCNNs
        swish_paramnames, psoftplus_paramnames, bn_paramnames = [],[], []
        for (name, layer) in model.named_modules():
            if type(layer) == ParametricSoftplus:
                for p in layer.named_parameters():
                    psoftplus_paramnames.append(name + '.' + p[0])
            if type(layer) == ParametricSwish:
                for p in layer.named_parameters():
                    swish_paramnames.append(name + '.' + p[0])
            if type(layer) == ConvBNBlock:
                for p in layer.named_parameters():
                    if 'log_lipschitz' in p[0]:
                        bn_paramnames.append(name + '.' + p[0])

        softplus_params = list(map(lambda x: x[1],
                                   list(filter(lambda kv: kv[0] in psoftplus_paramnames, model.named_parameters()))))
        swish_params = list(map(lambda x: x[1],
                                   list(filter(lambda kv: kv[0] in swish_paramnames, model.named_parameters()))))
        bn_thresh_params = list(map(lambda x: x[1],
                                    list(filter(lambda kv: kv[0] in bn_paramnames, model.named_parameters()))))
        base_params = list(map(lambda x: x[1],
                               list(filter(
                                   lambda kv: (kv[0] not in psoftplus_paramnames)
                                   and (kv[0] not in swish_paramnames)
                                   and (kv[0] not in bn_paramnames),
                                   model.named_parameters()))))
        model_params = [{'params': base_params},
                        {'params': softplus_params, 'weight_decay': 0.},
                        {'params': swish_params, 'weight_decay': 0.},
                        {'params': bn_thresh_params, 'weight_decay': 0.}]
        optimizer = opt.Adam(model_params, lr=lr)

    else:
        optimizer = opt.Adam(network.parameters(), lr=lr)
    return optimizer

In [None]:
class ParametricSoftplus(torch.nn.Module):
    # Adapted from:
    # https://discuss.pytorch.org/t/learnable-parameter-in-softplus/60371/2
    # https://pytorch.org/docs/stable/_modules/torch/nn/modules/activation.html#PReLU
    def __init__(self,
                 init_beta: float = 5.,
                 threshold: float = 20.0):
        super().__init__()
        assert init_beta > 0.0
        assert threshold >= 0.0

        # parameterize in terms of log in order to keep beta > 0
        self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())
        self.threshold = threshold
        self.register_buffer('offset', torch.log(torch.tensor(2.)), persistent=False)
        self.eps = 1e-3

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # https://pytorch.org/docs/stable/generated/torch.nn.Softplus.html
        beta = self.log_beta.exp()
        beta_x = (beta + self.eps) * x
        y = (torch.nn.functional.softplus(beta_x, beta=1.0, threshold=self.threshold) - self.offset) / (beta + self.eps)
        return y

In [None]:
class ParametricSwish(torch.nn.Module):
    def __init__(self,
                 init_beta: float = 5):
        super().__init__()
        assert init_beta >= 0.0

        # parameterize in terms of log in order to keep beta > 0
        self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html
        beta = self.log_beta.exp()
        beta_x = beta*x
        y = x*(torch.nn.functional.sigmoid(beta_x))
        return y

In [None]:
class Swish(torch.nn.Module):
    def __init__(self,
                 beta: float = 1):
        super().__init__()
        assert beta >= 0.0
        self.beta = beta

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html
        y = x*(torch.nn.functional.sigmoid(self.beta*x))
        return y

In [None]:
class NetLCNN(nn.Module):
    def __init__(self, activation):
        super(NetLCNN, self).__init__()
        self.conv1 = ConvBNBlock(1, 10, kernel_size=5, bias=False, init_lipschitz=10, clip_bn=True, conv_wrapper=convspectralnorm_wrapper)
        self.conv2 = ConvBNBlock(10, 20, kernel_size=5, bias=False, init_lipschitz=10, clip_bn=True, conv_wrapper=convspectralnorm_wrapper)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = torch.nn.utils.parametrizations.spectral_norm(nn.Linear(320, 50))
        self.fc2 = torch.nn.utils.parametrizations.spectral_norm(nn.Linear(50, 10))
        self.act1 = activation()
        self.act2 = activation()

    def forward(self, x):
        x = self.act1(F.max_pool2d(self.conv1(x), 2))
        x = self.act2(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

In [None]:
def curvature_hessian_estimator(model: torch.nn.Module,
                        image: torch.Tensor,
                        target: torch.Tensor,
                        num_power_iter: int=20) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:

    model.eval()
    u = torch.randn_like(image)
    u /= torch.norm(u, p=2, dim=(1, 2, 3), keepdim=True)

    with torch.enable_grad():
        image = image.requires_grad_()
        out = model(image)
        y = F.log_softmax(out, 1)
        output = F.nll_loss(y, target, reduction='none')
        model.zero_grad()
        # Gradients w.r.t. input
        gradients = torch.autograd.grad(outputs=output.sum(),
                                        inputs=image, create_graph=True)[0]
        gnorm = torch.norm(gradients, p=2, dim=(1, 2, 3))
        assert not gradients.isnan().any()

        # Power method to find singular value of Hessian
        for _ in range(num_power_iter):
            grad_vector_prod = (gradients * u.detach_()).sum()
            hessian_vector_prod = torch.autograd.grad(outputs=grad_vector_prod, inputs=image, retain_graph=True)[0]
            assert not hessian_vector_prod.isnan().any()

            hvp_norm = torch.norm(hessian_vector_prod, p=2, dim=(1, 2, 3), keepdim=True)
            u = hessian_vector_prod.div(hvp_norm + 1e-6) #1e-6 for numerical stability

        grad_vector_prod = (gradients * u.detach_()).sum()
        hessian_vector_prod = torch.autograd.grad(outputs=grad_vector_prod, inputs=image)[0]
        hessian_singular_value = (hessian_vector_prod * u.detach_()).sum((1, 2, 3))

    # curvature = hessian_singular_value / (grad_norm + epsilon) by definition
    curvatures = hessian_singular_value.abs().div(gnorm + 1e-6)
    hess = hessian_singular_value.abs()
    grad = gnorm
    return curvatures, hess, grad

In [None]:
def measure_curvature(model: torch.nn.Module,
                      dataloader: torch.utils.data.DataLoader,
                      data_fraction: float=0.1,
                      batch_size: int=64,
                      num_power_iter: int=20,
                      device: torch.device='cpu') -> tuple[tuple, tuple, tuple]:

    """
    Compute curvature, hessian norm and gradient norm of a subset of the data given by the dataloader.
    These values are computed using the power method, which requires setting the number of power iterations (num_power_iter).
    """

    model.eval()
    datasize = int(data_fraction * len(dataloader.dataset))
    max_batches = int(datasize / batch_size)
    curvature_agg = torch.zeros(size=(datasize,))
    grad_agg = torch.zeros(size=(datasize,))
    hess_agg = torch.zeros(size=(datasize,))

    for idx, (data, target) in enumerate(dataloader):
        data, target = data.to(device).requires_grad_(), target.to(device)
        with torch.no_grad():
            curvatures, hess, grad = curvature_hessian_estimator(model, data, target, num_power_iter=num_power_iter)
        curvature_agg[idx * batch_size:(idx + 1) * batch_size] = curvatures.detach()
        hess_agg[idx * batch_size:(idx + 1) * batch_size] = hess.detach()
        grad_agg[idx * batch_size:(idx + 1) * batch_size] = grad.detach()

        avg_curvature, std_curvature = curvature_agg.mean().item(), curvature_agg.std().item()
        avg_hessian, std_hessian = hess_agg.mean().item(), hess_agg.std().item()
        avg_grad, std_grad = grad_agg.mean().item(), grad_agg.std().item()

        if idx == (max_batches - 1):
            print('Average Curvature: {:.6f} +/- {:.2f} '.format(avg_curvature, std_curvature))
            print('Average Hessian Spectral Norm: {:.6f} +/- {:.2f} '.format(avg_hessian, std_hessian))
            print('Average Gradient Norm: {:.6f} +/- {:.2f}'.format(avg_grad, std_grad))
            return

In [None]:
def fgsm(model, X, y, epsilon=0.3):
    """ Construct FGSM adversarial examples on the examples X"""
    delta = torch.zeros_like(X, requires_grad=True)
    with torch.enable_grad():
        pred = model(X + delta)
        loss = nn.CrossEntropyLoss()(pred, y) # Calculate loss
    loss.backward() # Calculate gradient of loss with respect to input
    Xadv = X + epsilon * delta.grad.detach().sign()
    # ensuring valid pixel values
    Xadv = Xadv.clamp(0,1)
    return Xadv

In [None]:
def pgd(model, X, y, epsilon=0.3, step_size=0.01, num_steps=40):
    """ Construct FGSM adversarial examples on the examples X"""
    Xadv = X #+ 0
    for _ in range(num_steps):
        delta = torch.zeros_like(Xadv, requires_grad=True)
        with torch.enable_grad():
            pred = model(Xadv + delta)
            loss = nn.CrossEntropyLoss()(pred, y) # Calculate loss
        loss.backward() # Calculate gradient of loss with respect to input
        Xadv = Xadv + step_size * delta.grad.detach().sign()
        # imperceptibility condition
        Xadv = torch.min(torch.max(Xadv, X - epsilon), X + epsilon)
        # ensuring valid pixel values
        Xadv = Xadv.clamp(0,1)
    return Xadv

In [None]:
def adversarial_test(source_model, target_model, attack, device, test_loader):
    torch.cuda.empty_cache()
    gc.collect()
    source_model.to(device)
    target_model.to(device)
    source_model.eval()
    target_model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            adv = attack(source_model, data, target)
            output = target_model(adv)
            test_loss += F.cross_entropy(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    test_accuracy = correct/len(test_loader.dataset)
    print('\nAdversarial Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss, test_accuracy

In [None]:
n_epochs = 10
batch_size_train = 2048
learning_rate = 0.01

In [None]:
train_loader, test_loader, label = mnist_dataset(batch_size=batch_size_train, val=True)
train_cal_curve_data, test_cal_curve_data, label = mnist_dataset(batch_size=64, val=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./temp/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 327476104.26it/s]

Extracting ./temp/MNIST/raw/train-images-idx3-ubyte.gz to ./temp/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./temp/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 118760484.14it/s]


Extracting ./temp/MNIST/raw/train-labels-idx1-ubyte.gz to ./temp/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./temp/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 147561052.24it/s]


Extracting ./temp/MNIST/raw/t10k-images-idx3-ubyte.gz to ./temp/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./temp/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 23147665.57it/s]

Extracting ./temp/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./temp/MNIST/raw






In [None]:
attacks_pgd = [partial(pgd, epsilon=0.05, num_steps=10),
               partial(pgd, epsilon=0.1, num_steps=10),
               partial(pgd, epsilon=0.15, num_steps=10),
               partial(pgd, epsilon=0.2, num_steps=10)]

In [None]:
attacks_fgsm = [partial(fgsm, epsilon=0.05),
                partial(fgsm, epsilon=0.1),
                partial(fgsm, epsilon=0.15),
                partial(fgsm, epsilon=0.2)]

In [None]:
save_path = "saved_models/simple_ce_softplus_1e3"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(nn.Softplus, beta=1e3)).to(device)
criterion = get_criterion(network, "ce")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011539452075958253 	 Test Accuracy:  0.07279999554157257
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00013927167654037475 	 Test Accuracy:  0.9241999983787537
Train Loss 0.0005595928142468135 	 Val Loss:  0.00013927167654037475 	 Val Accuracy:  0.9241999983787537
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.551071017980575e-05 	 Test Accuracy:  0.9652999639511108
Train Loss 0.0002362786461909612 	 Val Loss:  5.551071017980575e-05 	 Val Accuracy:  0.9652999639511108
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.194812104105949e-05 	 Test Accuracy:  0.9733999967575073
Train Loss 0.00018923971354961396 	 Val Loss:  4.194812104105949e-05 	 Val Accuracy:  0.9733999967575073
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.7316522002220156e-05 	 Test Accuracy:  0.9777999520301819
Train Loss 0.00016877286980549494 	 Val Loss:  3.7316522002220156e-05 	 Val Accuracy:  0.9777999520301819
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.36541622877121e-05 	 Test Accuracy:  0.9799000024795532
Train Loss 0.00015463200410207112 	 Val Loss:  3.36541622877121e-05 	 Val Accuracy:  0.9799000024795532
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.0453192070126535e-05 	 Test Accuracy:  0.9807999730110168
Train Loss 0.00014395678838094076 	 Val Loss:  3.0453192070126535e-05 	 Val Accuracy:  0.9807999730110168
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.991578169167042e-05 	 Test Accuracy:  0.9820999503135681
Train Loss 0.00013621707856655121 	 Val Loss:  2.991578169167042e-05 	 Val Accuracy:  0.9820999503135681
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.8626251593232156e-05 	 Test Accuracy:  0.982699990272522
Train Loss 0.00012477851311365763 	 Val Loss:  2.8626251593232156e-05 	 Val Accuracy:  0.982699990272522
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.7308446541428566e-05 	 Test Accuracy:  0.9824000000953674
Train Loss 0.00012062570303678512 	 Val Loss:  2.7308446541428566e-05 	 Val Accuracy:  0.9824000000953674
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.617664486169815e-05 	 Test Accuracy:  0.9828999638557434
Train Loss 0.00011771670753757159 	 Val Loss:  2.617664486169815e-05 	 Val Accuracy:  0.9828999638557434


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.6284608989953993e-05 	 Test Accuracy:  0.9828999638557434
Curvature using training data
Average Curvature: 16.224577 +/- 14.98 
Average Hessian Spectral Norm: 3.608176 +/- 23.20 
Average Gradient Norm: 0.220053 +/- 1.10
Curvature using test data
Average Curvature: 15.477436 +/- 14.36 
Average Hessian Spectral Norm: 3.832908 +/- 34.92 
Average Gradient Norm: 0.193590 +/- 1.09
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8809/10000 (88%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6282/10000 (63%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6282/10000 (63%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6282/10000 (63%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9224/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7933/10000 (79%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 5809/10000 (58%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0013, Accuracy: 3263/10000 (33%)



In [None]:
save_path = "saved_models/simple_gnorm_softplus_1e3"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(nn.Softplus, beta=1e3)).to(device)
criterion = get_criterion(network, "gnorm")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011526191473007201 	 Test Accuracy:  0.1136000007390976
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  9.992064386606217e-05 	 Test Accuracy:  0.9484999775886536
Train Loss 0.0005015356441338857 	 Val Loss:  9.992064386606217e-05 	 Val Accuracy:  0.9484999775886536
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.2728020399808884e-05 	 Test Accuracy:  0.9679999947547913
Train Loss 0.00020446584125359852 	 Val Loss:  5.2728020399808884e-05 	 Val Accuracy:  0.9679999947547913
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.947317451238632e-05 	 Test Accuracy:  0.9745999574661255
Train Loss 0.00016086693356434504 	 Val Loss:  3.947317451238632e-05 	 Val Accuracy:  0.9745999574661255
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.517168909311295e-05 	 Test Accuracy:  0.977400004863739
Train Loss 0.00013813254435857136 	 Val Loss:  3.517168909311295e-05 	 Val Accuracy:  0.977400004863739
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.283567167818546e-05 	 Test Accuracy:  0.9797999858856201
Train Loss 0.0001282378983994325 	 Val Loss:  3.283567167818546e-05 	 Val Accuracy:  0.9797999858856201
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.7856147661805152e-05 	 Test Accuracy:  0.9822999835014343
Train Loss 0.00011700856064756711 	 Val Loss:  2.7856147661805152e-05 	 Val Accuracy:  0.9822999835014343
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.696257494390011e-05 	 Test Accuracy:  0.9825999736785889
Train Loss 0.00011168203875422478 	 Val Loss:  2.696257494390011e-05 	 Val Accuracy:  0.9825999736785889
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.524711899459362e-05 	 Test Accuracy:  0.984499990940094
Train Loss 0.00010355800439914068 	 Val Loss:  2.524711899459362e-05 	 Val Accuracy:  0.984499990940094
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4243054166436197e-05 	 Test Accuracy:  0.9836999773979187
Train Loss 0.00010205412805080414 	 Val Loss:  2.4243054166436197e-05 	 Val Accuracy:  0.9836999773979187
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.3753834143280982e-05 	 Test Accuracy:  0.9848999977111816
Train Loss 0.00010130206495523452 	 Val Loss:  2.3753834143280982e-05 	 Val Accuracy:  0.9848999977111816


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4048297107219697e-05 	 Test Accuracy:  0.9848999977111816
Curvature using training data
Average Curvature: 16.218719 +/- 13.41 
Average Hessian Spectral Norm: 3.727906 +/- 22.72 
Average Gradient Norm: 0.209014 +/- 1.07
Curvature using test data
Average Curvature: 15.245937 +/- 12.76 
Average Hessian Spectral Norm: 3.701952 +/- 23.71 
Average Gradient Norm: 0.232079 +/- 1.30
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8918/10000 (89%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6418/10000 (64%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6418/10000 (64%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6418/10000 (64%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9282/10000 (93%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7869/10000 (79%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5561/10000 (56%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0013, Accuracy: 3182/10000 (32%)



In [None]:
save_path = "saved_models/simple_ce_softplus_10"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(nn.Softplus, beta=10)).to(device)
criterion = get_criterion(network, "ce")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011550359725952149 	 Test Accuracy:  0.09319999814033508
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00011417375206947327 	 Test Accuracy:  0.9391999840736389
Train Loss 0.0005344907263914744 	 Val Loss:  0.00011417375206947327 	 Val Accuracy:  0.9391999840736389
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.1174847036600116e-05 	 Test Accuracy:  0.9677000045776367
Train Loss 0.00020974621176719665 	 Val Loss:  5.1174847036600116e-05 	 Val Accuracy:  0.9677000045776367
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.191230162978172e-05 	 Test Accuracy:  0.9734999537467957
Train Loss 0.00015577914267778396 	 Val Loss:  4.191230162978172e-05 	 Val Accuracy:  0.9734999537467957
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.4806005656719205e-05 	 Test Accuracy:  0.977899968624115
Train Loss 0.00013687414775292078 	 Val Loss:  3.4806005656719205e-05 	 Val Accuracy:  0.977899968624115
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.3050453662872314e-05 	 Test Accuracy:  0.9790999889373779
Train Loss 0.00012537367418408393 	 Val Loss:  3.3050453662872314e-05 	 Val Accuracy:  0.9790999889373779
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.9175077751278876e-05 	 Test Accuracy:  0.9820999503135681
Train Loss 0.00011889737248420716 	 Val Loss:  2.9175077751278876e-05 	 Val Accuracy:  0.9820999503135681
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.670326754450798e-05 	 Test Accuracy:  0.9824000000953674
Train Loss 0.0001133953091998895 	 Val Loss:  2.670326754450798e-05 	 Val Accuracy:  0.9824000000953674
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.841680608689785e-05 	 Test Accuracy:  0.981499969959259
Train Loss 0.00010708914374311765 	 Val Loss:  2.841680608689785e-05 	 Val Accuracy:  0.981499969959259
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.606600560247898e-05 	 Test Accuracy:  0.983299970626831
Train Loss 0.00010538513834277789 	 Val Loss:  2.606600560247898e-05 	 Val Accuracy:  0.983299970626831
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.746245302259922e-05 	 Test Accuracy:  0.9818999767303467
Train Loss 0.0001008901779850324 	 Val Loss:  2.746245302259922e-05 	 Val Accuracy:  0.9818999767303467


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.7571159228682517e-05 	 Test Accuracy:  0.9818999767303467
Curvature using training data
Average Curvature: 7.482766 +/- 3.20 
Average Hessian Spectral Norm: 1.140433 +/- 5.12 
Average Gradient Norm: 0.222886 +/- 1.20
Curvature using test data
Average Curvature: 7.126555 +/- 3.46 
Average Hessian Spectral Norm: 0.938101 +/- 4.46 
Average Gradient Norm: 0.210559 +/- 1.21
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8699/10000 (87%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5933/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5933/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5933/10000 (59%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9158/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7670/10000 (77%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5480/10000 (55%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0014, Accuracy: 3456/10000 (35%)



In [None]:
save_path = "saved_models/simple_gnorm_softplus_10"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(nn.Softplus, beta=10)).to(device)
criterion = get_criterion(network, "gnorm")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.001153630042076111 	 Test Accuracy:  0.08429999649524689
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  9.462961852550507e-05 	 Test Accuracy:  0.9460999965667725
Train Loss 0.00042795609881480536 	 Val Loss:  9.462961852550507e-05 	 Val Accuracy:  0.9460999965667725
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.749571159482002e-05 	 Test Accuracy:  0.9681999683380127
Train Loss 0.00016646640698115032 	 Val Loss:  4.749571159482002e-05 	 Val Accuracy:  0.9681999683380127
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.6747381091117856e-05 	 Test Accuracy:  0.9764999747276306
Train Loss 0.00012790814613302548 	 Val Loss:  3.6747381091117856e-05 	 Val Accuracy:  0.9764999747276306
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.2869105786085126e-05 	 Test Accuracy:  0.9794999957084656
Train Loss 0.00011091185187300047 	 Val Loss:  3.2869105786085126e-05 	 Val Accuracy:  0.9794999957084656
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.6675238460302355e-05 	 Test Accuracy:  0.9824000000953674
Train Loss 0.00010277618144949277 	 Val Loss:  2.6675238460302355e-05 	 Val Accuracy:  0.9824000000953674
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.7576541155576705e-05 	 Test Accuracy:  0.9824999570846558
Train Loss 9.370439698298772e-05 	 Val Loss:  2.7576541155576705e-05 	 Val Accuracy:  0.9824999570846558
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4563886225223542e-05 	 Test Accuracy:  0.9835000038146973
Train Loss 8.970620781183242e-05 	 Val Loss:  2.4563886225223542e-05 	 Val Accuracy:  0.9835000038146973
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.3850329220294954e-05 	 Test Accuracy:  0.984499990940094
Train Loss 8.579891994595528e-05 	 Val Loss:  2.3850329220294954e-05 	 Val Accuracy:  0.984499990940094
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.381697967648506e-05 	 Test Accuracy:  0.9857999682426453
Train Loss 8.193544348080952e-05 	 Val Loss:  2.381697967648506e-05 	 Val Accuracy:  0.9857999682426453
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.297883965075016e-05 	 Test Accuracy:  0.9855999946594238
Train Loss 8.057606567939123e-05 	 Val Loss:  2.297883965075016e-05 	 Val Accuracy:  0.9855999946594238


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.292808033525944e-05 	 Test Accuracy:  0.9855999946594238
Curvature using training data
Average Curvature: 15.894379 +/- 14.65 
Average Hessian Spectral Norm: 2.724316 +/- 15.93 
Average Gradient Norm: 0.198548 +/- 1.04
Curvature using test data
Average Curvature: 15.825721 +/- 15.59 
Average Hessian Spectral Norm: 3.710984 +/- 25.98 
Average Gradient Norm: 0.218457 +/- 1.10
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8712/10000 (87%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5659/10000 (57%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5659/10000 (57%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 5659/10000 (57%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9151/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 7471/10000 (75%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0010, Accuracy: 4788/10000 (48%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0016, Accuracy: 2615/10000 (26%)



In [None]:
save_path = "saved_models/simple_ce_swish_1e3"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(Swish, beta=1e3)).to(device)
criterion = get_criterion(network, "ce")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011545294761657716 	 Test Accuracy:  0.05309999734163284
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00010327642858028411 	 Test Accuracy:  0.9426999688148499
Train Loss 0.0005136224096020063 	 Val Loss:  0.00010327642858028411 	 Val Accuracy:  0.9426999688148499
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.6462188810110093e-05 	 Test Accuracy:  0.9646999835968018
Train Loss 0.00020728308210770288 	 Val Loss:  5.6462188810110093e-05 	 Val Accuracy:  0.9646999835968018
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.047955423593521e-05 	 Test Accuracy:  0.9729999899864197
Train Loss 0.0001657504588365555 	 Val Loss:  4.047955423593521e-05 	 Val Accuracy:  0.9729999899864197
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.8951215147972104e-05 	 Test Accuracy:  0.9745999574661255
Train Loss 0.0001431952565908432 	 Val Loss:  3.8951215147972104e-05 	 Val Accuracy:  0.9745999574661255
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.440160676836968e-05 	 Test Accuracy:  0.9784999489784241
Train Loss 0.00013347262144088745 	 Val Loss:  3.440160676836968e-05 	 Val Accuracy:  0.9784999489784241
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.12395766377449e-05 	 Test Accuracy:  0.9799000024795532
Train Loss 0.00012217018827795982 	 Val Loss:  3.12395766377449e-05 	 Val Accuracy:  0.9799000024795532
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.0101310461759567e-05 	 Test Accuracy:  0.9812999963760376
Train Loss 0.00011550195987025896 	 Val Loss:  3.0101310461759567e-05 	 Val Accuracy:  0.9812999963760376
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.6276150345802306e-05 	 Test Accuracy:  0.9817999601364136
Train Loss 0.00011163866594433785 	 Val Loss:  2.6276150345802306e-05 	 Val Accuracy:  0.9817999601364136
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.815016768872738e-05 	 Test Accuracy:  0.9812999963760376
Train Loss 0.000105515256524086 	 Val Loss:  2.815016768872738e-05 	 Val Accuracy:  0.9812999963760376
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4480515345931052e-05 	 Test Accuracy:  0.9856999516487122
Train Loss 0.00010042816574374835 	 Val Loss:  2.4480515345931052e-05 	 Val Accuracy:  0.9856999516487122


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4467667937278747e-05 	 Test Accuracy:  0.9856999516487122
Curvature using training data
Average Curvature: 22.354395 +/- 23.86 
Average Hessian Spectral Norm: 4.568935 +/- 30.54 
Average Gradient Norm: 0.211450 +/- 1.11
Curvature using test data
Average Curvature: 21.717928 +/- 22.84 
Average Hessian Spectral Norm: 2.800561 +/- 17.19 
Average Gradient Norm: 0.164837 +/- 0.95
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8958/10000 (90%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6626/10000 (66%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6626/10000 (66%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 6626/10000 (66%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9274/10000 (93%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7997/10000 (80%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5734/10000 (57%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0013, Accuracy: 3286/10000 (33%)



In [None]:
save_path = "saved_models/simple_gnorm_swish_1e3"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(Swish, beta=1e3)).to(device)
criterion = get_criterion(network, "gnorm")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011552131175994872 	 Test Accuracy:  0.10279999673366547
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0001334023043513298 	 Test Accuracy:  0.9253999590873718
Train Loss 0.0005817790548006694 	 Val Loss:  0.0001334023043513298 	 Val Accuracy:  0.9253999590873718
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.155842319130898e-05 	 Test Accuracy:  0.9637999534606934
Train Loss 0.0002382354314128558 	 Val Loss:  6.155842319130898e-05 	 Val Accuracy:  0.9637999534606934
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.341244176030159e-05 	 Test Accuracy:  0.9726999998092651
Train Loss 0.00018046150704224904 	 Val Loss:  4.341244176030159e-05 	 Val Accuracy:  0.9726999998092651
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.090985655784607e-05 	 Test Accuracy:  0.9758999943733215
Train Loss 0.00016050765564044317 	 Val Loss:  4.090985655784607e-05 	 Val Accuracy:  0.9758999943733215
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.40147003531456e-05 	 Test Accuracy:  0.9786999821662903
Train Loss 0.00014644631892442704 	 Val Loss:  3.40147003531456e-05 	 Val Accuracy:  0.9786999821662903
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.683154731988907e-05 	 Test Accuracy:  0.976699948310852
Train Loss 0.0001391572100420793 	 Val Loss:  3.683154731988907e-05 	 Val Accuracy:  0.976699948310852
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.199451304972172e-05 	 Test Accuracy:  0.9791999459266663
Train Loss 0.0001323691174387932 	 Val Loss:  3.199451304972172e-05 	 Val Accuracy:  0.9791999459266663
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.0424879118800162e-05 	 Test Accuracy:  0.9805999994277954
Train Loss 0.00012287664785981178 	 Val Loss:  3.0424879118800162e-05 	 Val Accuracy:  0.9805999994277954
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.5118693113327025e-05 	 Test Accuracy:  0.9764999747276306
Train Loss 0.0001200421154499054 	 Val Loss:  3.5118693113327025e-05 	 Val Accuracy:  0.9764999747276306
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.9605626687407492e-05 	 Test Accuracy:  0.983299970626831
Train Loss 0.00011698398813605309 	 Val Loss:  2.9605626687407492e-05 	 Val Accuracy:  0.983299970626831


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.9554206505417823e-05 	 Test Accuracy:  0.983299970626831
Curvature using training data
Average Curvature: 22.375832 +/- 27.77 
Average Hessian Spectral Norm: 4.549871 +/- 41.52 
Average Gradient Norm: 0.196522 +/- 1.02
Curvature using test data
Average Curvature: 22.628010 +/- 30.62 
Average Hessian Spectral Norm: 4.307856 +/- 35.53 
Average Gradient Norm: 0.193900 +/- 1.05
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 9047/10000 (90%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 7241/10000 (72%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 7241/10000 (72%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 7241/10000 (72%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9326/10000 (93%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0003, Accuracy: 8355/10000 (84%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 6794/10000 (68%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0010, Accuracy: 4749/10000 (47%)



In [None]:
save_path = "saved_models/simple_ce_swish_10"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(Swish, beta=10)).to(device)
criterion = get_criterion(network, "ce")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011547011613845825 	 Test Accuracy:  0.07750000059604645
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00011636756211519241 	 Test Accuracy:  0.9371999502182007
Train Loss 0.0005071580325563749 	 Val Loss:  0.00011636756211519241 	 Val Accuracy:  0.9371999502182007
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.5161715298891064e-05 	 Test Accuracy:  0.9666000008583069
Train Loss 0.00022610516051451365 	 Val Loss:  5.5161715298891064e-05 	 Val Accuracy:  0.9666000008583069
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.2378517985343935e-05 	 Test Accuracy:  0.9734999537467957
Train Loss 0.0001808916558821996 	 Val Loss:  4.2378517985343935e-05 	 Val Accuracy:  0.9734999537467957
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.6008281633257864e-05 	 Test Accuracy:  0.9776999950408936
Train Loss 0.00015764661381642025 	 Val Loss:  3.6008281633257864e-05 	 Val Accuracy:  0.9776999950408936
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.1894692406058314e-05 	 Test Accuracy:  0.9800999760627747
Train Loss 0.00014391061514616013 	 Val Loss:  3.1894692406058314e-05 	 Val Accuracy:  0.9800999760627747
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.3085817843675614e-05 	 Test Accuracy:  0.979699969291687
Train Loss 0.00013653217752774557 	 Val Loss:  3.3085817843675614e-05 	 Val Accuracy:  0.979699969291687
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.7535255625844002e-05 	 Test Accuracy:  0.9827999472618103
Train Loss 0.0001299691873292128 	 Val Loss:  2.7535255625844002e-05 	 Val Accuracy:  0.9827999472618103
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.8489353880286217e-05 	 Test Accuracy:  0.9824999570846558
Train Loss 0.00012219919910033545 	 Val Loss:  2.8489353880286217e-05 	 Val Accuracy:  0.9824999570846558
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.6435038074851035e-05 	 Test Accuracy:  0.9843999743461609
Train Loss 0.000121324838946263 	 Val Loss:  2.6435038074851035e-05 	 Val Accuracy:  0.9843999743461609
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.4581343680620195e-05 	 Test Accuracy:  0.9856999516487122
Train Loss 0.00011544405271609625 	 Val Loss:  2.4581343680620195e-05 	 Val Accuracy:  0.9856999516487122


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.458074726164341e-05 	 Test Accuracy:  0.9856999516487122
Curvature using training data
Average Curvature: 7.965352 +/- 3.18 
Average Hessian Spectral Norm: 1.220732 +/- 5.42 
Average Gradient Norm: 0.216969 +/- 1.12
Curvature using test data
Average Curvature: 7.800882 +/- 3.37 
Average Hessian Spectral Norm: 1.059488 +/- 5.43 
Average Gradient Norm: 0.215649 +/- 1.22
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8747/10000 (87%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5877/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5877/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5877/10000 (59%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9204/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7608/10000 (76%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 4885/10000 (49%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0015, Accuracy: 2120/10000 (21%)



In [None]:
save_path = "saved_models/simple_gnorm_swish_10"
os.makedirs(save_path, exist_ok=True)
network = Net(partial(Swish, beta=10)).to(device)
criterion = get_criterion(network, "gnorm")
optimizer = get_optimizer(False, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011513145923614501 	 Test Accuracy:  0.10159999877214432
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00012654258012771607 	 Test Accuracy:  0.9350000023841858
Train Loss 0.0005690091679493586 	 Val Loss:  0.00012654258012771607 	 Val Accuracy:  0.9350000023841858
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.083057150244713e-05 	 Test Accuracy:  0.9643999934196472
Train Loss 0.00025773585587739943 	 Val Loss:  6.083057150244713e-05 	 Val Accuracy:  0.9643999934196472
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.549246281385422e-05 	 Test Accuracy:  0.971299946308136
Train Loss 0.00020404743303855261 	 Val Loss:  4.549246281385422e-05 	 Val Accuracy:  0.971299946308136
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  4.0021301060914996e-05 	 Test Accuracy:  0.9757999777793884
Train Loss 0.00017613549927870432 	 Val Loss:  4.0021301060914996e-05 	 Val Accuracy:  0.9757999777793884
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.530437424778938e-05 	 Test Accuracy:  0.9782999753952026
Train Loss 0.0001643332282702128 	 Val Loss:  3.530437424778938e-05 	 Val Accuracy:  0.9782999753952026
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.3552271500229837e-05 	 Test Accuracy:  0.9791999459266663
Train Loss 0.00015270122786362967 	 Val Loss:  3.3552271500229837e-05 	 Val Accuracy:  0.9791999459266663
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.272259645164013e-05 	 Test Accuracy:  0.9791999459266663
Train Loss 0.00014645010034243267 	 Val Loss:  3.272259645164013e-05 	 Val Accuracy:  0.9791999459266663
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.266631439328194e-05 	 Test Accuracy:  0.9799000024795532
Train Loss 0.00014482563436031342 	 Val Loss:  3.266631439328194e-05 	 Val Accuracy:  0.9799000024795532
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  3.242123275995255e-05 	 Test Accuracy:  0.9799000024795532
Train Loss 0.0001373270829518636 	 Val Loss:  3.242123275995255e-05 	 Val Accuracy:  0.9799000024795532
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.9483017325401305e-05 	 Test Accuracy:  0.9820999503135681
Train Loss 0.0001335137056807677 	 Val Loss:  2.9483017325401305e-05 	 Val Accuracy:  0.9820999503135681


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  2.9581732302904128e-05 	 Test Accuracy:  0.9820999503135681
Curvature using training data
Average Curvature: 8.121791 +/- 3.30 
Average Hessian Spectral Norm: 1.092673 +/- 4.47 
Average Gradient Norm: 0.235144 +/- 1.14
Curvature using test data
Average Curvature: 7.771481 +/- 3.50 
Average Hessian Spectral Norm: 1.323260 +/- 5.28 
Average Gradient Norm: 0.232355 +/- 1.02
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8728/10000 (87%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5864/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5864/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 5864/10000 (59%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0001, Accuracy: 9172/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7588/10000 (76%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 4631/10000 (46%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0015, Accuracy: 2132/10000 (21%)



In [None]:
save_path = "saved_models/lcnn_curvature_and_gnorm_softplus"
os.makedirs(save_path, exist_ok=True)
network = NetLCNN(ParametricSoftplus).to(device)
criterion = get_criterion(network, "curvature_and_gnorm")
optimizer = get_optimizer(True, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011526978492736816 	 Test Accuracy:  0.09799999743700027
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.000888489842414856 	 Test Accuracy:  0.3042999804019928
Train Loss 0.0007045070290565491 	 Val Loss:  0.000888489842414856 	 Val Accuracy:  0.3042999804019928
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0005957400679588318 	 Test Accuracy:  0.750499963760376
Train Loss 0.00037643997073173523 	 Val Loss:  0.0005957400679588318 	 Val Accuracy:  0.750499963760376
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0004473800003528595 	 Test Accuracy:  0.9289000034332275
Train Loss 0.0003953545461098353 	 Val Loss:  0.0004473800003528595 	 Val Accuracy:  0.9289000034332275
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00013314821124076843 	 Test Accuracy:  0.9549999833106995
Train Loss 0.00041581173539161683 	 Val Loss:  0.00013314821124076843 	 Val Accuracy:  0.9549999833106995
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0001028064638376236 	 Test Accuracy:  0.9646999835968018
Train Loss 0.0002726830964287122 	 Val Loss:  0.0001028064638376236 	 Val Accuracy:  0.9646999835968018
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  7.472613304853439e-05 	 Test Accuracy:  0.9702000021934509
Train Loss 0.00021517794728279112 	 Val Loss:  7.472613304853439e-05 	 Val Accuracy:  0.9702000021934509
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.657021194696426e-05 	 Test Accuracy:  0.973099946975708
Train Loss 0.00018710080881913503 	 Val Loss:  6.657021194696426e-05 	 Val Accuracy:  0.973099946975708
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.403232589364052e-05 	 Test Accuracy:  0.9715999960899353
Train Loss 0.0001803148478269577 	 Val Loss:  6.403232589364052e-05 	 Val Accuracy:  0.9715999960899353
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.672382041811943e-05 	 Test Accuracy:  0.97489994764328
Train Loss 0.00017112748970588048 	 Val Loss:  5.672382041811943e-05 	 Val Accuracy:  0.97489994764328
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.581327542662621e-05 	 Test Accuracy:  0.9739999771118164
Train Loss 0.00017325779497623443 	 Val Loss:  6.581327542662621e-05 	 Val Accuracy:  0.9739999771118164


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.575135588645936e-05 	 Test Accuracy:  0.9739999771118164
Curvature using training data
Average Curvature: 2.574377 +/- 0.74 
Average Hessian Spectral Norm: 0.638976 +/- 0.96 
Average Gradient Norm: 0.300651 +/- 0.55
Curvature using test data
Average Curvature: 2.530658 +/- 0.84 
Average Hessian Spectral Norm: 0.562208 +/- 0.93 
Average Gradient Norm: 0.255436 +/- 0.51
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8950/10000 (90%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6811/10000 (68%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6811/10000 (68%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6811/10000 (68%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 9118/10000 (91%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7707/10000 (77%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 5763/10000 (58%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 3722/10000 (37%)



In [None]:
save_path = "saved_models/lcnn_curvature_softplus"
os.makedirs(save_path, exist_ok=True)
network = NetLCNN(ParametricSoftplus).to(device)
criterion = get_criterion(network, "curvature")
optimizer = get_optimizer(True, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011540846824645997 	 Test Accuracy:  0.09799999743700027
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0009528741836547851 	 Test Accuracy:  0.3127000033855438
Train Loss 0.000643498557806015 	 Val Loss:  0.0009528741836547851 	 Val Accuracy:  0.3127000033855438
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.000710779333114624 	 Test Accuracy:  0.5773000121116638
Train Loss 0.0003362219939629237 	 Val Loss:  0.000710779333114624 	 Val Accuracy:  0.5773000121116638
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0004787184238433838 	 Test Accuracy:  0.9110999703407288
Train Loss 0.000418699038028717 	 Val Loss:  0.0004787184238433838 	 Val Accuracy:  0.9110999703407288
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00018095486462116242 	 Test Accuracy:  0.9559999704360962
Train Loss 0.0003864246984322866 	 Val Loss:  0.00018095486462116242 	 Val Accuracy:  0.9559999704360962
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  9.102462977170945e-05 	 Test Accuracy:  0.9662999510765076
Train Loss 0.00026485591530799865 	 Val Loss:  9.102462977170945e-05 	 Val Accuracy:  0.9662999510765076
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.712909489870071e-05 	 Test Accuracy:  0.9690999984741211
Train Loss 0.00019925771554311117 	 Val Loss:  6.712909489870071e-05 	 Val Accuracy:  0.9690999984741211
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.141692847013473e-05 	 Test Accuracy:  0.9738999605178833
Train Loss 0.000172311370074749 	 Val Loss:  6.141692847013473e-05 	 Val Accuracy:  0.9738999605178833
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.917095020413399e-05 	 Test Accuracy:  0.9733999967575073
Train Loss 0.000174760473271211 	 Val Loss:  5.917095020413399e-05 	 Val Accuracy:  0.9733999967575073
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.224315315485e-05 	 Test Accuracy:  0.974399983882904
Train Loss 0.00017047559668620428 	 Val Loss:  6.224315315485e-05 	 Val Accuracy:  0.974399983882904
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.797973945736885e-05 	 Test Accuracy:  0.9756999611854553
Train Loss 0.00016392674446105956 	 Val Loss:  5.797973945736885e-05 	 Val Accuracy:  0.9756999611854553


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.8101214468479155e-05 	 Test Accuracy:  0.9756999611854553
Curvature using training data
Average Curvature: 2.663511 +/- 0.68 
Average Hessian Spectral Norm: 0.582752 +/- 0.97 
Average Gradient Norm: 0.261225 +/- 0.53
Curvature using test data
Average Curvature: 2.573309 +/- 0.83 
Average Hessian Spectral Norm: 0.585025 +/- 0.98 
Average Gradient Norm: 0.264202 +/- 0.53
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 9010/10000 (90%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7122/10000 (71%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7122/10000 (71%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7122/10000 (71%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 9172/10000 (92%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0003, Accuracy: 7944/10000 (79%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 5946/10000 (59%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0009, Accuracy: 3849/10000 (38%)



In [None]:
save_path = "saved_models/lcnn_curvature_and_gnorm_swish"
os.makedirs(save_path, exist_ok=True)
network = NetLCNN(ParametricSwish).to(device)
criterion = get_criterion(network, "curvature_and_gnorm")
optimizer = get_optimizer(True, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011525011539459229 	 Test Accuracy:  0.13729999959468842
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0008063287734985351 	 Test Accuracy:  0.5027999877929688
Train Loss 0.0006797398577133815 	 Val Loss:  0.0008063287734985351 	 Val Accuracy:  0.5027999877929688
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0004796287477016449 	 Test Accuracy:  0.8553999662399292
Train Loss 0.00035315863390763603 	 Val Loss:  0.0004796287477016449 	 Val Accuracy:  0.8553999662399292
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00037190996408462525 	 Test Accuracy:  0.9437999725341797
Train Loss 0.000363443922996521 	 Val Loss:  0.00037190996408462525 	 Val Accuracy:  0.9437999725341797
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0001220954954624176 	 Test Accuracy:  0.9625999927520752
Train Loss 0.00035343599319458005 	 Val Loss:  0.0001220954954624176 	 Val Accuracy:  0.9625999927520752
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00010060376226902008 	 Test Accuracy:  0.9620999693870544
Train Loss 0.0002570968945821126 	 Val Loss:  0.00010060376226902008 	 Val Accuracy:  0.9620999693870544
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.175384894013405e-05 	 Test Accuracy:  0.9693999886512756
Train Loss 0.0001962640345096588 	 Val Loss:  6.175384894013405e-05 	 Val Accuracy:  0.9693999886512756
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.0365571081638334e-05 	 Test Accuracy:  0.9722999930381775
Train Loss 0.00018026076356569926 	 Val Loss:  6.0365571081638334e-05 	 Val Accuracy:  0.9722999930381775
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.740243285894394e-05 	 Test Accuracy:  0.9745000004768372
Train Loss 0.00017072237332661948 	 Val Loss:  6.740243285894394e-05 	 Val Accuracy:  0.9745000004768372
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.644569098949432e-05 	 Test Accuracy:  0.973800003528595
Train Loss 0.00016807618737220764 	 Val Loss:  5.644569098949432e-05 	 Val Accuracy:  0.973800003528595
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.383819043636322e-05 	 Test Accuracy:  0.974399983882904
Train Loss 0.0001576009641091029 	 Val Loss:  5.383819043636322e-05 	 Val Accuracy:  0.974399983882904


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.3717626631259916e-05 	 Test Accuracy:  0.974399983882904
Curvature using training data
Average Curvature: 3.330406 +/- 0.96 
Average Hessian Spectral Norm: 0.809141 +/- 1.51 
Average Gradient Norm: 0.298746 +/- 0.68
Curvature using test data
Average Curvature: 3.248729 +/- 1.10 
Average Hessian Spectral Norm: 0.761144 +/- 1.49 
Average Gradient Norm: 0.265003 +/- 0.62
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8810/10000 (88%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6403/10000 (64%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6403/10000 (64%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0005, Accuracy: 6403/10000 (64%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 9070/10000 (91%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7529/10000 (75%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0007, Accuracy: 5402/10000 (54%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0010, Accuracy: 3420/10000 (34%)



In [None]:
save_path = "saved_models/lcnn_curvature_swish"
os.makedirs(save_path, exist_ok=True)
network = NetLCNN(ParametricSwish).to(device)
criterion = get_criterion(network, "curvature")
optimizer = get_optimizer(True, learning_rate, network)
test(network, test_loader, device)
for epoch in range(1, n_epochs + 1):
    print("Epoch: ", epoch)
    train(network, train_loader, criterion, optimizer, test_loader, save_path, device)
test(network, test_loader, device)
print("Curvature using training data")
print("="*100)
measure_curvature(network, train_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Curvature using test data")
print("="*100)
measure_curvature(network, test_cal_curve_data,
                  data_fraction=0.1,
                  batch_size=64,
                  num_power_iter=10,
                  device=device)
print("Attacking with PGD (Projected Gradient Descent) Attack")
print("="*100)
for attack in attacks_pgd:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

print("Attacking with FGSM (Fast gradient sign method) Attack")
print("="*100)
for attack in attacks_fgsm:
    loss, acc = adversarial_test(source_model=network,
                                 target_model=network,
                                 attack=attack,
                                 device=device,
                                 test_loader=test_loader)

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0011536531925201417 	 Test Accuracy:  0.10369999706745148
Epoch:  1


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0008750490665435791 	 Test Accuracy:  0.26579999923706055
Train Loss 0.0007879385193188985 	 Val Loss:  0.0008750490665435791 	 Val Accuracy:  0.26579999923706055
Epoch:  2


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.0006053414225578308 	 Test Accuracy:  0.6685000061988831
Train Loss 0.00039246665835380556 	 Val Loss:  0.0006053414225578308 	 Val Accuracy:  0.6685000061988831
Epoch:  3


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00044467808604240416 	 Test Accuracy:  0.9422999620437622
Train Loss 0.0004108260621627172 	 Val Loss:  0.00044467808604240416 	 Val Accuracy:  0.9422999620437622
Epoch:  4


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  0.00016129318177700042 	 Test Accuracy:  0.9539999961853027
Train Loss 0.00039433738191922506 	 Val Loss:  0.00016129318177700042 	 Val Accuracy:  0.9539999961853027
Epoch:  5


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  9.181348532438278e-05 	 Test Accuracy:  0.9638999700546265
Train Loss 0.00026852263162533443 	 Val Loss:  9.181348532438278e-05 	 Val Accuracy:  0.9638999700546265
Epoch:  6


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.625302881002426e-05 	 Test Accuracy:  0.9669999480247498
Train Loss 0.00019727433174848556 	 Val Loss:  6.625302881002426e-05 	 Val Accuracy:  0.9669999480247498
Epoch:  7


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.219177693128586e-05 	 Test Accuracy:  0.9717999696731567
Train Loss 0.00018353579888741176 	 Val Loss:  6.219177693128586e-05 	 Val Accuracy:  0.9717999696731567
Epoch:  8


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.122948229312897e-05 	 Test Accuracy:  0.9736999869346619
Train Loss 0.0001676871786514918 	 Val Loss:  6.122948229312897e-05 	 Val Accuracy:  0.9736999869346619
Epoch:  9


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  5.92306561768055e-05 	 Test Accuracy:  0.974399983882904
Train Loss 0.00016646481454372407 	 Val Loss:  5.92306561768055e-05 	 Val Accuracy:  0.974399983882904
Epoch:  10


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.283501982688904e-05 	 Test Accuracy:  0.97079998254776
Train Loss 0.00016439922352631887 	 Val Loss:  6.283501982688904e-05 	 Val Accuracy:  0.97079998254776


  0%|          | 0/5 [00:00<?, ?it/s]

Test Loss:  6.279144659638405e-05 	 Test Accuracy:  0.97079998254776
Curvature using training data
Average Curvature: 3.292094 +/- 1.00 
Average Hessian Spectral Norm: 0.933410 +/- 1.55 
Average Gradient Norm: 0.352891 +/- 0.69
Curvature using test data
Average Curvature: 3.207642 +/- 1.18 
Average Hessian Spectral Norm: 0.896789 +/- 1.57 
Average Gradient Norm: 0.341723 +/- 0.72
Attacking with PGD (Projected Gradient Descent) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8658/10000 (87%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 6092/10000 (61%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 6092/10000 (61%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0006, Accuracy: 6092/10000 (61%)

Attacking with FGSM (Fast gradient sign method) Attack


  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0002, Accuracy: 8921/10000 (89%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0004, Accuracy: 7137/10000 (71%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0008, Accuracy: 4780/10000 (48%)



  0%|          | 0/5 [00:00<?, ?it/s]


Adversarial Test set: Average loss: 0.0012, Accuracy: 2296/10000 (23%)

