Perceptual Loss - Musterlösung
perceptual.py
1import torch
2from torch import nn
3from torchvision.models import vgg16
4
5
class VGG16PerceptualLoss(nn.Module):
    """Perceptual loss measured in the feature space of a frozen VGG16.

    Both images are passed through the first 16 layers of the ``features``
    stack of a pretrained VGG16, and the mean absolute error (L1) between
    the two resulting feature maps is returned.
    """

    def __init__(self):
        """Build the frozen feature extractor and the feature-space criterion.

        The truncated VGG16 is switched to evaluation mode, moved to the GPU
        when one is available, and all of its parameters are frozen.
        """
        super().__init__()
        features = vgg16(pretrained=True).features[:16].eval()

        # Only move to the GPU when one exists, so the loss can also be
        # constructed on CPU-only machines. Callers can still relocate the
        # whole module afterwards with ``.cuda()`` / ``.to(device)``.
        if torch.cuda.is_available():
            features = features.cuda()
        self.vgg = features

        # The extractor is a fixed feature function; it must never be trained.
        for param in self.vgg.parameters():
            param.requires_grad = False

        self.l1_loss = nn.L1Loss()

    def forward(self, output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the perceptual loss between two images.

        Parameters
        ----------
        output : torch.Tensor
            Image tensor produced by the upscaler network. Gradients flow
            back through the feature extractor into this tensor.
        target : torch.Tensor
            Ground-truth image tensor. Its features are computed without
            gradient tracking.

        Returns
        -------
        torch.Tensor
            L1 loss (``nn.L1Loss`` with default settings) between the VGG16
            features of ``output`` and ``target``.
        """
        f1 = self.vgg(output)

        # The target is a constant reference, so no autograd graph is needed.
        with torch.no_grad():
            f2 = self.vgg(target)

        return self.l1_loss(f1, f2)
71
72
class TVLoss(nn.Module):
    """Total-variation style smoothness penalty for an image batch."""

    def __init__(self):
        super().__init__()

    def forward(self, img):
        """Return the summed mean absolute neighbour differences of ``img``.

        Differences are taken between adjacent entries along dimension 2 and
        along dimension 3; the mean absolute value of each set is computed
        and the two means are added.

        ``img`` is indexed with four subscripts, so it needs at least four
        dimensions - presumably (batch, channels, height, width); confirm
        against the caller.
        """
        row_delta = img[:, :, :-1, :] - img[:, :, 1:, :]
        col_delta = img[:, :, :, :-1] - img[:, :, :, 1:]
        return row_delta.abs().mean() + col_delta.abs().mean()
upscale2x.py
1import torch
2from torch import nn
3from torchvision.models import vgg16
4from misc import get_dataloader, train, ResNetBlock
5from perceptual_solution import VGG16PerceptualLoss, TVLoss
6
7
class Upscale2x(nn.Module):
    """2x image upscaler: bilinear upsampling plus a learned correction.

    The network output is added to a bilinearly upsampled copy of the input,
    so the convolutional branch contributes a residual on top of the
    interpolated image.
    """

    def __init__(self):
        """Create the bilinear upsampler and the convolutional branch.

        The branch is four ``ResNetBlock`` layers (3->16->32->64->128
        channels, kernel size 7), a ``PixelShuffle`` with upscale factor 2,
        and a final 7x7 convolution (padding 3) from 32 to 3 channels.
        """
        super().__init__()
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)

        # Channel widths of consecutive ResNet blocks; each neighbouring pair
        # is one block's (in_channels, out_channels).
        widths = [3, 16, 32, 64, 128]
        blocks = [
            ResNetBlock(c_in, c_out, kernel_size=7)
            for c_in, c_out in zip(widths, widths[1:])
        ]

        self.model = nn.Sequential(
            *blocks,
            # Rearranges channels into space: 128 channels -> 32 channels,
            # matching the 32 input channels of the final convolution.
            nn.PixelShuffle(upscale_factor=2),
            nn.Conv2d(32, 3, kernel_size=7, padding=3),
        )

    def forward(self, x):
        """Upscale ``x`` by a factor of two.

        Parameters
        ----------
        x : torch.Tensor
            The input tensor to be upscaled.

        Returns
        -------
        torch.Tensor
            Sum of the bilinearly upsampled input and the output of the
            convolutional branch.
        """
        return self.upsample(x) + self.model(x)
63
64
class GeneratorLoss(nn.Module):
    """Training loss for the upscaler: perceptual term plus weighted TV term."""

    def __init__(self, tv_weight=0.1):
        """Create the individual loss terms.

        Parameters
        ----------
        tv_weight : float, optional
            Factor applied to the total-variation term before it is added to
            the perceptual term. Defaults to 0.1.
        """
        super().__init__()
        self.perceptualLoss = VGG16PerceptualLoss()
        # Not used by forward(); kept available as an alternative criterion
        # (see the exercise-3 note in the training script).
        self.mseLoss = nn.MSELoss()
        self.tvLoss = TVLoss()
        self.tv_weight = tv_weight

    def forward(self, output, target):
        """Return ``perceptualLoss(output, target) + tv_weight * tvLoss(output)``.

        Parameters
        ----------
        output : torch.Tensor
            Image tensor produced by the upscaler.
        target : torch.Tensor
            Ground-truth image tensor.
        """
        perceptual_term = self.perceptualLoss(output, target)
        smoothness_term = self.tvLoss(output)
        return perceptual_term + self.tv_weight * smoothness_term
74
75
if __name__ == "__main__":
    # Prefix handed to train() as its first argument.
    run_name = "upscale2x_perceptual"

    # Network, data and criterion are created in this order; model and loss
    # are moved to the GPU.
    model = Upscale2x().cuda()
    loader = get_dataloader(inputSize=128, outputSize=256, batch_size=32)
    criterion = GeneratorLoss().cuda()

    # TODO (exercise 3): use mseLoss instead of perceptualLoss for training
    train(run_name, model, loader, criterion)
upscale4x.py
1import torch
2from torch import nn
3from torchvision.models import vgg16
4from misc import get_dataloader, train, ResNetBlock
5from perceptual_solution import VGG16PerceptualLoss, TVLoss
6
7
class Upscale4x(nn.Module):
    """4x image upscaler: bilinear upsampling plus a learned correction.

    The network output is added to a bilinearly upsampled copy of the input,
    so the convolutional branch contributes a residual on top of the
    interpolated image.
    """

    def __init__(self):
        """Create the bilinear upsampler and the convolutional branch.

        The branch is five ``ResNetBlock`` layers (3->16->32->64->128->256
        channels, kernel size 7), a ``PixelShuffle`` with upscale factor 4,
        and a final 7x7 convolution (padding 3) from 16 to 3 channels.
        """
        super().__init__()
        self.upsample = nn.Upsample(scale_factor=4, mode="bilinear", align_corners=True)

        # Channel widths of consecutive ResNet blocks; each neighbouring pair
        # is one block's (in_channels, out_channels).
        widths = [3, 16, 32, 64, 128, 256]
        blocks = [
            ResNetBlock(c_in, c_out, kernel_size=7)
            for c_in, c_out in zip(widths, widths[1:])
        ]

        self.model = nn.Sequential(
            *blocks,
            # Rearranges channels into space: 256 channels -> 16 channels,
            # matching the 16 input channels of the final convolution.
            nn.PixelShuffle(upscale_factor=4),
            nn.Conv2d(16, 3, kernel_size=7, padding=3),
        )

    def forward(self, x):
        """Upscale ``x`` by a factor of four.

        Parameters
        ----------
        x : torch.Tensor
            The input tensor to be upscaled.

        Returns
        -------
        torch.Tensor
            Sum of the bilinearly upsampled input and the output of the
            convolutional branch.
        """
        return self.upsample(x) + self.model(x)
64
65
class GeneratorLoss(nn.Module):
    """Training loss for the upscaler: perceptual term plus weighted TV term."""

    def __init__(self, tv_weight=0.1):
        """Create the individual loss terms.

        Parameters
        ----------
        tv_weight : float, optional
            Factor applied to the total-variation term before it is added to
            the perceptual term. Defaults to 0.1.
        """
        super().__init__()
        self.perceptualLoss = VGG16PerceptualLoss()
        # Not used by forward(); kept available as an alternative criterion
        # (see the exercise-3 note in the training script).
        self.mseLoss = nn.MSELoss()
        self.tvLoss = TVLoss()
        self.tv_weight = tv_weight

    def forward(self, output, target):
        """Return ``perceptualLoss(output, target) + tv_weight * tvLoss(output)``.

        Parameters
        ----------
        output : torch.Tensor
            Image tensor produced by the upscaler.
        target : torch.Tensor
            Ground-truth image tensor.
        """
        perceptual_term = self.perceptualLoss(output, target)
        smoothness_term = self.tvLoss(output)
        return perceptual_term + self.tv_weight * smoothness_term
75
76
if __name__ == "__main__":
    # Prefix handed to train() as its first argument.
    run_name = "upscale4x_perceptual"

    # Network, data and criterion are created in this order; model and loss
    # are moved to the GPU.
    model = Upscale4x().cuda()
    loader = get_dataloader(inputSize=64, outputSize=256, batch_size=32)
    criterion = GeneratorLoss().cuda()

    # TODO (exercise 3): use mseLoss instead of perceptualLoss for training
    train(run_name, model, loader, criterion)