Perceptual Loss - Musterlösung

perceptual.py

 1import torch
 2from torch import nn
 3from torchvision.models import vgg16
 4
 5
 6class VGG16PerceptualLoss(nn.Module):
 7    def __init__(self):
 8        """Initialize the VGG16 perceptual loss model.
 9
10        It computes the perceptual loss as the mean squared error between the features.
11
12        The model is set to evaluation mode and the parameters are frozen.
13
14        **TODO**:
15
16        - Load the VGG16 model with pretrained weights. Use `torchvision.models.vgg16(pretrained=True)`.
17
18        - Restrict the VGG16 model to the first 16 layers by using `self.vgg = vgg16(pretrained=True).features[:16]`.
19
20        - Set the model to evaluation mode using `.eval()`.
21
22        - Freeze the parameters of the VGG16 model by setting `param.requires_grad = False` for all parameters.
23          NOTE: Iterate through all parameters by using the `self.vgg.parameters()`-Iterator.
24
25        - Initialize the L2 loss function using `nn.MSELoss()`.
26        """
27        super(VGG16PerceptualLoss, self).__init__()
28        self.vgg = vgg16(pretrained=True).features[:16].eval().cuda()
29
30        for param in self.vgg.parameters():
31            param.requires_grad = False
32
33        self.l1_loss = nn.L1Loss()
34
35    def forward(self, output, target):
36        """Compute the perceptual loss between two images.
37
38        Parameters:
39        -----------
40            output (torch.Tensor):
41              The output image tensor from the upscaler network.
42
43            target (torch.Tensor):
44              The target image tensor from ground truth.
45
46        Returns:
47        --------
48            torch.Tensor:
49              The computed perceptual loss as the mean squared error between the features of the two images.
50
51        **TODO**:
52        - Pass `output` through the VGG16 model to get the features `f1`.
53
54        - Pass `target` through the VGG16 model to get the features `f2`. Note: You should use `torch.no_grad()` to avoid computing gradients for the target image.
55
56        - Compute and return the L2 loss between `f1` and `f2` using `self.l2_loss(f1, f2)`.
57        """
58        # output = torch.nn.functional.interpolate(
59        #     output, size=(224, 224), mode="bilinear", align_corners=False
60        # )
61        # target = torch.nn.functional.interpolate(
62        #     target, size=(224, 224), mode="bilinear", align_corners=False
63        # )
64
65        f1 = self.vgg(output)
66
67        with torch.no_grad():
68            f2 = self.vgg(target)
69
70        return self.l1_loss(f1, f2)
71
72
class TVLoss(nn.Module):
    """Total-variation style penalty on neighbouring pixel differences."""

    def __init__(self):
        super().__init__()

    def forward(self, img):
        """Return the summed mean absolute differences along the last two axes.

        ``img`` is indexed with four dimensions; differences are taken
        between adjacent entries of dimension 2 and of dimension 3.
        """
        # Differences between neighbours along dimension 2.
        delta_rows = img[:, :, :-1, :] - img[:, :, 1:, :]
        # Differences between neighbours along dimension 3.
        delta_cols = img[:, :, :, :-1] - img[:, :, :, 1:]

        row_term = delta_rows.abs().mean()
        col_term = delta_cols.abs().mean()
        return row_term + col_term

upscale2x.py

 1import torch
 2from torch import nn
 3from torchvision.models import vgg16
 4from misc import get_dataloader, train, ResNetBlock
 5from perceptual_solution import VGG16PerceptualLoss, TVLoss
 6
 7
 8class Upscale2x(nn.Module):
 9    def __init__(self):
10        """Initialize the Upscale2x model.
11
12        This model performs 2x upscaling using a series of ResNet blocks and an upsampling layer.
13
14        **TODO**:
15
16        - Call the `__init__` method of the base class `nn.Module`.
17
18        - Define an upsampling layer using `nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) <https://docs.pytorch.org/docs/stable/generated/torch.nn.Upsample.html>`_.
19
20        - Define a sequential model consisting of:
21
22        - Four `ResNetBlock` layers with 3->16, 16->32 and 32->64 and 64->128 channels as well as kernel sizes 7.
23
24        - A PixelShuffle layer with an upscale factor of 2.
25
26        - A final convolutional layer with 32 input channels, 3 output channels and kernel size 7 with padding 3.
27        """
28        super(Upscale2x, self).__init__()
29        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
30        self.model = nn.Sequential(
31            ResNetBlock(3, 16, kernel_size=7),
32            ResNetBlock(16, 32, kernel_size=7),
33            ResNetBlock(32, 64, kernel_size=7),
34            ResNetBlock(64, 128, kernel_size=7),
35            nn.PixelShuffle(upscale_factor=2),  # First upsample
36            nn.Conv2d(32, 3, kernel_size=7, padding=3),  # Final conv to reduce channels
37        )
38
39    def forward(self, x):
40        """Perform the forward pass of the Upscale2x model.
41
42        Parameters:
43        -----------
44            x (torch.Tensor):
45              The input tensor to be upscaled.
46
47        Returns:
48        --------
49            torch.Tensor:
50              The upscaled output tensor.
51
52        **TODO**:
53
54        - Pass the input tensor through the model.
55
56        - Also, apply the upsampling layer to the input tensor `x`.
57
58        - Add the upsampled tensor to the output of the model.
59        """
60        up = self.upsample(x)
61        x = up + self.model(x)
62        return x
63
64
class GeneratorLoss(nn.Module):
    """Training objective: perceptual loss plus a weighted ``TVLoss`` term."""

    def __init__(self):
        super().__init__()
        self.perceptualLoss = VGG16PerceptualLoss()
        # Not used by ``forward``; kept available as an alternative criterion.
        self.mseLoss = nn.MSELoss()
        self.tvLoss = TVLoss()

    def forward(self, output, target):
        """Return ``perceptualLoss(output, target) + 0.1 * tvLoss(output)``."""
        perceptual_term = self.perceptualLoss(output, target)
        smoothness_term = self.tvLoss(output)
        return perceptual_term + 0.1 * smoothness_term
74
75
if __name__ == "__main__":
    # Name handed to ``train`` as its first argument.
    run_name = "upscale2x_perceptual"

    network = Upscale2x().cuda()
    loader = get_dataloader(inputSize=128, outputSize=256, batch_size=32)
    criterion = GeneratorLoss().cuda()

    # TODO Task 3: Use mseLoss instead of perceptualLoss for training
    train(run_name, network, loader, criterion)

upscale4x.py

 1import torch
 2from torch import nn
 3from torchvision.models import vgg16
 4from misc import get_dataloader, train, ResNetBlock
 5from perceptual_solution import VGG16PerceptualLoss, TVLoss
 6
 7
 8class Upscale4x(nn.Module):
 9    def __init__(self):
10        """Initialize the Upscale4x model.
11
12        This model performs 4x upscaling using a series of ResNet blocks and an upsampling layer.
13
14        **TODO**:
15
16        - Call the `__init__` method of the base class `nn.Module`.
17
18        - Define an upsampling layer using `nn.Upsample(scale_factor=4, mode="bilinear", align_corners=True) <https://docs.pytorch.org/docs/stable/generated/torch.nn.Upsample.html>`_.
19
20        - Define a sequential model consisting of:
21
22        - Five `ResNetBlock` layers with 3->16, 16->32, 32->64, 64->128 and 128->256 channels as well as kernel sizes 7.
23
24        - A PixelShuffle layer with an upscale factor of 4.
25
26        - A final convolutional layer with 16 input channels, 3 output channels and kernel size 5 with padding 2.
27        """
28        super(Upscale4x, self).__init__()
29        self.upsample = nn.Upsample(scale_factor=4, mode="bilinear", align_corners=True)
30        self.model = nn.Sequential(
31            ResNetBlock(3, 16, kernel_size=7),
32            ResNetBlock(16, 32, kernel_size=7),
33            ResNetBlock(32, 64, kernel_size=7),
34            ResNetBlock(64, 128, kernel_size=7),
35            ResNetBlock(128, 256, kernel_size=7),
36            nn.PixelShuffle(upscale_factor=4),  # First upsample
37            nn.Conv2d(16, 3, kernel_size=7, padding=3),  # Final conv to reduce channels
38        )
39
40    def forward(self, x):
41        """Perform the forward pass of the Upscale2x model.
42
43        Parameters:
44        -----------
45            x (torch.Tensor):
46              The input tensor to be upscaled.
47
48        Returns:
49        --------
50            torch.Tensor:
51              The upscaled output tensor.
52
53        **TODO**:
54
55        - Pass the input tensor through the model.
56
57        - Also, apply the upsampling layer to the input tensor `x`.
58
59        - Add the upsampled tensor to the output of the model.
60        """
61        up = self.upsample(x)
62        x = up + self.model(x)
63        return x
64
65
class GeneratorLoss(nn.Module):
    """Training objective: perceptual loss plus a weighted ``TVLoss`` term."""

    def __init__(self):
        super().__init__()
        self.perceptualLoss = VGG16PerceptualLoss()
        # Not used by ``forward``; kept available as an alternative criterion.
        self.mseLoss = nn.MSELoss()
        self.tvLoss = TVLoss()

    def forward(self, output, target):
        """Return ``perceptualLoss(output, target) + 0.1 * tvLoss(output)``."""
        perceptual_term = self.perceptualLoss(output, target)
        smoothness_term = self.tvLoss(output)
        return perceptual_term + 0.1 * smoothness_term
75
76
if __name__ == "__main__":
    # Name handed to ``train`` as its first argument.
    run_name = "upscale4x_perceptual"

    network = Upscale4x().cuda()
    loader = get_dataloader(inputSize=64, outputSize=256, batch_size=32)
    criterion = GeneratorLoss().cuda()

    # TODO Task 3: Use mseLoss instead of perceptualLoss for training
    train(run_name, network, loader, criterion)