CS231n2017 Assignment3 PyTorch部分-EW帮帮网

StyleTransfer-PyTorch.ipynb

content_loss:

计算原始图像的特征图与生成图像的特征图之间的差距，公式：

$L_c = \omega_c\times \sum_{i,j}(F^l_{ij}-P^l_{ij})^2$

def content_loss(content_weight, content_current, content_original):
    """
    Compute the content loss for style transfer.
    
    Inputs:
    - content_weight: Scalar giving the weighting for the content loss.
    - content_current: features of the current image; this is a PyTorch Tensor of shape
      (1, C_l, H_l, W_l).
    - content_target: features of the content image, Tensor with shape (1, C_l, H_l, W_l).
    
    Returns:
    - scalar content loss
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    scalar_content_loss = content_weight*torch.sum((content_current-content_original)**2)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return scalar_content_loss

gram_matrix:

normalize:

除以HWC，除以HW是因为求和有这么多项，求平均值，除以C是对C取平均保证数值与通道数C无关

def gram_matrix(features, normalize=True):
    """
    Compute the Gram matrix from features.
    
    Inputs:
    - features: PyTorch Tensor of shape (N, C, H, W) giving features for
      a batch of N images.
    - normalize: optional, whether to normalize the Gram matrix
        If True, divide the Gram matrix by the number of neurons (H * W * C)
    
    Returns:
    - gram: PyTorch Tensor of shape (N, C, C) giving the
      (optionally normalized) Gram matrices for the N input images.
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    N, C, H, W = features.shape
    features = features.reshape(N, C, -1)
    gram = torch.zeros((N, C, C))
    for i in range(N):
      gram[i] = torch.mm(features[i], features[i].t())
    if normalize:
        gram /= (H*W*C)
    return gram

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

style_loss:

计算每一次层的current gram matrix 与 target gram matrix的loss之和

def style_loss(feats, style_layers, style_targets, style_weights):
    """
    Computes the style loss at a set of layers.
    
    Inputs:
    - feats: list of the features at every layer of the current image, as produced by
      the extract_features function.
    - style_layers: List of layer indices into feats giving the layers to include in the
      style loss.
    - style_targets: List of the same length as style_layers, where style_targets[i] is
      a PyTorch Tensor giving the Gram matrix of the source style image computed at
      layer style_layers[i].
    - style_weights: List of the same length as style_layers, where style_weights[i]
      is a scalar giving the weight for the style loss at layer style_layers[i].
      
    Returns:
    - style_loss: A PyTorch Tensor holding a scalar giving the style loss.
    """
    # Hint: you can do this with one for loop over the style layers, and should
    # not be very much code (~5 lines). You will need to use your gram_matrix function.
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    style_loss = 0
    N_ = len(style_layers)
    for i in range(N_):
        G = gram_matrix(feats[style_layers[i]])
        style_loss += style_weights[i] * torch.sum((G - style_targets[i])**2)
    return style_loss

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

tv_loss:

为了增加生成图像的平滑性，我们使用Total-variation regularization来控制相邻像素之间的差异，用差的平方来衡量

def tv_loss(img, tv_weight):
    """
    Compute total variation loss.
    
    Inputs:
    - img: PyTorch Variable of shape (1, 3, H, W) holding an input image.
    - tv_weight: Scalar giving the weight w_t to use for the TV loss.
    
    Returns:
    - loss: PyTorch Variable holding a scalar giving the total variation loss
      for img weighted by tv_weight.
    """
    # Your implementation should be vectorized and not require any loops!
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    h_shift = img[...,1:,:] - img[...,:-1,:] # 计算垂直方向相邻像素的差值
    w_shift = img[...,1:] - img[...,:-1] # 水平方向
    loss = tv_weight*(torch.sum(h_shift**2) + torch.sum(w_shift**2))
    return loss

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

NetworkVisualization.ipynb

compute_saliency_maps:

计算得分关于X的梯度，在channel维度上取梯度最大值

def compute_saliency_maps(X, y, model):
    """
    Compute a class saliency map using the model for images X and labels y.

    Input:
    - X: Input images; Tensor of shape (N, 3, H, W)
    - y: Labels for X; LongTensor of shape (N,)
    - model: A pretrained CNN that will be used to compute the saliency map.

    Returns:
    - saliency: A Tensor of shape (N, H, W) giving the saliency maps for the input
    images.
    """
    # Make sure the model is in "test" mode
    model.eval() # 设置为评估模式
    
    # Make input tensor require gradient
    X.requires_grad_() # 让输入图像需要计算梯度
    
    saliency = None
    ##############################################################################
    # TODO: Implement this function. Perform a forward and backward pass through #
    # the model to compute the gradient of the correct class score with respect  #
    # to each input image. You first want to compute the loss over the correct   #
    # scores (we'll combine losses across a batch by summing), and then compute  #
    # the gradients with a backward pass.                                        #
    ##############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    scores = model(X) ## 通过模型得到分类得分
    scores = (scores.gather(1, y.view(-1, 1)).squeeze()) # 提取正确类别的得分
    grad = np.ones_like(scores.detach().numpy()) 
    scores.backward(torch.from_numpy(grad)) # 反向传播求梯度
    
    saliency, _ = torch.max(X.grad.data.abs(), dim=1) # 在channel维度取最大值
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################
    return saliency

question:

能否使用saliency_maps替代梯度进行梯度上升生成图像？

answer:

不能，因为saliency_map用的是channel维度上的最大值（取绝对值之后的），其丢失了channel维度信息以及梯度方向信息

make_fooling_image:

使用梯度上升，对原本预测标签为A的图像，将其生成为预测标签为B

def make_fooling_image(X, target_y, model):
    """
    Generate a fooling image that is close to X, but that the model classifies
    as target_y.

    Inputs:
    - X: Input image; Tensor of shape (1, 3, 224, 224)
    - target_y: An integer in the range [0, 1000)
    - model: A pretrained CNN

    Returns:
    - X_fooling: An image that is close to X, but that is classifed as target_y
    by the model.
    """
    # Initialize our fooling image to the input image, and make it require gradient
    X_fooling = X.clone()
    X_fooling = X_fooling.requires_grad_()
    
    learning_rate = 1
    ##############################################################################
    # TODO: Generate a fooling image X_fooling that the model will classify as   #
    # the class target_y. You should perform gradient ascent on the score of the #
    # target class, stopping when the model is fooled.                           #
    # When computing an update step, first normalize the gradient:               #
    #   dX = learning_rate * g / ||g||_2                                         #
    #                                                                            #
    # You should write a training loop.                                          #
    #                                                                            #
    # HINT: For most examples, you should be able to generate a fooling image    #
    # in fewer than 100 iterations of gradient ascent.                           #
    # You can print your progress over iterations to check your algorithm.       #
    ##############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    for i in range(100):
      scores = model(X_fooling)
      _, pred_y = scores.max(1)
      if(pred_y==target_y):
        print('Done in %d iterations'%(i))
        break
      scores[0,target_y].backward()
      dX = learning_rate*X_fooling.grad.data/torch.norm(X_fooling.grad.data)
      X_fooling.data += dX.data
      X_fooling.grad.data.zero_()
      

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################
    return X_fooling

create_class_visualization:

给定model，生成一张图片使得target_y的预测值最大

使用梯度上升即可，同时使用L2正则化

def create_class_visualization(target_y, model, dtype, **kwargs):
    """
    Generate an image to maximize the score of target_y under a pretrained model.
    
    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image
    - dtype: Torch datatype to use for computations
    
    Keyword arguments:
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to gjitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    model.type(dtype)
    l2_reg = kwargs.pop('l2_reg', 1e-3)
    learning_rate = kwargs.pop('learning_rate', 25)
    num_iterations = kwargs.pop('num_iterations', 100)
    blur_every = kwargs.pop('blur_every', 10)
    max_jitter = kwargs.pop('max_jitter', 16)
    show_every = kwargs.pop('show_every', 25)

    # Randomly initialize the image as a PyTorch Tensor, and make it requires gradient.
    img = torch.randn(1, 3, 224, 224).mul_(1.0).type(dtype).requires_grad_()

    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results
        ox, oy = random.randint(0, max_jitter), random.randint(0, max_jitter)
        img.data.copy_(jitter(img.data, ox, oy))

        ########################################################################
        # TODO: Use the model to compute the gradient of the score for the     #
        # class target_y with respect to the pixels of the image, and make a   #
        # gradient step on the image using the learning rate. Don't forget the #
        # L2 regularization term!                                              #
        # Be very careful about the signs of elements in your code.            #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        scores = model(img)
        scores = scores[:,target_y] - (l2_reg * torch.norm(img))
        scores.backward()
        img.data += (learning_rate*img.grad.data/torch.norm(img.grad.data))
        img.grad.data.zero_()

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                             END OF YOUR CODE                         #
        ########################################################################
        
        # Undo the random jitter
        img.data.copy_(jitter(img.data, -ox, -oy))

        # As regularizer, clamp and periodically blur the image
        for c in range(3):
            lo = float(-SQUEEZENET_MEAN[c] / SQUEEZENET_STD[c])
            hi = float((1.0 - SQUEEZENET_MEAN[c]) / SQUEEZENET_STD[c])
            img.data[:, c].clamp_(min=lo, max=hi)
        if t % blur_every == 0:
            blur_image(img.data, sigma=0.5)
        
        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess(img.data.clone().cpu()))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()

    return deprocess(img.data.cpu())

Generative_Adversarial_Networks_PyTorch.ipynb

sample_noise:

生成指定size和范围的noise

def sample_noise(batch_size, dim):
    """
    Generate a PyTorch Tensor of uniform random noise.

    Input:
    - batch_size: Integer giving the batch size of noise to generate.
    - dim: Integer giving the dimension of noise to generate.
    
    Output:
    - A PyTorch Tensor of shape (batch_size, dim) containing uniform
      random noise in the range (-1, 1).
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return torch.rand(batch_size, dim) * 2 - 1

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

discriminator:

按照指示搭建网络即可

def discriminator():
    """
    Build and return a PyTorch model implementing the architecture above.
    """
    model = nn.Sequential(
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            nn.Linear(784, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256,256),
            nn.LeakyReLU(0.01),
            nn.Linear(256,1)

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )
    return model

generator:

同理

def generator(noise_dim=NOISE_DIM):
    """
    Build and return a PyTorch model implementing the architecture above.
    """
    model = nn.Sequential(
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            nn.Linear(noise_dim,1024),
            nn.ReLU(),
            nn.Linear(1024,1024),
            nn.ReLU(),
            nn.Linear(1024, 784),
            nn.Tanh()

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )
    return model

discriminator_loss:

给定真样本和假样本的得分，希望计算discriminator的loss

上文已给出bce_loss的实现，即

$bce(s,y) = -ylog(s)-(1-y)log(1-s)$

则对于discriminator，其loss由两部分组成，分别是希望将真样本分到1产生的loss，和将假样本分到0产生的loss

def discriminator_loss(logits_real, logits_fake):
    """
    Computes the discriminator loss described above.
    
    Inputs:
    - logits_real: PyTorch Tensor of shape (N,) giving scores for the real data.
    - logits_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.
    
    Returns:
    - loss: PyTorch Tensor containing (scalar) the loss for the discriminator.
    """
    # 真实图像的标签为1，假图像的标签为0
    real_labels = torch.ones_like(logits_real).type(dtype)
    fake_labels = torch.zeros_like(logits_fake).type(dtype)
    
    # 计算真实图像和假图像的BCE损失
    loss_real = bce_loss(logits_real, real_labels)
    loss_fake = bce_loss(logits_fake, fake_labels)
    
    # 判别器总损失是两者之和
    loss = loss_real + loss_fake
    return loss

generator_loss:

给定假样本的得分，计算generator的loss

其loss为希望将假样本分到1所产生的loss

def generator_loss(logits_fake):
    """
    Computes the generator loss described above.

    Inputs:
    - logits_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.
    
    Returns:
    - loss: PyTorch Tensor containing the (scalar) loss for the generator.
    """
    # 生成器希望判别器将假图像分类为真实图像（标签为1）
    fake_labels = torch.ones_like(logits_fake).type(dtype)
    
    # 计算生成器损失
    loss = bce_loss(logits_fake, fake_labels)
    return loss

get_optimizer:

根据给定model，返回其对应的给定超参的optimizer

def get_optimizer(model):
    """
    Construct and return an Adam optimizer for the model with learning rate 1e-3,
    beta1=0.5, and beta2=0.999.
    
    Input:
    - model: A PyTorch model that we want to optimize.
    
    Returns:
    - An Adam optimizer for the model with the desired hyperparameters.
    """
    optimizer = None
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.999))
    return optimizer

迭代3000轮后的GAN：

Least Squares GAN:

使用MSE loss来代替cross entropy loss

即：

$L_G = \frac{1}{2}E_{z\sim p(z)}[(D(G(z))-1)^2]$

$L_D = \frac{1}{2}E_{x\sim p_{data}}[(D(x)-1)^2] + \frac{1}{2}E_{z\sim p(z)}[(D(G(z)))^2]$

def ls_discriminator_loss(scores_real, scores_fake):
    """
    Compute the Least-Squares GAN loss for the discriminator.
    
    Inputs:
    - scores_real: PyTorch Tensor of shape (N,) giving scores for the real data.
    - scores_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.
    
    Outputs:
    - loss: A PyTorch Tensor containing the loss.
    """
    loss = None
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    loss = 0.5 * (torch.mean((scores_real - 1) ** 2) + torch.mean(scores_fake ** 2))

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss

def ls_generator_loss(scores_fake):
    """
    Computes the Least-Squares GAN loss for the generator.
    
    Inputs:
    - scores_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.
    
    Outputs:
    - loss: A PyTorch Tensor containing the loss.
    """
    loss = None

    loss = 0.5*torch.mean((scores_fake - 1) ** 2)

    return loss

迭代3750轮后的LSGAN:

Deeply Convolutional GAN:

上文的建模GAN使用的都是FC层，缺少对空间信息的处理，因此我们这里选用CNN建模

def build_dc_classifier():
    """
    Build and return a PyTorch model for the DCGAN discriminator implementing
    the architecture above.
    """
    return nn.Sequential(
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        Unflatten(N=-1, C=1, H=28, W=28),  # 将784维向量重塑为1×28×28图像
        nn.Conv2d(1, 32, 5, 1),           
        nn.LeakyReLU(0.01),
        nn.MaxPool2d(2, 2),               
        nn.Conv2d(32, 64, 5, 1),         
        nn.LeakyReLU(0.01),
        nn.MaxPool2d(2, 2),               
        Flatten(),                        
        nn.Linear(1024, 1024),            
        nn.LeakyReLU(0.01),
        nn.Linear(1024, 1),              

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )

data = next(enumerate(loader_train))[-1][0].type(dtype)
b = build_dc_classifier().type(dtype)
out = b(data)
print(out.size())

def build_dc_generator(noise_dim=NOISE_DIM):
    """
    Build and return a PyTorch model implementing the DCGAN generator using
    the architecture described above.
    """
    return nn.Sequential(
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        nn.Linear(noise_dim, 1024),
        nn.ReLU(),
        nn.BatchNorm1d(1024),
        nn.Linear(1024, 128 * 7 * 7),
        nn.ReLU(),
        nn.BatchNorm1d(128*7*7),
        Unflatten(N=-1,C=128,H=7,W=7),
        nn.ConvTranspose2d(128, 64, 4, 2, 1), #反卷积，上采样
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64,1,4,2,1),
        nn.Tanh(),
        Flatten(),

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )

test_g_gan = build_dc_generator().type(dtype)
test_g_gan.apply(initialize_weights)

fake_seed = torch.randn(batch_size, NOISE_DIM).type(dtype)
fake_images = test_g_gan.forward(fake_seed)
fake_images.size()

迭代1250轮的效果：

QUESTION:

对于GAN的交替梯度更新目标函数，会导致不稳定的震荡动态行为，这就是GAN难以训练的根本原因

其次，discriminator的loss恒高，而generator的loss下降，这并不是一件好事，因为这表明discriminator训练失败，无法有效区分真假图像，从而导致generator无法进一步学习，导致模式崩塌

CS231n2017 Assignment3 PyTorch部分