【GANs】Deep Convolutional Generative Adversarial Network (DCGAN)
3 DCGAN
Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks
3.1 Introduction
[Figure: the generator network of a Deep Convolutional Generative Adversarial Network (DCGAN)]
In DCGAN:
- The discriminator is a conventional deep convolutional neural network, but it uses strided convolutions for downsampling instead of max-pooling;
- The generator is implemented with a special deep convolutional network. As shown in the figure above, it uses fractionally-strided (transposed) convolutions to generate 64×64 images. The first layer is a fully connected layer whose input is a 100-dimensional vector z sampled from a uniform distribution and whose output is reshaped to 4×4×1024; it is followed by four fractionally-strided convolution layers, with no pooling layers (a minimal sketch of this generator follows this list).
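To make the architecture above concrete, here is a minimal Keras sketch of the 64×64 generator, using Conv2DTranspose for the fractionally-strided convolutions. The function name, kernel size, and exact batch-normalization placement are illustrative assumptions rather than code from the paper.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape, Conv2DTranspose, BatchNormalization, Activation

def build_dcgan_generator(latent_dim=100, channels=3):
    model = Sequential()
    # Project the 100-dim noise vector to a 4x4x1024 feature map
    model.add(Dense(4 * 4 * 1024, input_dim=latent_dim))
    model.add(Reshape((4, 4, 1024)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    # Fractionally-strided (transposed) convolutions: 4x4 -> 8x8 -> 16x16 -> 32x32
    for filters in (512, 256, 128):
        model.add(Conv2DTranspose(filters, kernel_size=5, strides=2, padding="same"))
        model.add(BatchNormalization())
        model.add(Activation("relu"))
    # Final layer: 32x32 -> 64x64 image, with Tanh as the output activation
    model.add(Conv2DTranspose(channels, kernel_size=5, strides=2, padding="same", activation="tanh"))
    return model

# Example: map a batch of uniformly sampled noise vectors to 64x64 images
gen = build_dcgan_generator()
fake_imgs = gen.predict(np.random.uniform(-1, 1, size=(16, 100)))  # shape (16, 64, 64, 3)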
The main advantage of DCGAN is that a set of empirical architectural design choices makes adversarial training more stable. For example:
- Replace pooling with strided convolutions (in the discriminator) and fractionally-strided convolutions (in the generator), so that no information is discarded;
- Use batch normalization;
- Remove the fully connected layers that follow the convolutional layers;
- In the generator, use ReLU activations in all layers except the last one, which uses Tanh;
- In the discriminator, use LeakyReLU activations in all layers (a discriminator built along these lines is sketched below).
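As a rough illustration of these guidelines (not code from the paper), a 64×64 discriminator might look like the following sketch; the filter counts and kernel sizes are assumptions:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, LeakyReLU, Flatten, Dense

def build_dcgan_discriminator(img_shape=(64, 64, 3)):
    model = Sequential()
    # Strided convolutions halve the resolution at each step (no pooling layers)
    model.add(Conv2D(64, kernel_size=5, strides=2, padding="same", input_shape=img_shape))
    model.add(LeakyReLU(alpha=0.2))
    for filters in (128, 256, 512):
        model.add(Conv2D(filters, kernel_size=5, strides=2, padding="same"))
        model.add(BatchNormalization())
        model.add(LeakyReLU(alpha=0.2))
    # The last 4x4x512 feature map is flattened into a single sigmoid real/fake score
    model.add(Flatten())
    model.add(Dense(1, activation="sigmoid"))
    return model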
3.2 DCGAN Implementation
# DCGAN_2016.py
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Input
from tensorflow.keras.layers import UpSampling2D, Conv2D, Activation, ZeroPadding2D, GlobalAveragePooling2D
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
class DCGAN():
    def __init__(self):
        # Input shape
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        # Ten classes (MNIST digits)
        self.num_classes = 10
        self.latent_dim = 100
        # Adam optimizer
        optimizer = Adam(0.0002, 0.5)
        # Discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        # Generator
        self.generator = self.build_generator()
        # "combined" stacks the generator and the discriminator;
        # the discriminator's trainable flag is set to False here,
        # so this model is used only to train the generator
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)
        self.discriminator.trainable = False
        valid = self.discriminator(img)
        self.combined = Model(z, valid)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
    def build_generator(self):
        model = Sequential()
        # Fully connected layer projecting the noise vector to 32*7*7 units
        model.add(Dense(32 * 7 * 7, activation="relu", input_dim=self.latent_dim))
        # Reshape into a feature map
        model.add(Reshape((7, 7, 32)))
        # 7, 7, 32 -> 7, 7, 64
        model.add(Conv2D(64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # Upsample
        # 7, 7, 64 -> 14, 14, 128
        model.add(UpSampling2D())
        model.add(Conv2D(128, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # Upsample
        # 14, 14, 128 -> 28, 28, 64
        model.add(UpSampling2D())
        model.add(Conv2D(64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # 28, 28, 64 -> 28, 28, 1
        model.add(Conv2D(self.channels, kernel_size=3, padding="same"))
        model.add(Activation("tanh"))
        model.summary()
        noise = Input(shape=(self.latent_dim,))
        img = model(noise)
        return Model(noise, img)
    def build_discriminator(self):
        model = Sequential()
        # 28, 28, 1 -> 14, 14, 32
        model.add(Conv2D(32, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        # 14, 14, 32 -> 7, 7, 64
        model.add(Conv2D(64, kernel_size=3, strides=2, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        # 7, 7, 64 -> 4, 4, 128
        model.add(ZeroPadding2D(((0, 1), (0, 1))))
        model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(GlobalAveragePooling2D())
        # Single sigmoid output: real (1) or fake (0)
        model.add(Dense(1, activation='sigmoid'))
        model.summary()
        img = Input(shape=self.img_shape)
        validity = model(img)
        return Model(img, validity)
    def train(self, epochs, batch_size=128, save_interval=50):
        (X_train, _), (_, _) = mnist.load_data()  # Load the data
        X_train = X_train / 127.5 - 1.  # Rescale to [-1, 1]
        X_train = np.expand_dims(X_train, axis=3)
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # --------------------- #
            #  Train the discriminator
            # --------------------- #
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)
            # Train on real and fake batches and average the losses
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # ---------------------
            #  Train the generator
            # ---------------------
            g_loss = self.combined.train_on_batch(noise, valid)
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss))
            if epoch % save_interval == 0:
                self.save_imgs(epoch)
    def save_imgs(self, epoch):
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)
        # Rescale from [-1, 1] back to [0, 1] for plotting
        gen_imgs = 0.5 * gen_imgs + 0.5
        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        fig.savefig("images/mnist_%d.png" % epoch)
        plt.close()
if __name__ == '__main__':
    # Make sure the output directory for sample images exists
    if not os.path.exists("./images"):
        os.makedirs("./images")
    dcgan = DCGAN()
    dcgan.train(epochs=20000, batch_size=256, save_interval=500)
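Note that each loop iteration in train samples a single random batch of 256 images rather than a full pass over MNIST, so epochs=20000 corresponds to 20,000 alternating discriminator/generator updates, with a 5×5 grid of generated samples written to the images/ directory every 500 iterations.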