使用MNIST数据集训练生成对抗网络

生成对抗网络

  生成对抗网络是一类神经网络,是一种机器学习中的无监督学习方式。在生成对抗网络中至少有生成器(Generator)和判别器(Discriminator)两个部分。两者属于一种零和二元博弈模型,即判别器要尽量识别出生成器生成的内容,而生成器则要尽力欺骗判别器,最终,生成器和判别器会达到一个平衡。

  本文参考了文章《利用tensorflow训练简单的生成对抗网络GAN》,对文章中的源码提出了一些自己的见解,并做了修改使代码可以运行(原文代码跑不起来啊啊啊)。文中使用TensorFlow实现GAN。

MNIST数据集

  MNIST数据集是一个常见的机器学习数据集,数据集中是分辨率为28×28的手写数字图片,共分为10个类别(数字0到9)。这篇文章使用MNIST数据集训练模型,让模型输出手写数字图片。

代码实现

  首先,创建三个占位符

def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Create the three placeholders that are fed at every training step
    :param image_width: Size of the second axis of the real-image batch
    :param image_height: Size of the third axis of the real-image batch
    :param image_channels: Size of the last axis of the real-image batch
    :param z_dim: Length of each noise vector
    :return: Tuple of (real-image placeholder, noise placeholder, learning-rate placeholder)
    """
    # A leading None leaves the batch size open.
    real_shape = (None, image_width, image_height, image_channels)
    noise_shape = (None, z_dim)

    # Real images, i.e. samples taken from the dataset
    real_images = tf.placeholder(tf.float32, real_shape, name='input_real')
    # Noise vectors (the generator input, not generated images)
    noise = tf.placeholder(tf.float32, noise_shape, name='input_z')
    # Learning rate, declared without a static shape
    lr = tf.placeholder(tf.float32, name='lr')

    return real_images, noise, lr

  判别器代码

def discriminator(images, reuse=False):
    """
    Create the discriminator network
    :param images: Tensor of input image(s); the final reshape assumes 28 * 28 inputs (28 -> 14 -> 7 -> 4)
    :param reuse: Boolean if the weights should be reused
    :return: Tuple of (sigmoid output of the discriminator, raw logits of the discriminator)
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        alpha = 0.2  # negative-side slope of the leaky ReLU

        # Dropout keep probability
        keep_prob = 0.8

        # Layer 1: 5 * 5 convolution, stride 2 -> 14 * 14 * 128 (no batch norm on this layer)
        x1 = tf.layers.conv2d(images, 128, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        # Leaky ReLU written as max(alpha * x, x)
        relu1 = tf.maximum(alpha * x1, x1)
        drop1 = tf.nn.dropout(relu1, keep_prob=keep_prob)

        # Layer 2: 5 * 5 convolution, stride 2 -> 7 * 7 * 256
        x2 = tf.layers.conv2d(drop1, 256, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        # Batch norm is always built with training=True, also when weights are reused
        bn2 = tf.layers.batch_normalization(x2, training=True)
        relu2 = tf.maximum(alpha * bn2, bn2)
        drop2 = tf.nn.dropout(relu2, keep_prob=keep_prob)

        # Layer 3: 5 * 5 convolution, stride 2 -> 4 * 4 * 512
        x3 = tf.layers.conv2d(drop2, 512, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        bn3 = tf.layers.batch_normalization(x3, training=True)
        relu3 = tf.maximum(alpha * bn3, bn3)

        # Output: deliberately no dropout here, the flattened features come
        # straight from the last activation.
        flatten = tf.reshape(relu3, (-1, 4 * 4 * 512))
        # Dense layer down to a single logit per image
        logits = tf.layers.dense(flatten, 1)
        # Sigmoid squashes the logit into (0, 1)
        out = tf.nn.sigmoid(logits)

    return out, logits

  生成器代码

def generator(input_z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param input_z: Tensor of noise vectors
    :param out_channel_dim: Number of channels of the generated image
    :param is_train: Passed as training= to every batch-norm layer; the variable scope is opened with reuse=not is_train
    :return: tanh-activated tensor of generated images
    """
    def bn_relu(tensor):
        # Batch normalization followed by ReLU, shared by all hidden layers.
        return tf.nn.relu(tf.layers.batch_normalization(tensor, training=is_train))

    with tf.variable_scope('generator', reuse=not is_train):
        # Project the noise and fold it into a 4 * 4 * 512 feature map.
        projected = tf.layers.dense(input_z, 4 * 4 * 512)
        h0 = bn_relu(tf.reshape(projected, (-1, 4, 4, 512)))

        # Transposed convolution, 4 * 4 kernel, stride 1, 'valid': 4 * 4 -> 7 * 7
        up1 = tf.layers.conv2d_transpose(h0, 256, 4, strides=1, padding='valid')
        h1 = bn_relu(up1)

        # Transposed convolution, 3 * 3 kernel, stride 2, 'same': 7 * 7 -> 14 * 14
        up2 = tf.layers.conv2d_transpose(h1, 512, 3, strides=2, padding='same')
        h2 = bn_relu(up2)

        # Transposed convolution, 3 * 3 kernel, stride 2, 'same': 14 * 14 -> 28 * 28
        image_logits = tf.layers.conv2d_transpose(h2, out_channel_dim, 3, strides=2, padding='same')
        generated = tf.tanh(image_logits)

    return generated

  然后定义损失函数

def model_loss(input_real, input_z, out_channel_dim):
    """
    Build the loss tensors for the discriminator and the generator
    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param out_channel_dim: The number of channels in the output image
    :return: A tuple of (discriminator loss, generator loss, generator output built with is_train=False)
    """
    def mean_bce(logits, labels):
        # Mean sigmoid cross-entropy between logits and target labels.
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

    # First call creates the generator variables; the second one reuses them.
    fake_images = generator(input_z, out_channel_dim, is_train=True)
    sample_images = generator(input_z, out_channel_dim, is_train=False)

    # Same discriminator weights judge both the real and the generated batch.
    real_prob, real_logits = discriminator(input_real, reuse=False)
    fake_prob, fake_logits = discriminator(fake_images, reuse=True)

    # Label smoothing: real images get the target 1 - smooth instead of 1.
    smooth = 0.1
    loss_on_real = mean_bce(real_logits, tf.ones_like(real_prob) * (1 - smooth))
    loss_on_fake = mean_bce(fake_logits, tf.zeros_like(fake_prob))

    # The generator is scored on how often its images are labelled real.
    generator_loss = mean_bce(fake_logits, tf.ones_like(fake_prob))

    discriminator_loss = loss_on_real + loss_on_fake

    return discriminator_loss, generator_loss, sample_images

  定义模型优化过程

def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Create one Adam training operation per network
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning rate handed to both AdamOptimizer instances
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # Split the trainable variables by the scope prefix of their name.
    d_vars, g_vars = [], []
    for variable in tf.trainable_variables():
        if variable.name.startswith('discriminator'):
            d_vars.append(variable)
        if variable.name.startswith('generator'):
            g_vars.append(variable)

    # Both training ops are created under a control dependency on every op
    # registered in the UPDATE_OPS collection.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        d_train_opt = d_optimizer.minimize(d_loss, var_list=d_vars)
        g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        g_train_opt = g_optimizer.minimize(g_loss, var_list=g_vars)

    return d_train_opt, g_train_opt

  接下来定义训练的过程

def train(epoch_count, batch_size, z_dim, learning_rate, beta1, data_shape):
    """
    Build the GAN graph and train it on the MNIST images labelled as the digit 2
    :param epoch_count: Number of training iterations; each iteration draws one mini-batch
    :param batch_size: Number of MNIST samples drawn per iteration, before filtering by label
    :param z_dim: Length of the noise vector fed to the generator
    :param learning_rate: Learning rate value fed to the learning-rate placeholder
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param data_shape: Image shape as (unused, width, height, channels)
    :return: List of (discriminator loss, generator loss) pairs, one per 10 steps
    """
    input_real, input_z, lr = model_inputs(data_shape[1], data_shape[2], data_shape[3], z_dim)
    d_loss, g_loss, g_out = model_loss(input_real, input_z, data_shape[-1])
    # Give the optimizers the placeholder, so the value fed as `lr` below is
    # the one that is actually applied.
    d_opt, g_opt = model_opt(d_loss, g_loss, lr, beta1)
    losses = []

    # Read the MNIST dataset from the MNIST_data directory under the current directory
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for steps in range(1, epoch_count + 1):
            images, labels = mnist.train.next_batch(batch_size)
            # Keep only the samples whose one-hot label marks the digit 2
            wanted = np.asarray(labels)[:, 2] == 1
            train_batch = np.asarray(images)[wanted]
            if train_batch.shape[0] == 0:
                # No sample of the target digit in this draw; skip it rather
                # than push an empty batch through the batch-norm layers.
                continue
            train_batch = np.reshape(train_batch, (-1, data_shape[1], data_shape[2], data_shape[3]))
            # NOTE(review): assumes the loader returns pixels scaled to [0, 1];
            # they are mapped to [-1, 1] to match the generator's tanh output.
            train_batch = train_batch * 2.0 - 1.0
            # Random noise as input for the generator
            batch_z = np.random.uniform(-1, 1, size=(train_batch.shape[0], z_dim))
            feed = {input_real: train_batch, input_z: batch_z, lr: learning_rate}
            sess.run(d_opt, feed_dict=feed)
            sess.run(g_opt, feed_dict=feed)
            if steps % 10 == 0:
                train_loss_d, train_loss_g = sess.run([d_loss, g_loss], feed_dict=feed)

                losses.append((train_loss_d, train_loss_g))

                # Report against the epoch_count argument, not a module-level global
                print("Epoch {}/{}...".format(steps, epoch_count),
                      "Discriminator Loss: {:.4f}...".format(train_loss_d),
                      "Generator Loss: {:.4f}".format(train_loss_g))
            if steps % 100 == 0:
                # Save the last generated sample of the batch as a grayscale image
                samples = sess.run(g_out, feed_dict={input_z: batch_z})
                # Map [-1, 1] to [0, 255]; clip and cast to uint8 so the array
                # is a valid 8-bit grayscale image for PIL.
                pixels = np.clip(samples[-1][:, :, 0] * 127.5 + 127.5, 0, 255).astype(np.uint8)
                Image.fromarray(pixels).save("result_{}.png".format(steps))

    return losses

  最后定义一些全局参数,引用一些库

import tensorflow as tf
import input_data
import numpy as np
from PIL import Image

# Number of MNIST samples drawn per training step (before filtering by label)
batch_size = 64
# Length of the noise vector fed to the generator
z_dim = 100
# Learning rate passed to train()
learning_rate = 0.001
# Exponential decay rate of the 1st moment for the Adam optimizers
beta1 = 0.5
# Passed to train() as epoch_count; train() draws one mini-batch per iteration
epochs = 5000

if __name__ == "__main__":
    # data_shape is (unused, width, height, channels); train() reads indices 1 to 3 only
    train(epochs, batch_size, z_dim, learning_rate, beta1, [0, 28, 28, 1])

完整代码

import tensorflow as tf
import input_data
import numpy as np
from PIL import Image

# Number of MNIST samples drawn per training step (before filtering by label)
batch_size = 64
# Length of the noise vector fed to the generator
z_dim = 100
# Learning rate passed to train()
learning_rate = 0.001
# Exponential decay rate of the 1st moment for the Adam optimizers
beta1 = 0.5
# Passed to train() as epoch_count; train() draws one mini-batch per iteration
epochs = 5000


def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Create the three placeholders that are fed at every training step
    :param image_width: Size of the second axis of the real-image batch
    :param image_height: Size of the third axis of the real-image batch
    :param image_channels: Size of the last axis of the real-image batch
    :param z_dim: Length of each noise vector
    :return: Tuple of (real-image placeholder, noise placeholder, learning-rate placeholder)
    """
    # A leading None leaves the batch size open.
    real_shape = (None, image_width, image_height, image_channels)
    noise_shape = (None, z_dim)

    # Real images, i.e. samples taken from the dataset
    real_images = tf.placeholder(tf.float32, real_shape, name='input_real')
    # Noise vectors (the generator input, not generated images)
    noise = tf.placeholder(tf.float32, noise_shape, name='input_z')
    # Learning rate, declared without a static shape
    lr = tf.placeholder(tf.float32, name='lr')

    return real_images, noise, lr


def generator(input_z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param input_z: Tensor of noise vectors
    :param out_channel_dim: Number of channels of the generated image
    :param is_train: Passed as training= to every batch-norm layer; the variable scope is opened with reuse=not is_train
    :return: tanh-activated tensor of generated images
    """
    def bn_relu(tensor):
        # Batch normalization followed by ReLU, shared by all hidden layers.
        return tf.nn.relu(tf.layers.batch_normalization(tensor, training=is_train))

    with tf.variable_scope('generator', reuse=not is_train):
        # Project the noise and fold it into a 4 * 4 * 512 feature map.
        projected = tf.layers.dense(input_z, 4 * 4 * 512)
        h0 = bn_relu(tf.reshape(projected, (-1, 4, 4, 512)))

        # Transposed convolution, 4 * 4 kernel, stride 1, 'valid': 4 * 4 -> 7 * 7
        up1 = tf.layers.conv2d_transpose(h0, 256, 4, strides=1, padding='valid')
        h1 = bn_relu(up1)

        # Transposed convolution, 3 * 3 kernel, stride 2, 'same': 7 * 7 -> 14 * 14
        up2 = tf.layers.conv2d_transpose(h1, 512, 3, strides=2, padding='same')
        h2 = bn_relu(up2)

        # Transposed convolution, 3 * 3 kernel, stride 2, 'same': 14 * 14 -> 28 * 28
        image_logits = tf.layers.conv2d_transpose(h2, out_channel_dim, 3, strides=2, padding='same')
        generated = tf.tanh(image_logits)

    return generated


def discriminator(images, reuse=False):
    """
    Create the discriminator network
    :param images: Tensor of input image(s); the final reshape assumes 28 * 28 inputs (28 -> 14 -> 7 -> 4)
    :param reuse: Boolean if the weights should be reused
    :return: Tuple of (sigmoid output of the discriminator, raw logits of the discriminator)
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        alpha = 0.2  # negative-side slope of the leaky ReLU

        # Dropout keep probability
        keep_prob = 0.8

        # Layer 1: 5 * 5 convolution, stride 2 -> 14 * 14 * 128 (no batch norm on this layer)
        x1 = tf.layers.conv2d(images, 128, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        # Leaky ReLU written as max(alpha * x, x)
        relu1 = tf.maximum(alpha * x1, x1)
        drop1 = tf.nn.dropout(relu1, keep_prob=keep_prob)

        # Layer 2: 5 * 5 convolution, stride 2 -> 7 * 7 * 256
        x2 = tf.layers.conv2d(drop1, 256, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        # Batch norm is always built with training=True, also when weights are reused
        bn2 = tf.layers.batch_normalization(x2, training=True)
        relu2 = tf.maximum(alpha * bn2, bn2)
        drop2 = tf.nn.dropout(relu2, keep_prob=keep_prob)

        # Layer 3: 5 * 5 convolution, stride 2 -> 4 * 4 * 512
        x3 = tf.layers.conv2d(drop2, 512, 5, strides=2, padding='same',
                              kernel_initializer=tf.contrib.layers.xavier_initializer(seed=2))
        bn3 = tf.layers.batch_normalization(x3, training=True)
        relu3 = tf.maximum(alpha * bn3, bn3)

        # Output: deliberately no dropout here, the flattened features come
        # straight from the last activation.
        flatten = tf.reshape(relu3, (-1, 4 * 4 * 512))
        # Dense layer down to a single logit per image
        logits = tf.layers.dense(flatten, 1)
        # Sigmoid squashes the logit into (0, 1)
        out = tf.nn.sigmoid(logits)

    return out, logits


def model_loss(input_real, input_z, out_channel_dim):
    """
    Build the loss tensors for the discriminator and the generator
    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param out_channel_dim: The number of channels in the output image
    :return: A tuple of (discriminator loss, generator loss, generator output built with is_train=False)
    """
    def mean_bce(logits, labels):
        # Mean sigmoid cross-entropy between logits and target labels.
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

    # First call creates the generator variables; the second one reuses them.
    fake_images = generator(input_z, out_channel_dim, is_train=True)
    sample_images = generator(input_z, out_channel_dim, is_train=False)

    # Same discriminator weights judge both the real and the generated batch.
    real_prob, real_logits = discriminator(input_real, reuse=False)
    fake_prob, fake_logits = discriminator(fake_images, reuse=True)

    # Label smoothing: real images get the target 1 - smooth instead of 1.
    smooth = 0.1
    loss_on_real = mean_bce(real_logits, tf.ones_like(real_prob) * (1 - smooth))
    loss_on_fake = mean_bce(fake_logits, tf.zeros_like(fake_prob))

    # The generator is scored on how often its images are labelled real.
    generator_loss = mean_bce(fake_logits, tf.ones_like(fake_prob))

    discriminator_loss = loss_on_real + loss_on_fake

    return discriminator_loss, generator_loss, sample_images


def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Create one Adam training operation per network
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning rate handed to both AdamOptimizer instances
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # Split the trainable variables by the scope prefix of their name.
    d_vars, g_vars = [], []
    for variable in tf.trainable_variables():
        if variable.name.startswith('discriminator'):
            d_vars.append(variable)
        if variable.name.startswith('generator'):
            g_vars.append(variable)

    # Both training ops are created under a control dependency on every op
    # registered in the UPDATE_OPS collection.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        d_train_opt = d_optimizer.minimize(d_loss, var_list=d_vars)
        g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1)
        g_train_opt = g_optimizer.minimize(g_loss, var_list=g_vars)

    return d_train_opt, g_train_opt


def train(epoch_count, batch_size, z_dim, learning_rate, beta1, data_shape):
    """
    Build the GAN graph and train it on the MNIST images labelled as the digit 2
    :param epoch_count: Number of training iterations; each iteration draws one mini-batch
    :param batch_size: Number of MNIST samples drawn per iteration, before filtering by label
    :param z_dim: Length of the noise vector fed to the generator
    :param learning_rate: Learning rate value fed to the learning-rate placeholder
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param data_shape: Image shape as (unused, width, height, channels)
    :return: List of (discriminator loss, generator loss) pairs, one per 10 steps
    """
    input_real, input_z, lr = model_inputs(data_shape[1], data_shape[2], data_shape[3], z_dim)
    d_loss, g_loss, g_out = model_loss(input_real, input_z, data_shape[-1])
    # Give the optimizers the placeholder, so the value fed as `lr` below is
    # the one that is actually applied.
    d_opt, g_opt = model_opt(d_loss, g_loss, lr, beta1)
    losses = []

    # Read the MNIST dataset from the MNIST_data directory under the current directory
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for steps in range(1, epoch_count + 1):
            images, labels = mnist.train.next_batch(batch_size)
            # Keep only the samples whose one-hot label marks the digit 2
            wanted = np.asarray(labels)[:, 2] == 1
            train_batch = np.asarray(images)[wanted]
            if train_batch.shape[0] == 0:
                # No sample of the target digit in this draw; skip it rather
                # than push an empty batch through the batch-norm layers.
                continue
            train_batch = np.reshape(train_batch, (-1, data_shape[1], data_shape[2], data_shape[3]))
            # NOTE(review): assumes the loader returns pixels scaled to [0, 1];
            # they are mapped to [-1, 1] to match the generator's tanh output.
            train_batch = train_batch * 2.0 - 1.0
            # Random noise as input for the generator
            batch_z = np.random.uniform(-1, 1, size=(train_batch.shape[0], z_dim))
            feed = {input_real: train_batch, input_z: batch_z, lr: learning_rate}
            sess.run(d_opt, feed_dict=feed)
            sess.run(g_opt, feed_dict=feed)
            if steps % 10 == 0:
                train_loss_d, train_loss_g = sess.run([d_loss, g_loss], feed_dict=feed)

                losses.append((train_loss_d, train_loss_g))

                # Report against the epoch_count argument, not a module-level global
                print("Epoch {}/{}...".format(steps, epoch_count),
                      "Discriminator Loss: {:.4f}...".format(train_loss_d),
                      "Generator Loss: {:.4f}".format(train_loss_g))
            if steps % 100 == 0:
                # Save the last generated sample of the batch as a grayscale image
                samples = sess.run(g_out, feed_dict={input_z: batch_z})
                # Map [-1, 1] to [0, 255]; clip and cast to uint8 so the array
                # is a valid 8-bit grayscale image for PIL.
                pixels = np.clip(samples[-1][:, :, 0] * 127.5 + 127.5, 0, 255).astype(np.uint8)
                Image.fromarray(pixels).save("result_{}.png".format(steps))

    return losses

if __name__ == "__main__":
    # data_shape is (unused, width, height, channels); train() reads indices 1 to 3 only
    train(epochs, batch_size, z_dim, learning_rate, beta1, [0, 28, 28, 1])

训练结果

训练的轮数不是很多,也遇到了一些问题,一个比较理想的结果如下

这里的主人

一个苦逼的程序员,一不小心入了安全的坑,从此再也无法从坑里走出来。每天被大佬按在地上摩擦,希望大佬放过这个简陋的小站,别日了。

留下你的评论

*评论支持代码高亮<pre class="prettyprint linenums">代码</pre>

相关推荐

暂无内容!