In this section we focus on:

  • Using the deep-neural-network training techniques from the previous lesson to fix a handwritten digit recognition network that performs poorly.

```python
# Import the required packages

import numpy as np 
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.datasets import mnist
# This is the model that trained badly in the previous lesson.
# (Note the loss function below: we will swap mse for categorical_crossentropy later.)
def load_data():
    (x_train, y_train) , (x_test, y_test) = mnist.load_data()
    number = 10000
    x_train = x_train[0:number]
    y_train = y_train[0:number]
    x_train = x_train.reshape(number, 28*28)
    x_test = x_test.reshape(x_test.shape[0], 28*28)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train,10)
    y_test = np_utils.to_categorical(y_test,10)
    # x_test = np.random.normal(x_test)  # uncomment to add noise to the test set (see step 8)
    x_train = x_train / 255
    x_test = x_test / 255
    return (x_train, y_train), (x_test, y_test)

(x_train,y_train), (x_test,y_test) = load_data()

model = Sequential()
model.add(Dense(input_dim=28*28,units=689,activation='sigmoid')) # first layer needs the input shape
model.add(Dense(units=689,activation='sigmoid'))
#for i in range(10):
#   model.add(Dense(units=689,activation='sigmoid'))  
model.add(Dense(units=10,activation='softmax'))

model.compile(loss='mse',optimizer=SGD(lr=0.1),metrics=['accuracy'])

model.fit(x_train,y_train,batch_size=100,epochs=20)

result = model.evaluate(x_test,y_test,batch_size=10000)
print("\nTest Acc:"+"{:.4f}".format(result[1]))
```

When `load_data()` runs for the first time it downloads `mnist.npz` from https://s3.amazonaws.com/img-datasets/mnist.npz. In the original run the download was interrupted and the cell failed with `ConnectionResetError: [Errno 104] Connection reset by peer`; re-run the cell once the download can complete.

```python
# Step 1: check performance on the training set.
# Keras already reports training-set accuracy during fit, but we print it explicitly:
result = model.evaluate(x_train,y_train,batch_size=10000)
print("\nTrain Acc:"+"{:.4f}".format(result[1]))

# Training accuracy turns out to be very low: the model never trained properly,
# so we first need to improve its performance on the training set.
# The tools: basic training know-how, a different activation function, and an
# adaptive learning rate (a better gradient-descent strategy).
# Step 2: use an appropriate loss function.
# The bug in the code is the loss: as discussed before, mean squared error (mse)
# does not give good results on classification problems, so swap it for cross
# entropy (categorical_crossentropy) and run again. Training-set performance
# improves markedly.
model.compile(loss='categorical_crossentropy',optimizer=SGD(lr=0.1),metrics=['accuracy'])
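
# Then retrain with the new loss (the same fit/evaluate calls as before) to
# see the improvement on the training set:
model.fit(x_train,y_train,batch_size=100,epochs=20)
result = model.evaluate(x_train,y_train,batch_size=10000)
print("\nTrain Acc:"+"{:.4f}".format(result[1]))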
# Step 3: the effect of batch_size.
# A large batch_size (10000) makes each epoch faster, but the result gets worse.
# A tiny batch_size (1) makes training very slow.
# So to benefit from GPU speed-up, batch_size should be set reasonably large.
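
# A sketch of the comparison; each variant would be run on a freshly built
# model, so both are shown commented out (pick one at a time):
# model.fit(x_train,y_train,batch_size=10000,epochs=20)  # fast per epoch, worse accuracy
# model.fit(x_train,y_train,batch_size=1,epochs=20)      # very slow, noisy updates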
# Step 4: the effect of network depth.
# Making the network deeper (10 extra sigmoid layers) makes training worse, not better.
for i in range(10):
    model.add(Dense(units=689,activation='sigmoid'))
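
# Note: in practice each experiment rebuilds the model from scratch rather than
# stacking the loop above onto the already-trained network; a sketch of the
# full deep sigmoid variant:
model = Sequential()
model.add(Dense(input_dim=28*28,units=689,activation='sigmoid'))
model.add(Dense(units=689,activation='sigmoid'))
for i in range(10):
    model.add(Dense(units=689,activation='sigmoid'))
model.add(Dense(units=10,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer=SGD(lr=0.1),metrics=['accuracy'])
model.fit(x_train,y_train,batch_size=100,epochs=20)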
# Step 5: change the activation function.
# Rebuild the network with relu instead of sigmoid: training accuracy improves
# a lot, and test accuracy is also quite good.
model = Sequential()
model.add(Dense(input_dim=28*28,units=689,activation='relu'))
model.add(Dense(units=689,activation='relu'))
for i in range(10):
    model.add(Dense(units=689,activation='relu'))
model.add(Dense(units=10,activation='softmax'))
# Step 6: the importance of input normalization.
# Comment out x_train = x_train / 255 and x_test = x_test / 255 in load_data()
# and the model fails to train. Then put the normalization back.
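
# The experiment only toggles these two lines inside load_data():
#     x_train = x_train / 255
#     x_test = x_test / 255
# With raw pixel values in [0, 255] the activations and gradients are badly
# scaled and training stalls; rescaling the inputs to [0, 1] fixes this.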
# Step 7: change the gradient-descent strategy.

# 1. First make the network shallower again (comment out the 10 extra layers) and run once.
#for i in range(10):
#    model.add(Dense(units=689,activation='relu'))

# 2. Then change the gradient-descent strategy from SGD to 'adam' and run again:
# training accuracy now climbs noticeably faster.
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
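
# Retrain to see the faster convergence (the same fit call as before):
model.fit(x_train,y_train,batch_size=100,epochs=20)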
# Step 8: add noise, and use Dropout.

# 1. Add Gaussian noise to the test set: test accuracy drops.
x_test = np.random.normal(x_test)
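
# Evaluate on the noisy test set to see the drop (a sketch; the exact number
# depends on the run):
result = model.evaluate(x_test,y_test,batch_size=10000)
print("\nTest Acc:"+"{:.4f}".format(result[1]))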

# 2. The effect of Dropout.
# With Dropout, the accuracy Keras reports during each training epoch gets worse,
# because a random 70% of the units are dropped on every update.
# But evaluation uses the whole network (no units dropped), so the accuracy
# measured on the training set, and on the noisy test set, gets better.
model = Sequential()
model.add(Dense(input_dim=28*28,units=689,activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(units=689,activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(units=689,activation='relu'))
model.add(Dropout(0.7))
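
# To finish the Dropout experiment, a sketch mirroring the earlier cells
# (output layer, then compile, train, and evaluate):
model.add(Dense(units=10,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.fit(x_train,y_train,batch_size=100,epochs=20)
result = model.evaluate(x_test,y_test,batch_size=10000)
print("\nTest Acc:"+"{:.4f}".format(result[1]))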