初始化

改善深层神经网络-week1编程题(初始化、正则化、梯度校验)

分别使用 0、随机数和抑梯度异常(He)初始化三种方式初始化参数,比较后发现抑梯度异常(He)初始化可以得到更高的准确度。

原始数据:

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation
from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec
from math import sqrt

# %matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# load image dataset: blue/red dots in circles
train_X, train_Y, test_X, test_Y = load_dataset()

改善深层神经网络-week1编程题(初始化、正则化、梯度校验)

 使用抑梯度异常(He)初始化的代码如下:

  1 #three layers
  2 def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization="he"):
  3     """
  4     Implements a three-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.
  5 
  6     Arguments:
  7     X -- input data, of shape (2, number of examples)
  8     Y -- true "label" vector (containing 0 for red dots; 1 for blue dots), of shape (1, number of examples)
  9     learning_rate -- learning rate for gradient descent
 10     num_iterations -- number of iterations to run gradient descent
 11     print_cost -- if True, print the cost every 1000 iterations
 12     initialization -- flag to choose which initialization to use ("zeros","random" or "he")
 13 
 14     Returns:
 15     parameters -- parameters learnt by the model
 16     """
 17 
 18     grads = {}
 19     costs = []     # to keep track of the loss
 20     m = X.shape[1] # number of examples
 21     layers_dims = [X.shape[0], 10, 5, 1]
 22 
 23     # Initialize parameters dictionary.
 24     parameters = initialize_parameters_he(layers_dims)
 25 
 26     # Loop (gradient descent)
 27     for i in range(0, num_iterations):
 28 
 29         # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
 30         a3, cache = forward_propagation(X, parameters)
 31 
 32         # Loss
 33         cost = compute_loss(a3, Y)
 34 
 35         # Backward propagation.
 36         grads = backward_propagation(X, Y, cache)
 37 
 38         # Update parameters.
 39         parameters = update_parameters(parameters, grads, learning_rate)
 40 
 41         # Print the loss every 1000 iterations
 42         if print_cost and i % 1000 == 0:
 43             print("Cost after iteration {}: {}".format(i, cost))
 44             costs.append(cost)
 45 
 46     # plot the loss
 47     plt.plot(costs)
 48     plt.ylabel('cost')
 49     plt.xlabel('iterations (per hundreds)')
 50     plt.title("Learning rate =" + str(learning_rate))
 51     plt.show()
 52 
 53     return parameters
 54 
 55 
 56 # GRADED FUNCTION: initialize_parameters_he
 57 def initialize_parameters_he(layers_dims):
 58     """
 59     Arguments:
 60     layer_dims -- python array (list) containing the size of each layer.
 61 
 62     Returns:
 63     parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
 64                     W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
 65                     b1 -- bias vector of shape (layers_dims[1], 1)
 66                     ...
 67                     WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
 68                     bL -- bias vector of shape (layers_dims[L], 1)
 69     """
 70 
 71     np.random.seed(3)
 72     parameters = {}
 73     L = len(layers_dims) - 1 # integer representing the number of layers
 74 
 75     for l in range(1, L + 1):
 76         ### START CODE HERE ### (≈ 2 lines of code)
 77         parameters['W'+str(l)]=np.random.randn(layers_dims[l], layers_dims[l-1])*sqrt(2./layers_dims[l-1])
 78         parameters['b'+str(l)]=np.zeros((layers_dims[l], 1))
 79         ### END CODE HERE ###
 80     return parameters
 81 
 82 parameters = initialize_parameters_he([2, 4, 1])
 83 print("W1 = " + str(parameters["W1"]))
 84 print("b1 = " + str(parameters["b1"]))
 85 print("W2 = " + str(parameters["W2"]))
 86 print("b2 = " + str(parameters["b2"]))
 87 
 88 
 89 parameters = model(train_X, train_Y, initialization = "he")
 90 print("On the train set:")
 91 predictions_train = predict(train_X, train_Y, parameters)
 92 print("On the test set:")
 93 predictions_test = predict(test_X, test_Y, parameters)
 94 
 95 
 96 plt.title("Model with He initialization")
 97 axes = plt.gca()
 98 axes.set_xlim([-1.5, 1.5])
 99 axes.set_ylim([-1.5, 1.5])
100 plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
View Code

相关文章: