初始化
分别使用 0、随机数和“抑梯度异常”初始化（即 He 初始化，He initialization）三种方式初始化参数，比较后发现 He 初始化可以得到更高的准确度。
原始数据:
# Environment setup for the initialization-comparison experiment:
# imports, plotting defaults, and the circles dataset.
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from math import sqrt

from init_utils import (sigmoid, relu, compute_loss, forward_propagation,
                        backward_propagation, update_parameters, predict,
                        load_dataset, plot_decision_boundary, predict_dec)

# %matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load image dataset: blue/red dots in circles.
train_X, train_Y, test_X, test_Y = load_dataset()
使用抑梯度异常初始化代码如下:
def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization="he"):
    """
    Implements a three-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (containing 0 for red dots; 1 for blue dots), of shape (1, number of examples)
    learning_rate -- learning rate for gradient descent
    num_iterations -- number of iterations to run gradient descent
    print_cost -- if True, print the cost every 1000 iterations
    initialization -- flag to choose which initialization to use ("zeros", "random" or "he")

    Returns:
    parameters -- parameters learnt by the model

    Raises:
    ValueError -- if `initialization` is not one of the supported flags.
    """
    costs = []  # loss recorded every 1000 iterations (only when print_cost)
    layers_dims = [X.shape[0], 10, 5, 1]

    # Fix: the `initialization` flag was previously ignored and "he" was
    # always used; it now selects the scheme as the docstring promises.
    if initialization == "zeros":
        parameters = _initialize_parameters_zeros(layers_dims)
    elif initialization == "random":
        parameters = _initialize_parameters_random(layers_dims)
    elif initialization == "he":
        parameters = initialize_parameters_he(layers_dims)
    else:
        raise ValueError('initialization must be "zeros", "random" or "he"')

    # Loop (gradient descent)
    for i in range(num_iterations):
        # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        a3, cache = forward_propagation(X, parameters)

        # Loss
        cost = compute_loss(a3, Y)

        # Backward propagation.
        grads = backward_propagation(X, Y, cache)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the loss every 1000 iterations.
        if print_cost and i % 1000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            costs.append(cost)

    # Plot the loss curve.
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (per thousands)')  # fixed: costs are sampled every 1000 iterations, not 100
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters


def _initialize_parameters_zeros(layers_dims):
    """All-zero initialization (baseline for the comparison; it cannot break
    symmetry, so every neuron in a layer learns the same function)."""
    parameters = {}
    L = len(layers_dims) - 1  # number of layers
    for l in range(1, L + 1):
        parameters['W' + str(l)] = np.zeros((layers_dims[l], layers_dims[l - 1]))
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


def _initialize_parameters_random(layers_dims):
    """Large random initialization (weights ~ N(0, 1) * 10, biases zero) —
    the "random" baseline of the comparison; large weights slow learning."""
    np.random.seed(3)  # fixed seed for reproducibility
    parameters = {}
    L = len(layers_dims) - 1  # number of layers
    for l in range(1, L + 1):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 10
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


# GRADED FUNCTION: initialize_parameters_he
def initialize_parameters_he(layers_dims):
    """
    He initialization: weights ~ N(0, 1) * sqrt(2 / fan_in), biases zero.
    Keeps activation variance roughly constant through ReLU layers.

    Arguments:
    layers_dims -- python array (list) containing the size of each layer.

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                  W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
                  b1 -- bias vector of shape (layers_dims[1], 1)
                  ...
                  WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
                  bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)  # fixed seed for reproducibility
    parameters = {}
    L = len(layers_dims) - 1  # integer representing the number of layers

    for l in range(1, L + 1):
        ### START CODE HERE ### (≈ 2 lines of code)
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2. / layers_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
        ### END CODE HERE ###
    return parameters


# Quick sanity check of the He initializer on a tiny 2-4-1 network.
parameters = initialize_parameters_he([2, 4, 1])
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))


# Train with He initialization and report train/test accuracy.
parameters = model(train_X, train_Y, initialization="he")
print("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)


# Visualize the learned decision boundary.
plt.title("Model with He initialization")
axes = plt.gca()
axes.set_xlim([-1.5, 1.5])
axes.set_ylim([-1.5, 1.5])
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)