Overall approach
- First, write a deep_learning.py file to train and test the neural network.
- The parameters to be optimized in deep_learning.py (here, the number of layers and the number of convolution kernels/neurons in each layer) are collected into a single list, num.
- Then write GA.py. The list num passed into deep_learning.py serves as the chromosome, and each optimized parameter is a gene on that chromosome.
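As a concrete illustration of this encoding (its exact layout is described alongside the complete code below), such a num list might look like this:

```python
# [conv layer count, dense layer count, filters per conv layer..., units per dense layer...]
num = [2, 2, 32, 64, 128, 64]  # 2 conv layers (32, 64 filters), 2 dense layers (128, 64 units)
```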
deep_learning.py file
In order to write all parameters to be optimized into one list, two functions need to be defined in this file: create_cnn(inputs, filters, is_pool=False) and create_dense(inputs, units).
Function: create_cnn(inputs, filters, is_pool=False)
Input:
- inputs: the input to this convolutional block. If this block is the first one, pass in the layers.Input() tensor; otherwise, pass in the pooling layer (if any) or the dropout layer of the previous convolutional block.
- filters: the number of convolution kernels in this convolutional layer.
- is_pool: whether this convolutional block contains a pooling layer.
Output:
- If there is a pooling layer, the function returns the convolutional layer, BN layer, dropout layer, and pooling layer.
- If there is no pooling layer, it returns only the convolutional layer, BN layer, and dropout layer.
```python
# Define the CNN layer function
def create_cnn(inputs, filters, is_pool=False):
    cnn = layers.Conv2D(filters, kernel_size=(3, 3), strides=(1, 1),
                        padding='same', activation='relu')(inputs)
    print('Cnn', cnn.shape)
    batch_layer = layers.BatchNormalization()(cnn)
    cnn_dropout = layers.Dropout(0.2)(batch_layer)
    if is_pool:
        pool = layers.MaxPool2D(pool_size=(2, 2))(cnn_dropout)
        print('Pool', pool.shape)
        return cnn, batch_layer, cnn_dropout, pool
    else:
        return cnn, batch_layer, cnn_dropout
```
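For instance, a first convolutional block with pooling could be created like this (a sketch; the input shape (28, 28, 1) assumes the MNIST data used later):

```python
inputs_cnn = layers.Input(shape=(28, 28, 1), name='inputs')
cnn, batch_layer, cnn_dropout, pool = create_cnn(inputs_cnn, filters=32, is_pool=True)
```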
Function: create_dense(inputs, units)
Input:
- inputs: the input to this fully connected layer. If it is the first fully connected layer, pass in the layers.Flatten() tensor; otherwise, pass in the output of the previous fully connected block (its BN layer, per the code below).
- units: the number of neurons in this fully connected layer.
Output:
- Returns the fully connected layer, dropout layer, and BN layer.
```python
# Define the Dense layer function
def create_dense(inputs, units):
    dense = layers.Dense(units, kernel_regularizer=keras.regularizers.l2(0.001),
                         activation='relu')(inputs)
    print('Dense', dense.shape)
    dense_dropout = layers.Dropout(0.2)(dense)
    dense_batch = layers.BatchNormalization()(dense_dropout)
    return dense, dense_dropout, dense_batch
```
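And a first fully connected block could follow the flattened convolutional output (continuing the sketch above, where pool is the output of the last convolutional block):

```python
flatten = layers.Flatten()(pool)
dense, dense_dropout, dense_batch = create_dense(flatten, units=128)
```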
Setting parameters
When setting the convolutional layer parameters, note that each pooling layer halves the height and width of the feature map. Pooling layers are therefore placed only in the last two convolutional blocks; the earlier blocks have none.
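A quick sketch of the shape arithmetic behind this choice (assuming the 28x28 MNIST inputs used in load() below):

```python
# Each 2x2 max-pool halves the feature-map height and width ('same' padding
# keeps the size unchanged in blocks without pooling).
size = 28                  # MNIST sample height/width, as in load() below
for block in range(2):     # pooling only in the last two conv blocks
    size //= 2
print(size)                # 7; with four pooled blocks it would shrink to 1
```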
```python
# Set CNN layer parameters
cnn_num_layers = 2
filters = [32, 64]
if cnn_num_layers == 1:
    pool = [1]
else:
    # pooling only in the last two conv blocks
    pool = list(np.zeros((cnn_num_layers - 2,)))
    pool.append(1)
    pool.append(1)
cnn_name = list(np.zeros((cnn_num_layers,)))
batch_layer_name = list(np.zeros((cnn_num_layers,)))
cnn_dropout_name = list(np.zeros((cnn_num_layers,)))
pool_name = list(np.zeros((cnn_num_layers,)))
```
```python
# Set CNN dense layer parameters
cnn_dense_num_layers = 2
cnn_dense_units = [128, 64]
cnn_dense_name = list(np.zeros((cnn_dense_num_layers,)))
cnn_dense_dropout_name = list(np.zeros((cnn_dense_num_layers,)))
cnn_dense_batch_name = list(np.zeros((cnn_dense_num_layers,)))
```
Calling the functions to build the model
Construct the network model as explained in the function descriptions above.
```python
inputs_cnn = layers.Input(shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3]),
                          name='inputs')
print('Inputs: ', inputs_cnn.shape)
for i in range(cnn_num_layers):
    if i == 0:
        inputs = inputs_cnn
    else:
        if pool[i-1]:
            inputs = pool_name[i-1]
        else:
            inputs = cnn_dropout_name[i-1]
    if pool[i]:
        cnn_name[i], batch_layer_name[i], cnn_dropout_name[i], pool_name[i] = \
            create_cnn(inputs, filters[i], is_pool=pool[i])
    else:
        cnn_name[i], batch_layer_name[i], cnn_dropout_name[i] = \
            create_cnn(inputs, filters[i], is_pool=pool[i])
flatten = layers.Flatten()(pool_name[cnn_num_layers-1])
print('Flatten: ', flatten.shape)
for i in range(cnn_dense_num_layers):
    if i == 0:
        inputs = flatten
    else:
        inputs = cnn_dense_batch_name[i-1]
    cnn_dense_name[i], cnn_dense_dropout_name[i], cnn_dense_batch_name[i] = \
        create_dense(inputs, cnn_dense_units[i])
outputs_cnn = layers.Dense(10, activation='softmax')(cnn_dense_batch_name[cnn_dense_num_layers-1])
print('Outputs: ', outputs_cnn.shape)
```
Complete code
In the snippets above, the list num is not yet used: the number of layers is fixed at 2 and the filter counts are given directly, for ease of explanation. The complete code is given below:
```python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import models, layers, optimizers
import matplotlib.pyplot as plt

# Define the CNN layer function
def create_cnn(inputs, filters, is_pool=False):
    cnn = layers.Conv2D(filters, kernel_size=(3, 3), strides=(1, 1),
                        padding='same', activation='relu')(inputs)
    print('Cnn', cnn.shape)
    batch_layer = layers.BatchNormalization()(cnn)
    cnn_dropout = layers.Dropout(0.2)(batch_layer)
    if is_pool:
        pool = layers.MaxPool2D(pool_size=(2, 2))(cnn_dropout)
        print('Pool', pool.shape)
        return cnn, batch_layer, cnn_dropout, pool
    else:
        return cnn, batch_layer, cnn_dropout

# Define the Dense layer function
def create_dense(inputs, units):
    dense = layers.Dense(units, kernel_regularizer=keras.regularizers.l2(0.001),
                         activation='relu')(inputs)
    print('Dense', dense.shape)
    dense_dropout = layers.Dropout(0.2)(dense)
    dense_batch = layers.BatchNormalization()(dense_dropout)
    return dense, dense_dropout, dense_batch

def load():
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    # Simple normalization of the MNIST data set
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = x_train.reshape((-1, 28, 28, 1))
    x_test = x_test.reshape((-1, 28, 28, 1))
    return x_train, y_train, x_test, y_test

def classify(x_train, y_train, x_test, y_test, num):
    # Set CNN layer parameters
    cnn_num_layers = num[0]
    filters = num[2: 2 + cnn_num_layers]
    if cnn_num_layers == 1:
        pool = [1]
    else:
        # pooling only in the last two conv blocks
        pool = list(np.zeros((cnn_num_layers - 2,)))
        pool.append(1)
        pool.append(1)
    cnn_name = list(np.zeros((cnn_num_layers,)))
    batch_layer_name = list(np.zeros((cnn_num_layers,)))
    cnn_dropout_name = list(np.zeros((cnn_num_layers,)))
    pool_name = list(np.zeros((cnn_num_layers,)))

    # Set CNN dense layer parameters
    cnn_dense_num_layers = num[1]
    cnn_dense_units = num[2 + cnn_num_layers: 2 + cnn_num_layers + cnn_dense_num_layers]
    cnn_dense_name = list(np.zeros((cnn_dense_num_layers,)))
    cnn_dense_dropout_name = list(np.zeros((cnn_dense_num_layers,)))
    cnn_dense_batch_name = list(np.zeros((cnn_dense_num_layers,)))

    inputs_cnn = layers.Input(shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3]),
                              name='inputs')
    for i in range(cnn_num_layers):
        if i == 0:
            inputs = inputs_cnn
        else:
            if pool[i-1]:
                inputs = pool_name[i-1]
            else:
                inputs = cnn_dropout_name[i-1]
        if pool[i]:
            cnn_name[i], batch_layer_name[i], cnn_dropout_name[i], pool_name[i] = \
                create_cnn(inputs, filters[i], is_pool=pool[i])
        else:
            cnn_name[i], batch_layer_name[i], cnn_dropout_name[i] = \
                create_cnn(inputs, filters[i], is_pool=pool[i])
    flatten = layers.Flatten()(pool_name[cnn_num_layers-1])
    for i in range(cnn_dense_num_layers):
        if i == 0:
            inputs = flatten
        else:
            inputs = cnn_dense_batch_name[i-1]
        cnn_dense_name[i], cnn_dense_dropout_name[i], cnn_dense_batch_name[i] = \
            create_dense(inputs, cnn_dense_units[i])
    outputs_cnn = layers.Dense(10, activation='softmax')(cnn_dense_batch_name[cnn_dense_num_layers-1])

    CNN_model = keras.Model(inputs_cnn, outputs_cnn)
    CNN_model.compile(optimizer=keras.optimizers.Adam(),
                      # loss=keras.losses.CategoricalCrossentropy(),  # for one-hot labels
                      loss=keras.losses.SparseCategoricalCrossentropy(),
                      metrics=['accuracy'])
    history = CNN_model.fit(x_train, y_train, batch_size=64, epochs=5,
                            validation_split=0.1, verbose=0)
    results = CNN_model.evaluate(x_test, y_test, verbose=0)
    return results[1]
```
The first two elements of the list num are the number of convolutional layers and the number of fully connected layers, respectively; the remaining elements are the number of convolution kernels/neurons in each layer.
The returned value is the accuracy on the test set.
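A single evaluation could then look like this (a sketch using the load and classify functions defined above):

```python
import deep_learning as project

x_train, y_train, x_test, y_test = project.load()
# 2 conv layers (32, 64 filters) followed by 2 dense layers (128, 64 units)
accuracy = project.classify(x_train, y_train, x_test, y_test, num=[2, 2, 32, 64, 128, 64])
print(accuracy)  # accuracy on the test set, between 0 and 1
```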
GA.py
For an introduction to the conventional genetic algorithm, see my other article, Detailed explanation of genetic algorithm solving the maximum value problem (with Python code).
Problem
A conventional genetic algorithm cannot be applied directly to optimizing a convolutional neural network, for the following reasons:
- 1. In a conventional genetic algorithm, every chromosome has the same length, but when optimizing a convolutional neural network the chromosome length varies with the number of layers. For example, if chromosome a has one convolutional layer and one fully connected layer, it carries four genes (two for the layer counts and two for the per-layer sizes); if chromosome b has two convolutional layers and two fully connected layers, it carries six genes (two for the layer counts and four for the per-layer sizes).
- 2. In a conventional genetic algorithm, every gene has the same value range, but when optimizing a convolutional neural network the genes representing layer counts need one range and the genes representing neuron counts need another. For example, the number of convolutional layers lies between one and four, the number of fully connected layers between one and three, and the number of neurons between 32 and 256.
- 3. Because of the first problem (varying chromosome length), the crossover and mutation functions must be modified.
Solution
- 1. Give every chromosome the same length: with at most four convolutional layers and at most three fully connected layers, plus the first two genes for the layer counts, each chromosome carries up to 4 + 3 + 2 = 9 genes; positions beyond a chromosome's actual genes are filled with zeros (see the sketch after this list).
- 2. First generate the first two genes, ranging from one to four and from one to three respectively, and from them determine how many neuron-count genes follow.
- 3. For the crossover function, first pick the positions to swap on the two selected chromosomes (call them a and b), then inspect the genes at those positions: if the gene at a position is 0 on either chromosome, or the position holds a layer-count gene, cancel the swap at that position.
- 4. For the mutation function, only the neuron-count genes mutate; the layer-count genes are left untouched.
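A minimal sketch of this zero-padded, fixed-length encoding (the encode and decode helpers are hypothetical illustrations, not part of GA.py; GA.py below pads to DNA_SIZE_MAX = 12, which is at least the 9 genes computed above):

```python
import numpy as np

DNA_SIZE_MAX = 12  # fixed chromosome length used in GA.py below

def encode(num):
    """Hypothetical helper: zero-pad a variable-length num list to fixed length."""
    chromosome = np.zeros(DNA_SIZE_MAX)
    chromosome[:len(num)] = num
    return chromosome

def decode(chromosome):
    """Hypothetical helper: strip the zero padding to recover num."""
    return [int(gene) for gene in chromosome if gene != 0]

# 2 conv layers (32, 64 filters) and 2 dense layers (128, 64 units):
chromosome = encode([2, 2, 32, 64, 128, 64])
print(chromosome)          # [2. 2. 32. 64. 128. 64. 0. ... 0.]
print(decode(chromosome))  # [2, 2, 32, 64, 128, 64]
```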
Complete code
```python
import numpy as np
import deep_learning as project

DNA_SIZE = 2        # genes encoding the two layer counts
DNA_SIZE_MAX = 12   # fixed chromosome length (at least 2 + 4 + 3 = 9; unused slots stay zero)
POP_SIZE = 20
CROSS_RATE = 0.5
MUTATION_RATE = 0.01
N_GENERATIONS = 40

train_x, train_y, test_x, test_y = project.load()

def get_fitness(x):
    return project.classify(train_x, train_y, test_x, test_y, num=x)

def select(pop, fitness):
    # roulette-wheel selection proportional to fitness
    idx = np.random.choice(np.arange(POP_SIZE), size=POP_SIZE, replace=True,
                           p=fitness / fitness.sum())
    return pop[idx]

def crossover(parent, pop):
    if np.random.rand() < CROSS_RATE:
        i_ = np.random.randint(0, POP_SIZE, size=1)  # mate with a random individual
        cross_points = np.random.randint(0, 2, size=DNA_SIZE_MAX).astype(bool)
        for i, point in enumerate(cross_points):
            # cancel the swap if either gene is 0 (padding) or it is a layer-count gene
            if point and pop[i_, i] * parent[i] == 0:
                cross_points[i] = False
            if point and i < 2:
                cross_points[i] = False
        parent[cross_points] = pop[i_, cross_points]
    return parent

def mutate(child):
    for point in range(DNA_SIZE_MAX):
        if np.random.rand() < MUTATION_RATE:
            if point >= 2:                  # only neuron-count genes mutate
                if child[point] != 0:
                    child[point] = np.random.randint(32, 257)  # same 32-256 range as initialization
    return child

# initialize the layer-count genes
pop_layers = np.zeros((POP_SIZE, DNA_SIZE), np.int32)
pop_layers[:, 0] = np.random.randint(1, 5, size=(POP_SIZE,))  # 1-4 conv layers
pop_layers[:, 1] = np.random.randint(1, 4, size=(POP_SIZE,))  # 1-3 dense layers

# build the zero-padded population
pop = np.zeros((POP_SIZE, DNA_SIZE_MAX))
for i in range(POP_SIZE):
    pop_neurons = np.random.randint(32, 257, size=(pop_layers[i].sum(),))
    pop_stack = np.hstack((pop_layers[i], pop_neurons))
    for j, gene in enumerate(pop_stack):
        pop[i][j] = gene

for each_generation in range(N_GENERATIONS):
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        pop_list = list(pop[i])
        # truncate the chromosome at the first zero gene (strip the padding)
        for j, each in enumerate(pop_list):
            if each == 0.0:
                pop_list = pop_list[:j]
        for k, each in enumerate(pop_list):
            pop_list[k] = int(each)
        fitness[i] = get_fitness(pop_list)
        print('Generation %d, chromosome %d, fitness: %f' % (each_generation+1, i+1, fitness[i]))
        print('This chromosome is:', pop_list)
    print("Generation:", each_generation+1,
          "Most fitted DNA:", pop[np.argmax(fitness), :],
          "Fitness:", fitness[np.argmax(fitness)])
    pop = select(pop, fitness)
    pop_copy = pop.copy()
    for parent in pop:
        child = crossover(parent, pop_copy)
        child = mutate(child)
        parent[:] = child  # write the offspring back into the population in place
```
Within the generation loop, the following code strips the zero (padding) elements from the chromosome. For the implementation details, see my other article, Delete all zero elements in an ndarray.
```python
for each_generation in range(N_GENERATIONS):
    fitness = np.zeros([POP_SIZE, ])
    for i in range(POP_SIZE):
        pop_list = list(pop[i])
        # truncate the chromosome at the first zero gene (strip the padding)
        for j, each in enumerate(pop_list):
            if each == 0.0:
                pop_list = pop_list[:j]
        for k, each in enumerate(pop_list):
            pop_list[k] = int(each)
```
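For reference, a more compact equivalent of this padding-stripping step (an alternative sketch, not the article's original loop):

```python
# Keep only the non-zero genes and cast them to int in one pass.
pop_list = [int(gene) for gene in pop[i] if gene != 0]
```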