python實現高效的遺傳算法

遺傳算法屬於一種優化算法。

如果你有一個待優化函數,可以考慮此算法。假設你有一個變量x,通過某個函數可以求出對應的y,那麼你通過預設的x可求出y_pred,y_pred與你需要的y之間的差距當然越小越好,這就需要引入適應度(fitness)的概念。假設

fitness = 1/(1+abs(y_pred - y)),那麼誤差越小,適應度越大,即該個體越易於存活。

設計該算法的思路如下:

(1)初始化種群,即在我需要的區間如[-100,100]內random一堆初始個體[x1,x2,x3…],這些個體是10進制形式的,為了後面的交叉與變異我們不妨將其轉化為二進制形式。那麼現在的問題是二進制取多少位合適呢?即編碼(code)的長度是多少呢?

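這就涉及一些信號方面的知識,比如兩位的二進制表示的最大值是3(11),可以將區間劃分為4份,那麼每一份區間的長度是range/4,我們只需要讓range/n小於我們定義的精度即可。n是二進制需要表示的最大值,可以反解出二進制位數。(下面的代碼取滿足 $2^{L}-1 \ge \text{range}/\delta$ 的最小整數 $L$,即 $L=\lceil \log_2(\text{range}/\delta+1) \rceil$,其中 $\delta$ 為精度。)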

(2)我們需要編寫編碼與解碼函數。即code:將x1,x2…化為二進制,decode:在交叉變異後重新得到十進制數,用於計算fitness。

(3)交叉與變異函數的編寫都很簡單:random一個point,讓指定的兩個x在point位置進行切片交換即是交叉。變異也是random一個point,讓其值0變為1,1變為0。

(4)得到交叉變異後的個體,需要計算fitness進行種群淘汰,保留fitness最高的一部分種群。

(5)將最優的個體繼續上面的操作,直到你定義的iteration結束為止。

不說了,上代碼:

import numpy as np
import pandas as pd
import random
from scipy.optimize import fsolve
import matplotlib.pyplot as plt
import heapq
from sklearn.model_selection import train_test_split
from tkinter import _flatten
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.decomposition import PCA
from matplotlib import rcParams
 
 
 
# Compute the chromosome (bit-string) length of every decision variable
def getEncodeLength(decisionvariables, delta):
    """Return the number of bits needed to encode each decision variable.

    For a variable bounded by ``[low, upper]`` the bit count ``L`` is the
    smallest integer satisfying ``2 ** L - 1 >= (upper - low) / delta``.
    That inequality has the closed-form solution
    ``L = ceil(log2((upper - low) / delta + 1))``, so it is evaluated
    directly instead of being handed to an iterative root finder.

    Args:
        decisionvariables: sequence of bounds; element ``[0]`` is the lower
            bound and element ``[1]`` the upper bound of one variable.
        delta: desired resolution of the decoded value.

    Returns:
        A list of Python ints, one bit length per decision variable, in the
        same order as ``decisionvariables``.
    """
    lengths = []
    for decisionvar in decisionvariables:
        low = decisionvar[0]
        upper = decisionvar[1]
        # Number of distinct levels the bit string must be able to represent.
        levels = (upper - low) / delta + 1
        lengths.append(int(np.ceil(np.log2(levels))))
    return lengths
 
 
# Randomly generate the initial population
def getinitialPopulation(length, populationSize):
    """Create a random binary population.

    The original implementation used the ``np.int`` alias, which no longer
    exists in current NumPy releases; the builtin ``int`` dtype is used
    instead and the whole matrix is drawn in a single call.

    Args:
        length: total number of genes (bits) in one chromosome.
        populationSize: number of chromosomes to generate.

    Returns:
        An integer array of shape ``(populationSize, length)`` whose entries
        are drawn from ``{0, 1}`` with ``np.random.randint``.
    """
    # randint's upper bound is exclusive, so (0, 2) yields only 0 or 1.
    return np.random.randint(0, 2, size=(populationSize, length), dtype=int)
 
 
# Decode chromosomes into their phenotype (real-valued) representation
def getDecode(population, encodelength, decisionvariables, delta):
    """Decode a binary population into real-valued decision variables.

    Each chromosome is a concatenation of bit fields, one per decision
    variable, most significant bit first. A field of ``L`` bits holding the
    integer ``d`` is mapped linearly onto ``[lower, upper]`` as
    ``lower + d * (upper - lower) / (2 ** L - 1)``.

    The original implementation used the ``np.float`` alias, which no longer
    exists in current NumPy releases; the builtin ``float`` dtype is used
    instead, and the per-bit Python loops are replaced by one dot product
    per variable.

    Args:
        population: 2-D array-like of shape ``(n_individuals, sum(encodelength))``
            containing 0/1 genes.
        encodelength: bit length of every decision variable, in order.
        decisionvariables: sequence of bounds; element ``[0]`` is the lower
            and element ``[1]`` the upper bound of the matching variable.
        delta: unused; kept so existing callers keep working.

    Returns:
        A float array of shape ``(n_individuals, len(encodelength))``.
    """
    population = np.asarray(population)
    populationsize = population.shape[0]
    decodeVariables = np.zeros((populationsize, len(encodelength)), dtype=float)

    start = 0
    for j, lengthchild in enumerate(encodelength):
        stop = start + lengthchild
        # Positional weights 2**(L-1) ... 2**0. Float weights avoid integer
        # overflow when the gene array has a narrow dtype such as uint8.
        weights = 2.0 ** np.arange(lengthchild - 1, -1, -1)
        decimal = population[:, start:stop].dot(weights)

        lower = decisionvariables[j][0]
        upper = decisionvariables[j][1]
        decodeVariables[:, j] = lower + decimal * (upper - lower) / (2 ** lengthchild - 1)

        # The next variable's bit field begins where this one ended.
        start = stop

    return decodeVariables
 
 
# Select a new population
def selectNewPopulation(decodepopu, cum_probability):
    """Pick a new population by roulette-wheel selection.

    For each of the ``m`` slots a uniform number in ``[0, 1)`` is drawn and
    the first individual whose cumulative probability exceeds it is copied.

    The original linear scan left a slot as an all-zero row whenever the
    draw was not below any cumulative probability (for example when rounding
    makes the last cumulative value slightly smaller than 1). Such draws are
    now assigned to the last individual instead.

    Args:
        decodepopu: 2-D array-like of shape ``(m, n)`` holding the individuals
            to choose from.
        cum_probability: non-decreasing cumulative selection probabilities;
            only the first ``m`` entries are considered.

    Returns:
        A float array of shape ``(m, n)`` with the selected individuals.
    """
    decodepopu = np.asarray(decodepopu)
    m, n = decodepopu.shape
    cumulative = np.asarray(cum_probability)[:m]

    draws = np.random.random(m)
    # side="right" yields the first index j with draw < cumulative[j].
    picks = np.searchsorted(cumulative, draws, side="right")
    # A draw beyond every cumulative value falls back to the last individual.
    picks = np.minimum(picks, m - 1)

    return decodepopu[picks].astype(float)
 
 
# Crossover on the new population
def crossNewPopulation(newpopu, prob):
    """Apply single-point crossover to a random subset of the population.

    ``int(m * prob)`` chromosomes are chosen without replacement with
    ``random.sample`` and paired up in the order drawn. For every pair a cut
    point is drawn with ``np.random.randint`` and the two chromosomes
    exchange everything from the cut point onwards. Chromosomes that were
    not chosen are copied unchanged.

    Fixes over the previous version:
      * the head of the second child was read from ``newpopu[j + 1]`` rather
        than from the second parent ``newpopu[index[j + 1]]``;
      * the number of chromosomes to cross was stored in ``np.uint8`` and
        wrapped around for values above 255;
      * rounding an odd count up could ask for more chromosomes than exist,
        which made ``random.sample`` raise; it is rounded down in that case.

    Args:
        newpopu: 2-D array-like of shape ``(m, n)`` containing 0/1 genes.
        prob: fraction of the population taking part in crossover.

    Returns:
        A ``uint8`` array of shape ``(m, n)`` with the offspring.
    """
    newpopu = np.asarray(newpopu)
    m, n = newpopu.shape

    numbers = int(m * prob)
    if numbers % 2 != 0:
        # Pairs are required: round up when possible, otherwise round down.
        numbers = numbers + 1 if numbers + 1 <= m else numbers - 1

    # Start from a copy so that untouched rows and the head of every crossed
    # row already hold the parent's own genes.
    updatepopulation = newpopu.astype(np.uint8)

    index = random.sample(range(m), numbers)
    for first, second in zip(index[0::2], index[1::2]):
        crossPoint = int(np.random.randint(0, n))
        # Swap tails; always read from the untouched parent array.
        updatepopulation[first, crossPoint:] = newpopu[second, crossPoint:]
        updatepopulation[second, crossPoint:] = newpopu[first, crossPoint:]

    return updatepopulation
 
 
# Mutation
def mutation(crosspopulation, mutaprob):
    """Flip a random subset of genes in a copy of the population.

    ``int(m * n * mutaprob)`` distinct gene positions are drawn with
    ``random.sample``; every selected gene equal to 0 becomes 1 and any
    other value becomes 0. The input array is left untouched.

    The previous version stored both the number of mutations and the row
    index in ``np.uint8``, so populations with more than 255 genes to flip
    or more than 255 rows were mutated at the wrong count / wrong rows.
    Plain Python integers are used now.

    Args:
        crosspopulation: 2-D array-like of shape ``(m, n)`` with 0/1 genes.
        mutaprob: fraction of all ``m * n`` genes to flip.

    Returns:
        A new array with the same shape and dtype as ``np.copy`` of the input.
    """
    mutationpopu = np.copy(crosspopulation)
    m, n = mutationpopu.shape

    mutationnums = int(m * n * mutaprob)
    mutationindex = random.sample(range(m * n), mutationnums)

    # Convert flat gene indices into (row, column) coordinates.
    rows, columns = np.divmod(np.asarray(mutationindex, dtype=int), n)
    current = mutationpopu[rows, columns]
    # Positions are unique (sampling without replacement), so a single
    # vectorised assignment flips each selected gene exactly once.
    mutationpopu[rows, columns] = np.where(current == 0, 1, 0)
    return mutationpopu
 
 
# Keep the fittest chromosomes as the next population
def findMaxPopulation(population, maxevaluation, maxSize):
    """Return the ``maxSize`` individuals with the highest fitness.

    The previous version looked up each of the largest fitness *values*
    with ``list.index``, which always returns the first match. Whenever two
    individuals shared a fitness value the same individual was copied
    repeatedly and the others were lost. Indices are ranked directly now,
    so every survivor is a distinct row of ``population``; ties keep their
    original relative order.

    Args:
        population: 2-D array-like of shape ``(k, n)``.
        maxevaluation: sequence of ``k`` fitness values (list or array).
        maxSize: number of individuals to keep.

    Returns:
        A float array of shape ``(maxSize, n)``. When fewer than ``maxSize``
        individuals are available the remaining rows stay zero, as before.
    """
    population = np.asarray(population)
    scores = list(maxevaluation)

    # Rank indices rather than values so equal scores map to distinct rows.
    best = heapq.nlargest(maxSize, range(len(scores)), key=scores.__getitem__)

    maxPopulation = np.zeros((maxSize, population.shape[1]))
    if best:
        maxPopulation[:len(best)] = population[best]
    return maxPopulation
 
 
 
# Fitness of every individual plus cumulative selection probabilities
def getFitnessValue(decode, x_train, y_train):
    """Score each individual by the total absolute error of a small network.

    Every row of ``decode`` is interpreted as the parameters of a network
    with one hidden layer: genes ``0:20`` form a 4x5 weight matrix, genes
    ``20:25`` the hidden bias, genes ``25:45`` a 5x4 weight matrix and the
    remaining genes the output bias. Both layers use ``sigmoid``.

    Args:
        decode: 2-D array of decoded individuals, one per row.
        x_train: indexable collection of input samples.
        y_train: indexable collection of targets, aligned with ``x_train``.

    Returns:
        A tuple ``(fitnessValue, cum_probability)``: a list holding
        ``1 / (1 + total_abs_error)`` for every individual, and the
        cumulative sum of those values normalised by their total (for
        roulette-wheel selection).
    """
    n_individuals, _ = decode.shape
    n_samples = len(x_train)

    fitnessValue = []
    for row in range(n_individuals):
        genome = decode[row]
        hidden_weights = genome[0:20].reshape(4, 5)
        hidden_bias = genome[20:25].T
        output_weights = genome[25:45].reshape(5, 4)
        output_bias = genome[45:].T

        # Accumulate the absolute error over all samples, in sample order.
        total_error = 0
        for k in range(n_samples):
            hidden = sigmoid(x_train[k].T.dot(hidden_weights) + hidden_bias)
            predicted = sigmoid(hidden.T.dot(output_weights) + output_bias)
            total_error = total_error + sum(abs(predicted - y_train[k]))

        # A smaller error gives a fitness closer to 1.
        fitnessValue.append(1 / (1 + total_error))

    fitness_total = np.sum(fitnessValue)
    cum_probability = np.cumsum(fitnessValue / fitness_total)
    return fitnessValue, cum_probability
 
 
 
def getFitnessValue_accuracy(decode, x_train, y_train):
    """Score each individual by the classification accuracy of a small network.

    Every row of ``decode`` is interpreted as the parameters of a network
    with one hidden layer: genes ``0:20`` form a 4x5 weight matrix, genes
    ``20:25`` the hidden bias, genes ``25:45`` a 5x4 weight matrix and the
    remaining genes the output bias. Both layers use ``sigmoid``. A sample
    counts as correct when the rounded outputs equal its target in every
    component.

    The previous version divided the fitness list by its sum without
    checking for zero. When no individual classified any sample correctly
    this produced NaN cumulative probabilities, which breaks roulette-wheel
    selection. In that case every individual now gets the same selection
    probability.

    Args:
        decode: 2-D array of decoded individuals, one per row.
        x_train: indexable collection of input samples.
        y_train: indexable collection of targets, aligned with ``x_train``.

    Returns:
        A tuple ``(fitnessValue, cum_probability)``: a list with the
        accuracy of every individual and an array with the cumulative
        selection probabilities.
    """
    popusize, _ = decode.shape
    n_samples = len(x_train)

    fitnessValue = []
    for j in range(popusize):
        W1 = decode[j][0:20].reshape(4, 5)
        V1 = decode[j][20:25].T
        W2 = decode[j][25:45].reshape(5, 4)
        V2 = decode[j][45:].T

        mismatches = []
        for i in range(n_samples):
            # Hidden-layer activations.
            X2 = sigmoid(x_train[i].T.dot(W1) + V1)
            # Network output.
            Y_hat = sigmoid(X2.T.dot(W2) + V2)
            # Sum of absolute differences between rounded output and target;
            # zero means the sample is classified correctly.
            mismatches.append(sum(abs(np.round(Y_hat) - y_train[i])))
        fitnessValue.append(sum(m == 0 for m in mismatches) / len(mismatches))

    scores = np.asarray(fitnessValue, dtype=float)
    total = scores.sum()
    if total > 0:
        probability = scores / total
    elif scores.size:
        # All accuracies are zero: fall back to uniform selection instead
        # of dividing by zero.
        probability = np.full(scores.size, 1.0 / scores.size)
    else:
        probability = scores
    cum_probability = np.cumsum(probability)
    return fitnessValue, cum_probability
 
 
def getXY():
    """Load ``all-bp.csv`` and return a train/test split.

    Columns 0-11 are used as features: they are min-max scaled and then
    reduced with PCA keeping 90% of the variance. Columns 12-15 are used as
    labels. The split keeps 30% of the rows for testing and is not seeded.

    NOTE(review): the fitness functions in this file reshape the first
    weight block to 4x5, so they presumably expect PCA to keep exactly four
    components - confirm against the data set.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``.
    """
    frame = pd.read_csv('all-bp.csv', header=None)

    # Labels: columns 12 to 15 (four columns).
    Y = frame.iloc[:, 12:16].values

    # Features: the first 12 columns, scaled to the 0-1 range, then PCA.
    raw_features = frame.iloc[:, 0:12].values
    scaled_features = preprocessing.MinMaxScaler().fit_transform(raw_features)
    X = PCA(n_components=0.9).fit_transform(scaled_features)

    # Split into training and test sets.
    return tuple(train_test_split(X, Y, test_size=0.3))
 
 
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)

上面的計算適應度函數需要自己根據實際情況調整。

# Best fitness value and the matching decoded variables, one entry per generation.
optimalvalue = []
optimalvariables = []
 
# Lower/upper bounds of the 49 decision variables (all share [-100, 100]).
decisionVariables = [[-100,100]]*49
# Resolution used to derive the number of bits per variable.
delta = 0.001
# Bit length of every decision variable.
EncodeLength = getEncodeLength(decisionVariables, delta)
# Size of the initial population.
initialPopuSize = 100
# Generate the 100 initial chromosomes. The 49 variables are split by the
# fitness functions into 20, 5, 20 and 4 values for W1, V1, W2 and V2.
population = getinitialPopulation(sum(EncodeLength), initialPopuSize)
print("polpupation.shape:",population.shape)
# Maximum number of generations.
maxgeneration = 4000
# Crossover probability.
prob = 0.8
# Mutation probability.
mutationprob = 0.5
# Number of individuals kept after each generation.
maxPopuSize = 30
# Reads the data set from disk and splits it into training and test parts.
x_train, x_test, y_train, y_test = getXY()
 
 
# Main evolution loop: one iteration per generation.
for generation in range(maxgeneration):
 # Decode the current population into real-valued variables.
 print(generation)
 decode = getDecode(population, EncodeLength, decisionVariables, delta)
 #print('the shape of decode:',decode.shape
 
 # Fitness values and cumulative selection probabilities.
 evaluation, cum_proba = getFitnessValue_accuracy(decode,x_train,y_train)
 # Roulette-wheel selection of the parents.
 newpopulations = selectNewPopulation(population, cum_proba)
 # Crossover on the selected parents.
 crossPopulations = crossNewPopulation(newpopulations, prob)
 # Mutation of the offspring.
 mutationpopulation = mutation(crossPopulations, mutationprob)
 
 # Merge parents and offspring into one candidate pool.
 totalpopulation = np.vstack((population, mutationpopulation))
 # Decode the merged pool.
 final_decode = getDecode(totalpopulation, EncodeLength, decisionVariables, delta)
 # Evaluate the merged pool.
 final_evaluation, final_cumprob = getFitnessValue_accuracy(final_decode,x_train,y_train)
 # Keep the maxPopuSize fittest individuals as the next population.
 population = findMaxPopulation(totalpopulation, final_evaluation, maxPopuSize)
 
 # Record the best fitness of this generation and the variables of the
 # first individual that reaches it.
 optimalvalue.append(np.max(final_evaluation))
 index = np.where(final_evaluation == max(final_evaluation))
 optimalvariables.append(list(final_decode[index[0][0]]))
# Plot the best fitness of every generation.
fig = plt.figure(dpi = 160,figsize=(5,4)) 
# Font settings pushed into matplotlib's global rcParams.
config = {
"font.family":"serif", # serif font family
"font.size": 10,
"mathtext.fontset":'stix',
}
rcParams.update(config)
plt.plot(np.arange(len(optimalvalue)), optimalvalue, color="y", lw=0.8, ls='-', marker='o', ms=8)
# Axis labels.
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.show()

以上就是python實現高效的遺傳算法的詳細內容,更多關於python遺傳算法的資料請關注WalkonNet其它相關文章!