Implementing a Support Vector Machine in Python (on the Double-Moon Dataset)
This article, put together by 生活随笔, walks through a from-scratch Python implementation of a support vector machine trained on the double-moon dataset, shared here for reference.
1. Generating the dataset
import numpy as np
import matplotlib.pyplot as plt


class moon_data_class(object):
    def __init__(self, N, d, r, w):
        self.N = N    # number of points per moon
        self.w = w    # width of each moon
        self.d = d    # vertical distance between the two moons
        self.r = r    # radius of each moon

    def sgn(self, x):
        if x > 0:
            return 1
        else:
            return -1

    def sig(self, x):
        return 1.0 / (1 + np.exp(-x))

    def dbmoon(self):
        N1 = 10 * self.N
        N = self.N
        r = self.r
        w2 = self.w / 2
        d = self.d
        done = True
        data = np.empty(0)
        while done:
            # draw candidate points uniformly from a rectangle
            tmp_x = 2 * (r + w2) * (np.random.random([N1, 1]) - 0.5)
            tmp_y = (r + w2) * np.random.random([N1, 1])
            tmp = np.concatenate((tmp_x, tmp_y), axis=1)
            tmp_ds = np.sqrt(tmp_x * tmp_x + tmp_y * tmp_y)
            # keep the points that fall inside the upper half-moon ring
            idx = np.logical_and(tmp_ds > (r - w2), tmp_ds < (r + w2))
            idx = (idx.nonzero())[0]
            if data.shape[0] == 0:
                data = tmp.take(idx, axis=0)
            else:
                data = np.concatenate((data, tmp.take(idx, axis=0)), axis=0)
            if data.shape[0] >= N:
                done = False
        db_moon = data[0:N, :]
        # build the lower moon by shifting and flipping the upper one
        data_t = np.empty([N, 2])
        data_t[:, 0] = data[0:N, 0] + r
        data_t[:, 1] = -data[0:N, 1] - d
        db_moon = np.concatenate((db_moon, data_t), axis=0)
        return db_moon
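As a quick illustration (not part of the original post), the class above can be sampled and plotted directly. The parameters mirror the ones used in the test script further below, with a smaller N for speed:

import matplotlib.pyplot as plt

# draw 2*200 points: rows 0..N-1 form the upper moon, rows N..2N-1 the lower one
sample = moon_data_class(N=200, d=-5, r=10, w=6).dbmoon()
plt.plot(sample[0:200, 0], sample[0:200, 1], 'r*',
         sample[200:400, 0], sample[200:400, 1], 'b*')
plt.show()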
2. The SVM algorithm

class SVM:
    def __init__(self, dataSet, labels, C, toler, kernel_option):
        self.train_x = dataSet            # training features
        self.train_y = labels             # training labels
        self.C = C                        # penalty parameter
        self.toler = toler                # tolerance, one of the stopping criteria
        self.n_samples = np.shape(dataSet)[0]                    # number of training samples
        self.alphas = np.mat(np.zeros((self.n_samples, 1)))      # Lagrange multipliers
        self.b = 0
        self.error_tmp = np.mat(np.zeros((self.n_samples, 2)))   # cache of the errors E
        self.kernel_opt = kernel_option                          # kernel type and parameters
        self.kernel_mat = calc_kernel(self.train_x, self.kernel_opt)  # kernel matrix


def cal_kernel_value(train_x, train_x_i, kernel_option):
    '''Kernel values between one sample and all training samples.
    input:  train_x(mat): training samples
            train_x_i(mat): the i-th training sample
            kernel_option(tuple): kernel type and parameters
    output: kernel_value(mat): kernel values between the samples
    '''
    kernel_type = kernel_option[0]    # kernel type: 'rbf' or anything else (linear)
    m = np.shape(train_x)[0]          # number of samples
    kernel_value = np.mat(np.zeros((m, 1)))
    if kernel_type == 'rbf':          # RBF kernel
        sigma = kernel_option[1]
        if sigma == 0:
            sigma = 1.0
        for i in range(m):
            diff = train_x[i, :] - train_x_i
            # note: the exponent here is ||diff||^2 / sigma, so sigma must be negative
            # (as with ('rbf', -10) in the test script) for the kernel to decay with distance
            kernel_value[i] = np.exp(diff * diff.T / sigma)
    else:                             # no kernel: plain inner product (linear kernel)
        kernel_value = train_x * train_x_i.T
    return kernel_value


def calc_kernel(train_x, kernel_option):
    '''Compute the kernel matrix.
    input:  train_x(mat): training features
            kernel_option(tuple): kernel type and parameters
    output: kernel_matrix(mat): kernel values between all pairs of samples
    '''
    m = np.shape(train_x)[0]                  # number of samples
    kernel_matrix = np.mat(np.zeros((m, m)))  # initialise the pairwise kernel values
    for i in range(m):
        kernel_matrix[:, i] = cal_kernel_value(train_x, train_x[i, :], kernel_option)
    return kernel_matrix


def cal_error(svm, alpha_k):
    '''Compute the prediction error for one sample.
    input:  svm: the SVM model
            alpha_k(int): index of the chosen variable
    output: error_k(float): the error
    '''
    output_k = float(np.multiply(svm.alphas, svm.train_y).T * svm.kernel_mat[:, alpha_k] + svm.b)
    error_k = output_k - float(svm.train_y[alpha_k])
    return error_k


def update_error_tmp(svm, alpha_k):
    '''Recompute the error for one sample and refresh the cache.
    input:  svm: the SVM model
            alpha_k(int): index of the chosen variable
    output: the corresponding error value (stored in the cache)
    '''
    error = cal_error(svm, alpha_k)
    svm.error_tmp[alpha_k] = [1, error]


def select_second_sample_j(svm, alpha_i, error_i):
    '''Select the second sample.
    input:  svm: the SVM model
            alpha_i(int): index of the first variable
            error_i(float): E_i
    output: alpha_j(int): index of the second variable
            error_j(float): E_j
    '''
    # mark the first variable as optimised
    svm.error_tmp[alpha_i] = [1, error_i]
    candidateAlphaList = np.nonzero(svm.error_tmp[:, 0].A)[0]

    maxStep = 0
    alpha_j = 0
    error_j = 0

    if len(candidateAlphaList) > 1:
        # pick the alpha_j that maximises |E_i - E_j|
        for alpha_k in candidateAlphaList:
            if alpha_k == alpha_i:
                continue
            error_k = cal_error(svm, alpha_k)
            if abs(error_k - error_i) > maxStep:
                maxStep = abs(error_k - error_i)
                alpha_j = alpha_k
                error_j = error_k
    else:
        # otherwise pick alpha_j at random
        alpha_j = alpha_i
        while alpha_j == alpha_i:
            alpha_j = int(np.random.uniform(0, svm.n_samples))
        error_j = cal_error(svm, alpha_j)

    return alpha_j, error_j


def choose_and_update(svm, alpha_i):
    '''Check the KKT conditions and, if they are violated, update a pair of alphas.
    input:  svm: the SVM model
            alpha_i(int): index of the first variable
    '''
    error_i = cal_error(svm, alpha_i)    # compute E_i for the first sample

    # check whether the first variable violates the KKT conditions
    if (svm.train_y[alpha_i] * error_i < -svm.toler) and (svm.alphas[alpha_i] < svm.C) or \
            (svm.train_y[alpha_i] * error_i > svm.toler) and (svm.alphas[alpha_i] > 0):

        # 1. select the second variable
        alpha_j, error_j = select_second_sample_j(svm, alpha_i, error_i)
        alpha_i_old = svm.alphas[alpha_i].copy()
        alpha_j_old = svm.alphas[alpha_j].copy()

        # 2. compute the lower and upper bounds L and H
        if svm.train_y[alpha_i] != svm.train_y[alpha_j]:
            L = max(0, svm.alphas[alpha_j] - svm.alphas[alpha_i])
            H = min(svm.C, svm.C + svm.alphas[alpha_j] - svm.alphas[alpha_i])
        else:
            L = max(0, svm.alphas[alpha_j] + svm.alphas[alpha_i] - svm.C)
            H = min(svm.C, svm.alphas[alpha_j] + svm.alphas[alpha_i])
        if L == H:
            return 0

        # 3. compute eta
        eta = 2.0 * svm.kernel_mat[alpha_i, alpha_j] - svm.kernel_mat[alpha_i, alpha_i] \
            - svm.kernel_mat[alpha_j, alpha_j]
        if eta >= 0:
            return 0

        # 4. update alpha_j
        svm.alphas[alpha_j] -= svm.train_y[alpha_j] * (error_i - error_j) / eta

        # 5. clip alpha_j into [L, H]
        if svm.alphas[alpha_j] > H:
            svm.alphas[alpha_j] = H
        if svm.alphas[alpha_j] < L:
            svm.alphas[alpha_j] = L

        # 6. stop if alpha_j barely changed
        if abs(alpha_j_old - svm.alphas[alpha_j]) < 0.00001:
            update_error_tmp(svm, alpha_j)
            return 0

        # 7. update alpha_i
        svm.alphas[alpha_i] += svm.train_y[alpha_i] * svm.train_y[alpha_j] \
            * (alpha_j_old - svm.alphas[alpha_j])

        # 8. update b
        b1 = svm.b - error_i - svm.train_y[alpha_i] * (svm.alphas[alpha_i] - alpha_i_old) \
            * svm.kernel_mat[alpha_i, alpha_i] \
            - svm.train_y[alpha_j] * (svm.alphas[alpha_j] - alpha_j_old) \
            * svm.kernel_mat[alpha_i, alpha_j]
        b2 = svm.b - error_j - svm.train_y[alpha_i] * (svm.alphas[alpha_i] - alpha_i_old) \
            * svm.kernel_mat[alpha_i, alpha_j] \
            - svm.train_y[alpha_j] * (svm.alphas[alpha_j] - alpha_j_old) \
            * svm.kernel_mat[alpha_j, alpha_j]
        if (0 < svm.alphas[alpha_i]) and (svm.alphas[alpha_i] < svm.C):
            svm.b = b1
        elif (0 < svm.alphas[alpha_j]) and (svm.alphas[alpha_j] < svm.C):
            svm.b = b2
        else:
            svm.b = (b1 + b2) / 2.0

        # 9. refresh the error cache
        update_error_tmp(svm, alpha_j)
        update_error_tmp(svm, alpha_i)

        return 1
    else:
        return 0


def SVM_training(train_x, train_y, C, toler, max_iter, kernel_option=('rbf', 0.431029)):
    '''Train the SVM.
    input:  train_x(mat): training features
            train_y(mat): training labels
            C(float): penalty coefficient
            toler(float): tolerance, one of the stopping criteria
            max_iter(int): maximum number of iterations
            kernel_option(tuple): kernel type and parameters
    output: the trained SVM model
    '''
    # 1. initialise the SVM classifier
    svm = SVM(train_x, train_y, C, toler, kernel_option)

    # 2. start training
    entireSet = True
    alpha_pairs_changed = 0
    iteration = 0
    while (iteration < max_iter) and ((alpha_pairs_changed > 0) or entireSet):
        print("\t iteration: ", iteration)
        alpha_pairs_changed = 0
        if entireSet:
            # sweep over all samples
            for x in range(svm.n_samples):
                alpha_pairs_changed += choose_and_update(svm, x)
            iteration += 1
        else:
            # sweep only over the non-bound samples (0 < alpha < C)
            bound_samples = []
            for i in range(svm.n_samples):
                if svm.alphas[i, 0] > 0 and svm.alphas[i, 0] < svm.C:
                    bound_samples.append(i)
            for x in bound_samples:
                alpha_pairs_changed += choose_and_update(svm, x)
            iteration += 1
        # alternate between all samples and the non-bound samples
        if entireSet:
            entireSet = False
        elif alpha_pairs_changed == 0:
            entireSet = True
    return svm


def svm_predict(svm, test_sample_x):
    '''Predict the SVM output for one sample.
    input:  svm: the SVM model
            test_sample_x(mat): the sample
    output: predict(float): the prediction for the sample
    '''
    # 1. compute the kernel values between the sample and the training set
    kernel_value = cal_kernel_value(svm.train_x, test_sample_x, svm.kernel_opt)
    # 2. evaluate the decision function f(x) = sum_i alpha_i * y_i * K(x_i, x) + b
    predict = kernel_value.T * np.multiply(svm.train_y, svm.alphas) + svm.b
    return predict


def cal_accuracy(svm, test_x, test_y):
    '''Compute the prediction accuracy.
    input:  svm: the SVM model
            test_x(mat): test features
            test_y(mat): test labels
    output: accuracy(float): the prediction accuracy
    '''
    n_samples = np.shape(test_x)[0]    # number of samples
    correct = 0.0
    for i in range(n_samples):
        # predict each sample
        predict = svm_predict(svm, test_x[i, :])
        # count it as correct if the predicted sign matches the true label
        if np.sign(predict) == np.sign(test_y[i]):
            correct += 1
    accuracy = correct / n_samples
    return accuracy
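Before moving on to the full double-moon experiment, a quick hypothetical sanity check (not part of the original post) can exercise the trainer's linear-kernel branch on four hand-made points:

import numpy as np

# four trivially separable points with labels in {-1, +1}
toy_x = np.mat([[1.0, 1.0], [2.0, 2.0], [-1.0, -1.0], [-2.0, -2.0]])
toy_y = np.mat([[1.0], [1.0], [-1.0], [-1.0]])

# any kernel type other than 'rbf' falls through to the plain inner product
toy_model = SVM_training(toy_x, toy_y, C=1.0, toler=0.001, max_iter=50,
                         kernel_option=('linear', 0))
print(cal_accuracy(toy_model, toy_x, toy_y))    # expected to print 1.0 on this toy set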
3. Running the test

if __name__ == "__main__":
    # 1. load / generate the training data
    # dataSet, labels = load_data_libsvm("heart_scale")
    step = 0
    color = ['.r', '.g', '.b', '.y']     # marker styles
    dcolor = ['*r', '*g', '*b', '*y']    # marker styles
    frames = []

    N = 400
    d = -5
    r = 10
    width = 6

    data_source = moon_data_class(N, d, r, width)
    data = data_source.dbmoon()
    # x0 = [1 for x in range(1, 401)]
    input_cells = np.array([np.reshape(data[0:2 * N, 0], len(data)),
                            np.reshape(data[0:2 * N, 1], len(data))]).transpose()

    labels_pre = [[-1.] for y in range(1, 401)]    # labels of the upper moon
    labels_pos = [[1.] for y in range(1, 401)]     # labels of the lower moon
    label = labels_pre + labels_pos

    dataSet = np.mat(input_cells)
    labels = np.mat(label)

    # 2. train the SVM model
    C = 0.001
    toler = 0.1
    maxIter = 1000
    kernel_option = ('rbf', -10)
    svm_model = SVM_training(dataSet, labels, C, toler, maxIter, kernel_option)

    # 3. compute the training accuracy
    accuracy = cal_accuracy(svm_model, dataSet, labels)
    print("The training accuracy is: %.3f%%" % (accuracy * 100))

    # 4. save the final SVM model
    print("------------ 4、save model ----------------")
    # svm.save_svm_model(svm_model, "model_file")

    # scan a grid and record the points where the predicted sign flips,
    # which traces out the decision boundary
    test_x = []
    test_y = []
    test_p = []
    predict = 0
    y_p_old = 0
    for x in np.arange(-15., 25., 1):
        for y in np.arange(-15., 25., 1):
            predict = svm_predict(svm_model, np.array([x, y]))
            y_p = np.sign(predict)[0, 0]
            if y_p_old > 0 and y_p < 0:
                test_x.append(x)
                test_y.append(y)
                test_p.append([y_p_old, y_p])
            y_p_old = y_p

    # plot the decision boundary and the two moons
    plt.plot(test_x, test_y, 'g--')
    plt.plot(data[0:N, 0], data[0:N, 1], 'r*',
             data[N:2 * N, 0], data[N:2 * N, 1], 'b*')
    plt.show()
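The sign-flip scan above only records crossings along the sweep direction of the grid. As an optional alternative sketch (not in the original post; it assumes svm_model, data, and N from the script above), the same grid can be evaluated once and handed to plt.contour to draw the full zero-level decision boundary:

import numpy as np
import matplotlib.pyplot as plt

xs = np.arange(-15., 25., 0.5)
ys = np.arange(-15., 25., 0.5)
zz = np.zeros((len(ys), len(xs)))
for i, yv in enumerate(ys):
    for j, xv in enumerate(xs):
        zz[i, j] = svm_predict(svm_model, np.array([xv, yv]))[0, 0]

plt.contour(xs, ys, zz, levels=[0.0], colors='g')    # f(x) = 0 is the decision boundary
plt.plot(data[0:N, 0], data[0:N, 1], 'r*',
         data[N:2 * N, 0], data[N:2 * N, 1], 'b*')
plt.show()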
4. Results

5. A TensorFlow implementation
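The TensorFlow code from the original post is not reproduced here. Purely as a hedged sketch of one possible approach (not the author's code), a linear soft-margin SVM can be fitted to the dataSet/labels from section 3 by minimising the hinge loss with an L2 penalty in TensorFlow 2:

import numpy as np
import tensorflow as tf

# assumes dataSet (800 x 2 features) and labels (800 x 1, values in {-1, +1}) from section 3
X = tf.constant(np.asarray(dataSet), dtype=tf.float32)
Y = tf.constant(np.asarray(labels), dtype=tf.float32)

w = tf.Variable(tf.zeros([2, 1]))    # weight vector
b = tf.Variable(tf.zeros([1]))       # bias
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

for step in range(500):
    with tf.GradientTape() as tape:
        scores = tf.matmul(X, w) + b                                   # f(x) = w.x + b
        hinge = tf.reduce_mean(tf.maximum(0.0, 1.0 - Y * scores))      # hinge loss
        loss = hinge + 0.01 * tf.reduce_sum(w * w)                     # plus L2 regularisation
    grads = tape.gradient(loss, [w, b])
    optimizer.apply_gradients(zip(grads, [w, b]))

train_acc = tf.reduce_mean(tf.cast(tf.sign(tf.matmul(X, w) + b) == Y, tf.float32))
print(float(train_acc))

A purely linear model like this cannot trace the curved moon boundary; a kernel, as in the SMO code above, is what allows a non-linear decision surface.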
6. Results
Summary

That is the complete walkthrough of implementing a support vector machine in Python on the double-moon dataset; hopefully it helps with the problem you are working on.