python实现logistic_使用python实现logistic二分类
這段時間做了一個二分類的任務,訓練數據是8000個包含1000個特征和一個label的數據。下面記錄一下使用python實現logistic二分類的代碼。
import os
import time
import numpy as np
# 獲取路徑
def get_directory(file_name):
    """Return *file_name* joined onto the current working directory."""
    return os.path.join(os.getcwd(), file_name)
# 讀取速度相對較慢
def get_train_data(directory):
    """Load a comma-separated numeric file with NumPy and report its shape.

    Args:
        directory: Path of the text file to read.

    Returns:
        The array produced by ``np.loadtxt``. Earlier versions printed the
        shape and then dropped the array; returning it keeps callers that
        ignore the result working while making the loader usable.
    """
    data = np.loadtxt(directory, delimiter=',')
    print(data.shape)
    return data
# 讀取速度相對更快
def loadDataSet(file_name, label_existed_flag):
    """Parse a comma-separated numeric file into feature and label arrays.

    Args:
        file_name: Path of the text file; one sample per line, fields
            separated by commas.
        label_existed_flag: When equal to 1, the last field of every line is
            taken as the label and the remaining fields as features. Any
            other value treats every field as a feature.

    Returns:
        A ``(feats, labels)`` tuple of NumPy arrays. ``labels`` is an empty
        array when ``label_existed_flag`` is not 1.

    Raises:
        ValueError: If a field of a non-blank line cannot be converted to
            ``float``.
    """
    has_label = label_existed_flag == 1
    feats = []
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) holds no sample and
                # would otherwise crash on float('').
                continue
            values = [float(field) for field in line.split(',')]
            if has_label:
                feats.append(values[:-1])
                labels.append(values[-1])
            else:
                feats.append(values)
    return np.array(feats), np.array(labels)
# 讀取test的label
def loadLabels(file_name):
    """Read the first comma-separated field of every line as a float label.

    Args:
        file_name: Path of the text file holding one label per line.

    Returns:
        A one-dimensional NumPy array with one entry per non-blank line.

    Raises:
        ValueError: If the first field of a non-blank line is not numeric.
    """
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Skip blank lines instead of crashing on float('').
                continue
            labels.append(float(line.split(',')[0]))
    return np.array(labels)
class logistic():
    """Binary logistic-regression classifier trained by full-batch updates.

    The weight vector has one entry per feature plus a trailing bias entry;
    ``add_bias`` appends the matching column of ones to the data.
    """

    def __init__(self, train_data, train_label, test_data, test_label, train_num, learning_rate):
        """Store the data sets and hyper-parameters and initialise the weights.

        Args:
            train_data: Training samples, one row per sample (must be
                non-empty, its first row fixes the feature count).
            train_label: Labels for ``train_data``.
            test_data: Samples scored by ``test``.
            test_label: Labels for ``test_data``.
            train_num: Number of update iterations run by ``train``.
            learning_rate: Step size applied to every weight update.
        """
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        self.train_num = train_num
        self.learning_rate = learning_rate
        # np.float was removed from NumPy (1.24); use the explicit dtype.
        self.weight = np.ones(len(train_data[0]) + 1, dtype=np.float64)

    def add_bias(self, data):
        """Return ``data`` with a column of ones appended as the last column."""
        return np.column_stack((data, np.ones(len(data))))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        # Clipping keeps exp() inside the float64 range, so strongly negative
        # inputs no longer trigger an overflow RuntimeWarning; the result
        # differs from the unclipped value by far less than float precision.
        bounded = np.clip(x, -700.0, 700.0)
        return 1 / (1 + np.exp(-bounded))

    def compute(self, data):
        """Return the predicted probabilities for bias-augmented ``data``."""
        return self.sigmoid(np.dot(data, self.weight))

    def error(self, predict, label):
        """Return the sum of squared differences between ``predict`` and ``label``."""
        return np.sum((predict - label) ** 2)

    def update(self, data, diff):
        """Move the weights along ``diff . data`` averaged over the samples.

        Args:
            data: Bias-augmented samples, one row per sample.
            diff: ``label - prediction`` for every row of ``data``.
        """
        self.weight += self.learning_rate * np.dot(diff, data) / len(data)

    def train(self):
        """Run ``train_num`` full-batch updates, printing the error each time."""
        data = self.add_bias(self.train_data)
        for _ in range(self.train_num):
            predict = self.compute(data)
            # The error is measured before this iteration's weight update.
            error = self.error(predict, self.train_label)
            self.update(data, self.train_label - predict)
            print(error)

    def calculate_predict(self, my_data):
        """Return 0/1 class predictions (as floats) for the raw samples.

        A sample is assigned class 1 only when its probability is strictly
        greater than 0.5.
        """
        predict = self.compute(self.add_bias(my_data))
        return (predict > 0.5).astype(float)

    @staticmethod
    def _match_ratio(predict, label):
        """Return the fraction of positions where ``predict`` equals ``label``.

        Raises:
            ValueError: If the two sequences differ in length.
            ZeroDivisionError: If ``predict`` is empty.
        """
        predict = np.asarray(predict)
        label = np.asarray(label)
        if predict.shape != label.shape:
            raise ValueError(
                "predict and label must have the same shape, got "
                f"{predict.shape} and {label.shape}"
            )
        return np.count_nonzero(predict == label) / len(predict)

    def accuracy(self, predict):
        """Return the fraction of ``predict`` that matches the training labels."""
        return self._match_ratio(predict, self.train_label)

    def test(self):
        """Return the accuracy of the current weights on the test set."""
        predict = self.calculate_predict(self.test_data)
        return self._match_ratio(predict, self.test_label)
def main():
    """Train on the data files in the working directory and print the results.

    Prints the per-iteration training error, the train accuracy, the test
    accuracy and finally the elapsed wall-clock seconds.
    """
    started = time.time()

    # Resolve the three input files against the current working directory.
    train_path = get_directory("train_data.txt")
    test_path = get_directory("test_data.txt")
    answer_path = get_directory("answer.txt")

    # Load the data: flag 1 splits off the last column as the label, flag 0
    # keeps every column as a feature (test labels come from answer.txt).
    train_x, train_y = loadDataSet(train_path, 1)
    test_x, _ = loadDataSet(test_path, 0)
    test_y = loadLabels(answer_path)

    # Iteration count and learning rate.
    iterations = 10000
    step = 0.05

    # Logistic classification.
    model = logistic(train_x, train_y, test_x, test_y, iterations, step)
    model.train()

    train_score = model.accuracy(model.calculate_predict(train_x))
    print("train accuracy")
    print(train_score)

    test_score = model.test()
    print("test accuracy")
    print(test_score)

    print(time.time() - started)


if __name__ == "__main__":
    main()
原文鏈接:https://blog.csdn.net/bofu_sun/article/details/105370473
總結
以上是生活随笔為你收集整理的python实现logistic_使用python实现logistic二分类的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Windows上将linux目录映射网络
- 下一篇: 如何在Flash cs3利用动作补间制作