當前位置：首頁 > 编程语言 > python >内容正文

python

python实现logistic_使用python实现logistic二分类

發布時間：2023/12/15 python 28 豆豆

生活随笔收集整理的這篇文章主要介紹了「python实现logistic_使用python实现logistic二分类」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

這段時間做了一個二分類的任務，訓練數據是8000個包含1000個特征和一個label的數據。下面記錄一下使用python實現logistic二分類的代碼。

import os

import time

import numpy as np

# 獲取路徑

def get_directory(file_name):
    """Return the path of *file_name* inside the current working directory.

    Args:
        file_name: File name (or relative path) to resolve.

    Returns:
        The result of joining ``os.getcwd()`` with *file_name*.
    """
    return os.path.join(os.getcwd(), file_name)

# 讀取速度相對較慢

def get_train_data(directory):
    """Load a comma-separated numeric text file with ``np.loadtxt``.

    The shape of the loaded array is printed, as before. The array is now
    also returned so that callers can actually use the data; previously it
    was discarded and the function returned ``None``.

    Args:
        directory: Path of the comma-delimited file to read.

    Returns:
        The array produced by ``np.loadtxt(directory, delimiter=',')``.
    """
    data = np.loadtxt(directory, delimiter=',')
    print(data.shape)
    return data

# 讀取速度相對更快

def loadDataSet(file_name, label_existed_flag):
    """Parse a comma-separated file of floats into features and labels.

    Each non-blank line is split on ``,`` and every field is converted with
    ``float``. When ``label_existed_flag == 1`` the last field of each line
    is taken as the label and the remaining fields as features; otherwise
    every field is a feature and the returned label array is empty.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of raising ``ValueError`` from ``float('')``.

    Args:
        file_name: Path of the text file to read.
        label_existed_flag: ``1`` if the last column holds the label.

    Returns:
        A ``(feats, labels)`` tuple of NumPy arrays.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    has_label = label_existed_flag == 1
    feats = []
    labels = []
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(file_name) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                continue
            values = [float(field) for field in line.split(',')]
            if has_label:
                feats.append(values[:-1])
                labels.append(values[-1])
            else:
                feats.append(values)
    return np.array(feats), np.array(labels)

# 讀取test的label

def loadLabels(file_name):
    """Read one label per line from a text file.

    The first comma-separated field of every non-blank line is converted
    with ``float``. Blank lines are skipped instead of raising
    ``ValueError`` from ``float('')``.

    Args:
        file_name: Path of the label file.

    Returns:
        A 1-D NumPy array of the parsed labels.

    Raises:
        ValueError: If a first field cannot be converted to ``float``.
    """
    labels = []
    # ``with`` guarantees the file is closed even if a field fails to parse.
    with open(file_name) as fr:
        for line in fr:
            line = line.strip()
            if line:
                labels.append(float(line.split(',')[0]))
    return np.array(labels)

class logistic():
    """Binary logistic-regression classifier trained on the full batch.

    The weight vector has one entry per feature plus a trailing bias entry
    and is initialised to all ones. Each training step moves the weights by
    ``learning_rate * (label - prediction) . data / n_samples``.
    """

    def __init__(self, train_data, train_label, test_data, test_label, train_num, learning_rate):
        """Store the data sets and hyper-parameters and initialise weights.

        Args:
            train_data: 2-D array-like of training features (one row per sample).
            train_label: 1-D array-like of training labels.
            test_data: 2-D array-like of test features.
            test_label: 1-D array-like of test labels.
            train_num: Number of training iterations run by ``train``.
            learning_rate: Step size used by ``update``.
        """
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        self.train_num = train_num
        self.learning_rate = learning_rate
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
        # same dtype (float64) and works on every NumPy version.
        self.weight = np.ones(len(train_data[0]) + 1, dtype=float)

    def add_bias(self, data):
        """Return *data* with a column of ones appended as the last column."""
        return np.column_stack((data, np.ones(len(data))))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of *x*."""
        # For very negative x, exp(-x) overflows to inf and the quotient is
        # the correct limit 0.0; only the overflow warning is suppressed.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def compute(self, data):
        """Return the sigmoid of ``data . weight`` for bias-augmented *data*."""
        return self.sigmoid(np.dot(data, self.weight))

    def error(self, predict, label):
        """Return the sum of squared differences between *predict* and *label*."""
        return np.power(predict - label, 2).sum()

    def update(self, data, diff):
        """Move the weights along ``diff . data`` averaged over the samples."""
        self.weight += self.learning_rate * np.dot(diff, data) / len(data)

    def train(self):
        """Run ``train_num`` full-batch updates, printing the error each step."""
        data = self.add_bias(self.train_data)
        for _ in range(self.train_num):
            predict = self.compute(data)
            # The error is computed from the pre-update prediction and
            # printed after the weights have been updated.
            error = self.error(predict, self.train_label)
            self.update(data, self.train_label - predict)
            print(error)

    def calculate_predict(self, my_data):
        """Return hard 0/1 predictions for *my_data* as a float array.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        predict = self.compute(self.add_bias(my_data))
        return (predict > 0.5).astype(float)

    @staticmethod
    def _match_rate(predict, label):
        """Return the fraction of entries of *predict* equal to *label*."""
        predict = np.asarray(predict)
        label = np.asarray(label)
        if len(label) < len(predict):
            raise ValueError(
                "got %d labels for %d predictions" % (len(label), len(predict))
            )
        # Only the first len(predict) labels are compared.
        matches = np.count_nonzero(predict == label[:len(predict)])
        return matches / len(predict)

    def accuracy(self, predict):
        """Return the fraction of *predict* that matches the training labels."""
        return self._match_rate(predict, self.train_label)

    def test(self):
        """Predict on the test data and return the fraction of correct labels."""
        predict = self.calculate_predict(self.test_data)
        return self._match_rate(predict, self.test_label)

if __name__ == "__main__":
    # Script entry point: load the data sets from the current working
    # directory, train the classifier, then print train/test accuracy and
    # the elapsed wall-clock time.
    time1 = time.time()

    # Input file names
    train_file_name = "train_data.txt"
    test_data_name = "test_data.txt"
    test_label_name = "answer.txt"

    # Resolve the file names against the current working directory
    train_directory = get_directory(train_file_name)
    test_data_directory = get_directory(test_data_name)
    test_label_directory = get_directory(test_label_name)

    # Load the data; the test file carries no label column, so the label
    # array returned for it is discarded and read from the answer file.
    train_feats, train_labels = loadDataSet(train_directory, 1)
    test_feats, _ = loadDataSet(test_data_directory, 0)
    real_test_label = loadLabels(test_label_directory)

    # Hyper-parameters: iteration count and learning rate
    train_num = 10000
    learning_rate = 0.05

    # Logistic classification
    my_logistic = logistic(train_feats, train_labels, test_feats, real_test_label, train_num, learning_rate)
    my_logistic.train()

    my_predict = my_logistic.calculate_predict(train_feats)
    my_accuracy = my_logistic.accuracy(my_predict)
    print("train accuracy")
    print(my_accuracy)

    test_accuracy = my_logistic.test()
    print("test accuracy")
    print(test_accuracy)

    time2 = time.time()
    elapse = time2 - time1
    print(elapse)

原文鏈接:https://blog.csdn.net/bofu_sun/article/details/105370473

總結

以上是生活随笔為你收集整理的python实现logistic_使用python实现logistic二分类的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Windows上将linux目录映射网络
下一篇：如何在Flash cs3利用动作补间制作