Scikit-Learn Machine Learning Notes -- Model Training
Reference: handson-ml
import numpy as np
from matplotlib import pyplot as plt


# Create a dataset for linear regression
def create_dataset():
    X = 2 * np.random.rand(100, 1)
    # add Gaussian noise to the targets
    y = 4 + 3 * X + np.random.randn(100, 1)
    return X, y


# Analytical linear regression: solve the Normal Equation directly for the global optimum
def linear_regression_analysis(X, y):
    # prepend a constant feature of 1 for the bias term b
    X_b = np.c_[np.ones((100, 1)), X]
    # Normal Equation: theta = (X^T X)^-1 X^T y
    theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    print("Linear regression, closed-form solution:", theta_best)
    # predict on two sample points
    sample = np.array([[0], [2]])
    sample_b = np.c_[np.ones((2, 1)), sample]
    predict = sample_b.dot(theta_best)
    # print('Normal Equation predictions:', predict)
    # plot the fitted line against the data
    plt.plot(sample, predict, 'r-')
    plt.plot(X, y, 'b.')
    plt.axis([0, 2, 0, 15])
    plt.show()
    return X_b


# scikit-learn's LinearRegression (uses a closed-form least-squares solver by default)
def linear_regression_sk(X, y):
    from sklearn.linear_model import LinearRegression
    # create a linear regression model instance
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    print('sk-learn linear regression solution:', 'b:', lin_reg.intercept_, 'w:', lin_reg.coef_)


# Linear regression with batch gradient descent
def linear_regression_batch_gd(X_b, y):
    # constant learning rate, number of iterations, number of samples
    learning_rate = 0.1
    max_iterations = 1000
    m = 100
    # random initialization
    theta = np.random.randn(2, 1)
    # iterate
    for n in range(max_iterations):
        gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - learning_rate * gradients
    print('Linear regression, batch gradient descent solution:', theta)


# Linear regression with stochastic gradient descent
def linear_regression_stochastic_gd(X_b, y):
    # number of epochs and number of samples
    n_epochs = 50
    m = 100
    theta = np.random.randn(2, 1)
    for epoch in range(n_epochs):
        for i in range(m):
            random_index = np.random.randint(m)
            xi = X_b[random_index:random_index + 1]
            yi = y[random_index:random_index + 1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            # simple learning-rate schedule that decays over time
            learning_rate = 1.0 / (epoch * m + i + 10)
            theta = theta - learning_rate * gradients
    print('Linear regression, stochastic gradient descent solution:', theta)


# scikit-learn stochastic gradient descent for linear regression
def linear_regression_stochastic_gd_sk(X, y):
    from sklearn.linear_model import SGDRegressor
    # note: the old n_iter parameter was replaced by max_iter in recent scikit-learn versions
    sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1)
    sgd_reg.fit(X, y.ravel())
    print('sk-learn SGD linear regression solution:', 'b:', sgd_reg.intercept_, 'w:', sgd_reg.coef_)


# Create a dataset for polynomial regression (quadratic with noise)
def create_dataset_poly():
    m = 100
    X1 = 6 * np.random.rand(m, 1) - 3
    y1 = 0.5 * X1 ** 2 + X1 + 2 + np.random.randn(m, 1)
    return X1, y1


# Polynomial regression
def polynomial_regression(X, y):
    # add the squared feature, then fit an ordinary linear regression
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    poly_features = PolynomialFeatures(degree=2, include_bias=False)
    X_poly = poly_features.fit_transform(X)
    lin_reg_poly = LinearRegression()
    lin_reg_poly.fit(X_poly, y)
    print('Polynomial regression solution:', 'b:', lin_reg_poly.intercept_, 'w:', lin_reg_poly.coef_)
    return lin_reg_poly


# Plot learning curves as a function of training-set size
def plot_learning_curves(model, X, y):
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.legend()
    plt.show()


# Ridge regression: l2 regularization, closed-form solution
def ridge_regression_analysis(X, y):
    from sklearn.linear_model import Ridge
    ridge_reg = Ridge(alpha=1, solver="cholesky")
    ridge_reg.fit(X, y)
    print('Ridge regression solution:', 'b:', ridge_reg.intercept_, 'w:', ridge_reg.coef_)
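# --- Added sketch (not in the original notes): Ridge's closed-form solution written out
# with NumPy, assuming X_b = [1, X] as built in linear_regression_analysis() above.
# theta = (X_b^T X_b + alpha * A)^(-1) X_b^T y, where A is the identity matrix with its
# top-left entry zeroed so the bias term is not regularized.
def ridge_regression_normal_equation(X_b, y, alpha=1.0):
    n_features = X_b.shape[1]
    A = np.identity(n_features)
    A[0, 0] = 0  # do not penalize the bias term
    theta_ridge = np.linalg.inv(X_b.T.dot(X_b) + alpha * A).dot(X_b.T).dot(y)
    print('Ridge regression, NumPy closed-form sketch:', theta_ridge)
    return theta_ridge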
# Lasso regression: l1 regularization (solved by coordinate descent in scikit-learn)
def lasso_regression_analysis(X, y):
    from sklearn.linear_model import Lasso
    lasso_reg = Lasso(alpha=0.1)
    lasso_reg.fit(X, y)
    print('Lasso regression solution:', 'b:', lasso_reg.intercept_, 'w:', lasso_reg.coef_)


# l2 / l1 regularization solved by gradient descent
def regularization_regression_gd(X, y):
    from sklearn.linear_model import SGDRegressor
    # for l1 regularization, change penalty="l2" to penalty="l1"
    sgd_reg = SGDRegressor(penalty="l2")
    sgd_reg.fit(X, y.ravel())
    print('l2-regularized gradient descent solution:', 'b:', sgd_reg.intercept_, 'w:', sgd_reg.coef_)


# Elastic Net regularization, i.e. a mix of l1 and l2
def elasticnet_regression_gd(X, y):
    from sklearn.linear_model import ElasticNet
    # l1_ratio is the mixing ratio, i.e. the share of l1 regularization
    elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)
    elastic_net.fit(X, y)
    print('Elastic Net solution:', 'b:', elastic_net.intercept_, 'w:', elastic_net.coef_)


# Early stopping
def early_stopping(X, y):
    from copy import deepcopy
    from sklearn.linear_model import SGDRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    # with warm_start=True, calling fit() continues training from where it stopped
    # instead of restarting from scratch
    sgd_reg = SGDRegressor(max_iter=1, warm_start=True, penalty=None,
                           learning_rate="constant", eta0=0.0005)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    minimum_val_error = float("inf")
    best_epoch = None
    best_model = None
    for epoch in range(1000):
        sgd_reg.fit(X_train, y_train.ravel())
        y_val_predict = sgd_reg.predict(X_val)
        val_error = mean_squared_error(y_val, y_val_predict)
        if val_error < minimum_val_error:
            minimum_val_error = val_error
            best_epoch = epoch
            # deepcopy keeps the fitted weights (sklearn's clone() would only copy hyperparameters)
            best_model = deepcopy(sgd_reg)
    print('stopping in:', best_epoch)


# Load the iris dataset
def load_dataset_flower():
    from sklearn import datasets
    iris = datasets.load_iris()
    # X_f = iris['data']
    # y_f = iris['target']
    # print('iris dataset loaded:', iris)
    return iris


# Logistic regression (binary classification)
def logistic_classify(iris):
    from sklearn.linear_model import LogisticRegression
    X = iris["data"][:, 3:]  # petal width
    y = (iris["target"] == 2).astype(int)
    log_reg = LogisticRegression()
    log_reg.fit(X, y)
    # plot the estimated probabilities
    X_new = np.linspace(0, 3, 1000).reshape(-1, 1)
    y_proba = log_reg.predict_proba(X_new)
    plt.plot(X_new, y_proba[:, 1], "g-", label="Iris-Virginica")
    plt.plot(X_new, y_proba[:, 0], "b--", label="Not Iris-Virginica")
    plt.legend()
    plt.show()


# Softmax regression for multi-class classification
def softmax_classify(iris):
    from sklearn.linear_model import LogisticRegression
    # select the features
    X = iris["data"][:, (2, 3)]  # petal length, petal width
    y = iris["target"]
    # create a softmax regression instance
    softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs", C=10)
    softmax_reg.fit(X, y)
    # predict a single flower
    predict = softmax_reg.predict([[5, 2]])
    predict_pro = softmax_reg.predict_proba([[5, 2]])
    print('softmax regression prediction:', predict, 'class probabilities:', predict_pro)
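# --- Added sketch (not in the original notes): reproducing a softmax prediction by hand
# from the fitted parameters, assuming softmax_reg and a sample x (e.g. np.array([5, 2]))
# as used in softmax_classify() above.  Scores s_k = x . w_k + b_k are turned into
# probabilities with p_k = exp(s_k) / sum_j exp(s_j).
def softmax_by_hand(softmax_reg, x):
    scores = x.dot(softmax_reg.coef_.T) + softmax_reg.intercept_
    exp_scores = np.exp(scores - scores.max())  # subtract the max for numerical stability
    return exp_scores / exp_scores.sum()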
if __name__ == '__main__':
    # build the linear regression dataset
    X, y = create_dataset()
    # closed-form linear regression
    # X_b = linear_regression_analysis(X, y)
    # sk-learn linear regression
    # linear_regression_sk(X, y)
    # batch gradient descent
    # linear_regression_batch_gd(X_b, y)
    # stochastic gradient descent
    # linear_regression_stochastic_gd(X_b, y)
    # sk-learn stochastic gradient descent
    # linear_regression_stochastic_gd_sk(X, y)
    # build the polynomial regression dataset
    # X1, y1 = create_dataset_poly()
    # polynomial regression
    # lin_reg_poly = polynomial_regression(X1, y1)
    # learning curves as a function of training-set size
    # plot_learning_curves(lin_reg_poly, X1, y1)
    # ridge regression, l2 regularization
    # ridge_regression_analysis(X, y)
    # lasso regression, l1 regularization
    # lasso_regression_analysis(X, y)
    # regularized gradient descent
    # regularization_regression_gd(X, y)
    # elastic net
    # elasticnet_regression_gd(X, y)
    # early stopping
    # early_stopping(X1, y1)
    # load the iris dataset
    iris = load_dataset_flower()
    # logistic regression, binary classification
    logistic_classify(iris)
    # softmax multi-class classification
    softmax_classify(iris)
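Note that the commented-out call plot_learning_curves(lin_reg_poly, X1, y1) refits the model on the raw feature X1 inside the loop, so it actually plots the learning curves of a plain straight-line model rather than the quadratic one. A minimal sketch of one way around this (my addition, not part of the original notes) is to wrap the feature expansion and the regressor in a scikit-learn Pipeline and pass that to the same function:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# a pipeline that adds the squared feature before every fit/predict call,
# so plot_learning_curves() keeps working on the raw X1
polynomial_regression_pipeline = Pipeline([
    ("poly_features", PolynomialFeatures(degree=2, include_bias=False)),
    ("lin_reg", LinearRegression()),
])
# plot_learning_curves(polynomial_regression_pipeline, X1, y1)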
總結(jié)
以上是生活随笔為你收集整理的Scikit-Learn 机器学习笔记 -- 模型训练的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: TensorFlow 笔记6--迁移学习
- 下一篇: 深入学习二叉树(二) 线索二叉树