sklearn中的验证
生活随笔收集整理的這篇文章主要介紹了「sklearn中的验证」,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
代碼如下:
"""Demonstrate scikit-learn's train/validation splitting strategies.

The script loads the iris data set, trains a linear SVM on a random
hold-out split (with and without standardization), and then prints the
train/test partition produced by each cross-validation splitter:
K-fold, leave-one-out, leave-P-out, shuffle split, their stratified and
group-aware variants.

Original code adapted from:
https://blog.csdn.net/luanpeng825485697/article/details/79836262
"""
# Functions used for cross-validation.
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
# Plain subset splitters.
from sklearn.model_selection import KFold, LeaveOneOut, LeavePOut, ShuffleSplit
# Stratified splitters (preserve class proportions).
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
# Group-aware splitters.
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut, LeavePGroupsOut, GroupShuffleSplit
# Time-series splitter (only used by the disabled example at the bottom).
from sklearn.model_selection import TimeSeriesSplit
from sklearn import datasets  # bundled data sets
from sklearn import svm  # SVM estimators
from sklearn import preprocessing  # preprocessing utilities
from sklearn.metrics import recall_score  # model metric

iris = datasets.load_iris()  # load the data set
print('樣本集大小:',iris.data.shape,iris.target.shape)

# ===================== Random hold-out split, then train a model =====================
# test_size is the fraction of samples placed in the test (validation) set.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0
)

print('訓練集大小:',X_train.shape,y_train.shape)  # training-set shapes
print('測試集大小:',X_test.shape,y_test.shape)  # test-set shapes
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)  # fit on the training set
# Score on the held-out data; "test" here really means "validation".
print('準確率:',clf.score(X_test, y_test))

# When standardization is involved, the scaler must be fitted on the training
# set only and then applied unchanged to both the training and the test set.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
clf = svm.SVC(kernel='linear', C=1).fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
# Accuracy on the validation data: X_test_transformed are the inputs,
# y_test the corresponding labels.
print(clf.score(X_test_transformed, y_test))

print("---------------------------K折交叉驗證、留一交叉驗證、留p交叉驗證、隨機排列交叉驗證---------------------------")
# K-fold split; only the first fold is printed for each splitter below.
kf = KFold(n_splits=3)
for train, test in kf.split(iris.data):
    print("k折劃分:%s %s" % (train.shape, test.shape))
    break

print("---------------------------留一劃分子集---------------------------")
loo = LeaveOneOut()
for train, test in loo.split(iris.data):
    print("留一劃分:%s %s" % (train.shape, test.shape))
    break

print("---------------------------留p劃分子集---------------------------")
lpo = LeavePOut(p=5)
for train, test in lpo.split(iris.data):
    print("留p劃分:%s %s" % (train.shape, test.shape))
    break

print("---------------------------隨機排列劃分子集????---------------------------")
ss = ShuffleSplit(n_splits=3, test_size=0.25,random_state=0)
# The loop variables must be named train/test: the original bound
# train_index/test_index and then printed the stale train/test left over
# from the leave-P-out loop, reporting the wrong split sizes.
for train, test in ss.split(iris.data):
    print("隨機排列劃分:%s %s" % (train.shape, test.shape))
    break

print("---------------------------分層K折交叉驗證、分層隨機交叉驗證---------------------------")
# Each fold keeps roughly the same class proportions as the full data set.
skf = StratifiedKFold(n_splits=3)
for train, test in skf.split(iris.data, iris.target):
    print("分層K折劃分:%s %s" % (train.shape, test.shape))
    break

print("---------------------------分層K折交叉驗證、分層隨機交叉驗證(shuffle洗牌版本)---------------------------")
# Shuffled variant: every split keeps the class proportions of the full data set.
skf = StratifiedShuffleSplit(n_splits=3)
for train, test in skf.split(iris.data, iris.target):
    print("分層隨機劃分:%s %s" % (train.shape, test.shape))
    break

print("##############################組 k-fold交叉驗證、留一組交叉驗證、留 P 組交叉驗證、Group Shuffle Split##############################")
X = [0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 8.8, 9, 10]  # 10 samples
y = ["a", "b", "b", "b", "c", "c", "c", "d", "d", "d"]
# groups[i] is the group id of sample i. Samples sharing a group id never end
# up on both sides of a split, i.e. train and test always come from different
# groups.
# NOTE(review): the original author remarks that changing n_splits "sometimes
# has no effect" -- unverified; confirm against the splitter documentation.
groups = [1, 1, 1, 2, 2, 2, 3, 3, 3, 3]

print("-----------k折分組------------")
# n_splits must not exceed the number of distinct values in groups.
gkf = GroupKFold(n_splits=3)
for train, test in gkf.split(X, y, groups=groups):
    print("組 k-fold分割:%s %s" % (train, test))
    print("train=",train)
    print("test=",test)

print("-----------留一分組------------")
logo = LeaveOneGroupOut()
for train, test in logo.split(X, y, groups=groups):
    print("留一組分割:%s %s" % (train, test))

print("-----------留p分組------------")
lpgo = LeavePGroupsOut(n_groups=2)
for train, test in lpgo.split(X, y, groups=groups):
    print("留 P 組分割:%s %s" % (train, test))

print("-----------隨機分組------------")
gss = GroupShuffleSplit(n_splits=4, test_size=0.5, random_state=0)
for train, test in gss.split(X, y, groups=groups):
    print("隨機分割:%s %s" % (train, test))

# Disabled time-series example, kept from the original script.
# Original author's note: the data set is cut in time order into 4 parts with
# 3 cuts and the last part is held out for validation -- TODO confirm against
# the TimeSeriesSplit documentation before enabling.
# print("##############################時間序列分割##############################")
# tscv = TimeSeriesSplit(n_splits=3)
# # TimeSeriesSplit(max_train_size=None, n_splits=3)
# for train, test in tscv.split(iris.data):
#     print("時間序列分割:%s %s" % (train, test))
總結
以上是生活随笔為你收集整理的sklearn中的验证的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 直接获取submission结果
- 下一篇: join操作-内联,左外联,右外联,交叉