def knncls() -> None:
    """Predict user check-in locations with k-nearest neighbours.

    Reads ``./data/FBlocation/train.csv``, keeps only the rows inside a small
    x/y window, derives ``day``/``hour``/``weekday`` features from the
    ``time`` column, discards places with three or fewer check-ins,
    standardises the features and tunes ``n_neighbors`` over ``[3, 5, 10]``
    with a 2-fold ``GridSearchCV``. Intermediate frames and the final scores
    are printed to stdout.

    :return: None
    """
    # Load the raw check-in data.
    data = pd.read_csv("./data/FBlocation/train.csv")

    # 1. Shrink the data set to a small spatial window.
    # The explicit copy makes the filtered frame independent, so the column
    # assignments below do not raise SettingWithCopyWarning.
    data = data.query("x > 1.0 & x < 1.25 & y > 2.5 & y < 2.75").copy()

    # 2. Parse the ``time`` column, interpreting the values as seconds.
    time_value = pd.to_datetime(data['time'], unit='s')
    print(time_value)

    # A DatetimeIndex exposes the calendar components as attributes.
    time_value = pd.DatetimeIndex(time_value)

    # Build the time-derived features.
    data['day'] = time_value.day
    data['hour'] = time_value.hour
    data['weekday'] = time_value.weekday

    # The raw timestamp is no longer needed once its components are extracted.
    data = data.drop(['time'], axis=1)
    print(data)

    # 3. Drop target places that have too few check-ins (3 or fewer rows).
    place_count = data.groupby('place_id').count()
    frequent_places = place_count.index[place_count['row_id'] > 3]
    data = data[data['place_id'].isin(frequent_places)]

    # 4. Separate the target from the features.
    # NOTE(review): ``row_id`` stays in the feature matrix, as in the original
    # code; it looks like an arbitrary row identifier -- confirm whether it
    # should be dropped before fitting.
    y = data['place_id']
    # Cast to float64 up front: StandardScaler converts mixed int64/float64
    # input to float64 anyway and emits a DataConversionWarning when it does.
    x = data.drop(['place_id'], axis=1).astype('float64')

    # 5. Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # 6. Feature engineering: standardise, fitting on the training set only.
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)

    # 7. Grid-search the ``n_neighbors`` hyper-parameter with cross-validation.
    knn = KNeighborsClassifier()
    param = {"n_neighbors": [3, 5, 10]}
    gc = GridSearchCV(knn, param_grid=param, cv=2)
    gc.fit(x_train, y_train)

    # 8. Report the results.
    print("在測試集上準確率:", gc.score(x_test, y_test))
    print("在交叉驗證當中最好的結果:", gc.best_score_)
    print("選擇最好的模型是:", gc.best_estimator_)
    print("每個超參數每次交叉驗證的結果:", gc.cv_results_)
C:\Users\HP\Anaconda3\python.exe D:/PycharmProjects/untitled2/算法/算法3.pyrow_id x y accuracy time place_id
000.79419.0809544707028523065625115.95674.7968131865551757726713228.30787.0407743226481137537235337.36652.5165657045876567393236444.09611.1307314721307440663949553.80991.9586751780656289802927666.33364.3720136668299931249544775.74096.7697853690025662813655884.31146.941031663848471780938996.34140.07586540006012538031566001970-01-0118:09:409571970-01-1002:11:1043451970-01-0515:08:0247351970-01-0623:03:0355801970-01-0911:26:5060901970-01-0216:25:0762341970-01-0415:52:5763501970-01-0110:13:3674681970-01-0915:26:0684781970-01-0823:52:0293571970-01-0416:53:19121251970-01-0703:55:07149371970-01-0603:46:38206601970-01-0803:08:15209301970-01-0221:31:48217311970-01-0708:52:19265841970-01-0415:48:09279371970-01-0803:51:54307981970-01-0120:58:30331841970-01-0615:31:39338771970-01-0214:58:01343401970-01-0414:03:40374051970-01-0415:35:01389681970-01-0808:56:00418611970-01-0103:13:36421351970-01-0202:36:41427291970-01-0116:03:37442831970-01-0806:48:09445491970-01-0701:10:01446941970-01-0814:30:07...290702211970-01-0702:55:07290703221970-01-0118:13:24290709341970-01-0303:44:08290717121970-01-0804:19:17290721651970-01-0412:42:07290735721970-01-0720:29:38290741211970-01-0802:30:21290775791970-01-0818:08:30290777161970-01-0911:51:11290790701970-01-0700:33:24290794161970-01-0510:48:15290799311970-01-0205:35:45290832411970-01-0200:35:10290837891970-01-0509:39:49290847391970-01-0612:04:17290854971970-01-0311:31:33290861671970-01-0801:04:37290870941970-01-0422:25:01290890041970-01-0123:26:24290904431970-01-0309:00:22290936771970-01-0710:03:36290945471970-01-0911:44:34290961551970-01-0408:07:44290994201970-01-0415:47:47290996861970-01-0801:24:11291002031970-01-0110:33:56291084431970-01-0723:22:04291099931970-01-0815:03:14291115391970-01-0400:53:41291121541970-01-0823:01:07
Name: time, Length:17710, dtype: datetime64[ns]
D:/PycharmProjects/untitled2/算法/算法3.py:36: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  data['day'] = time_value.day
D:/PycharmProjects/untitled2/算法/算法3.py:37: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  data['hour'] = time_value.hour
D:/PycharmProjects/untitled2/算法/算法3.py:38: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  data['weekday'] = time_value.weekday
row_id x y accuracy place_id day hour weekday
6006001.22142.702317668342674211839579571.18322.68915866834267421025434543451.19352.65501168897906535150473547351.14522.60744968223597526231558055801.00892.72871915279219059114609060901.11402.62621140001538672164623462341.14492.50033437414844054156635063501.08442.74366559636937981103746874681.00582.50966690766957039154847884781.20152.51877239925890158233935793571.19162.73231705163401947416612125121251.13882.502969753697500273214937149371.14262.744111678038662663120660206601.23872.595965368308783383320930209301.05192.5208676399991653221421731217311.21712.726399804898579978226584265841.12352.6282635606572086415627937279371.12872.6332588560657208683330798307981.04222.6474491435128522120333184331841.01282.5865751913341282615133877338771.14372.69729726683426742214434340343401.15132.58241762355236719414637405374051.21222.7106102946102544415638968389681.14962.6298166959837792588341861418611.08862.684010331246374613342135421351.04982.68405331246374622442729427291.06942.5829101812226671116344283442831.23842.739860804898579986344549445491.20772.537076399258901571244694446941.03802.531515250352684178143...........................29070221290702211.16782.560566235523671972229070322290703221.04932.7010743312463746118329070934290709341.18992.517628219922395833529071712290717121.22602.73674294610254484329072165290721651.01752.6220425283227804412629073572290735721.24672.7316648048985799720229074121290741211.20712.6646161527052291882329077579290775791.24792.6474422006503124818329077716290777161.18982.701356683426742911429079070290790701.18822.547628173130615370229079416290794161.23352.5903726766324666510029079931290799311.02132.6554167527052291825429083241290832411.06002.672271963298055920429083789290837891.06742.618488109720086959029084739290847391.23192.6767632327054745612129085497290854971.05502.59971751097200869311529086167290861671.05152.675857623756949681329087094290870941.00882.5978711097200869422629089004290890041.18602.69261532215268322123329090443290904431.05682.69595824600
9329639529093677290936771.00162.5252169013153173710229094547290945471.11012.6530245270522918911429096155290961551.01222.645065817861937748629099420290994201.16752.555692355236719415629099686290996861.04052.672313331246374681329100203291002031.01292.6775123312463746110329108443291084431.14742.6840363533177779723229109993291099931.02402.7238626424972551815329111539291115391.20322.679687353317777940629112154291121541.10702.541917849325782458233[17710 rows x 8 columns]
C:\Users\HP\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py:645: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.
  return self.partial_fit(X, y)
C:\Users\HP\Anaconda3\lib\site-packages\sklearn\base.py:464: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.
  return self.fit(X, **fit_params).transform(X)
D:/PycharmProjects/untitled2/算法/算法3.py:62: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.
  x_test = std.transform(x_test)
C:\Users\HP\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py:652: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of members in any class cannot be less than n_splits=2.
  % (min_groups, self.n_splits)), Warning)
在測試集上準確率 0.42174940898345153
在交叉驗證中最好結果 0.3899747793190416
選擇最好的模型 KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',metric_params=None, n_jobs=None, n_neighbors=10, p=2,weights='uniform')
每個超參數每次交叉驗證的結果 {'mean_fit_time': array([0.01645494,0.01138353,0.01047194]),'std_fit_time': array([0.00448656,0.00058341,0.00249255]),'mean_score_time': array([0.75878692,0.61585569,0.6273967]),'std_score_time': array([0.01371908,0.09724212,0.01204288]),'param_n_neighbors': masked_array(data=[3,5,10],mask=[False,False,False],fill_value='?',dtype=object),'params':[{'n_neighbors':3},{'n_neighbors':5},{'n_neighbors':10}],'split0_test_score': array([0.33999688,0.36842105,0.38841168]),'split1_test_score': array([0.34622116,0.37549722,0.39156722]),'mean_test_score': array([0.34308008,0.37192623,0.38997478]),'std_test_score': array([0.00311201,0.00353793,0.0015777]),'rank_test_score': array([3,2,1]),'split0_train_score': array([0.60875099,0.56022275,0.49880668]),'split1_train_score': array([0.59253475,0.53959082,0.48633453]),'mean_train_score': array([0.60064287,0.54990678,0.49257061]),'std_train_score': array([0.00810812,0.01031597,0.00623608])}Process finished with exit code 0