參考論文:J. Zico Kolter and Matthew J. Johnson. "REDD: A Public Data Set for Energy Disaggregation Research." In Proceedings of the SustKDD Workshop on Data Mining Applications in Sustainability, 2011.
# One-off conversion step: turn the raw REDD low-frequency CSV data into an
# HDF5 store that nilmtk can load with DataSet(...).
from nilmtk.dataset_converters import convert_redd

convert_redd(r'C:\Users\admin\Anaconda3\nilm_metadata\low_freq',
             r'C:\Users\admin\Anaconda3\nilm_metadata\low_freq\redd_low_new.h5')
from __future__ import print_function, division
import pandas as pd
import numpy as np
from nilmtk.dataset import DataSet
#from nilmtk.metergroup import MeterGroup
#from nilmtk.datastore import HDFDataStore
#from nilmtk.timeframe import TimeFrame
from nilmtk.disaggregate.combinatorial_optimisation import CombinatorialOptimisation
from nilmtk.legacy.disaggregate.fhmm_exact import FHMMtrain = DataSet('C:/Users/admin/PycharmProjects/nilmtktest/low_freq/redd_low.h5') # 讀取數(shù)據(jù)集
test = DataSet('C:/Users/admin/PycharmProjects/nilmtktest/low_freq/redd_low.h5') # 讀取數(shù)據(jù)集
building = 1 ## 選擇家庭house
train.set_window(end="30-4-2011") ## 劃分?jǐn)?shù)據(jù)集,2011年4月20號之前的作為訓(xùn)練集
test.set_window(start="30-4-2011") ## 四月40號之后的作為測試集## elec包含了這個家庭中的所有的電器信息和總功率信息,building=1-6個家庭
train_elec = train.buildings[1].elec
test_elec = test.buildings[1].electop_5_train_elec = train_elec.submeters().select_top_k(k=5) ## 選擇用電量排在前5的來進(jìn)行訓(xùn)練和測試
選取第一個家庭中用電量排名前 5 的電器數(shù)據(jù)進(jìn)行測試。計(jì)算過程如下:
def predict(clf, test_elec, sample_period, timezone):
    """Disaggregate the mains signal of *test_elec* with a trained model.

    Parameters
    ----------
    clf : a trained nilmtk disaggregator (CombinatorialOptimisation or FHMM)
    test_elec : nilmtk MeterGroup of the test building
    sample_period : resampling period in seconds
    timezone : timezone name used to align prediction and ground-truth indexes

    Returns
    -------
    (gt_overall, pred_overall) : pair of DataFrames with one column per
    appliance — measured (ground-truth) power and predicted power, restricted
    to the timestamps present in both.
    """
    pred = {}
    gt = {}

    # Disaggregate the aggregate (mains) load chunk by chunk.
    for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):
        chunk_drop_na = chunk.dropna()  # discard missing samples
        pred[i] = clf.disaggregate_chunk(chunk_drop_na)

        # Ground truth: the measured consumption of each individual submeter.
        gt[i] = {}
        for meter in test_elec.submeters().meters:
            gt[i][meter] = next(meter.load(sample_period=sample_period))
        # Collapse the per-meter series into one DataFrame (meter -> column).
        gt[i] = pd.DataFrame({k: v.squeeze() for k, v in gt[i].items()},
                             index=next(iter(gt[i].values())).index).dropna()

    # Concatenate all chunks; assumes everything fits in memory.
    gt_overall = pd.concat(gt)
    gt_overall.index = gt_overall.index.droplevel()
    pred_overall = pd.concat(pred)
    pred_overall.index = pred_overall.index.droplevel()

    # Use the same column order in both frames.
    gt_overall = gt_overall[pred_overall.columns]

    # Keep only the timestamps common to ground truth and prediction,
    # comparing in UTC to avoid DST ambiguities.
    gt_index_utc = gt_overall.index.tz_convert("UTC")
    pred_index_utc = pred_overall.index.tz_convert("UTC")
    common_index_utc = gt_index_utc.intersection(pred_index_utc)
    common_index_local = common_index_utc.tz_convert(timezone)
    # .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and
    # removed in pandas 1.0.
    gt_overall = gt_overall.loc[common_index_local]
    pred_overall = pred_overall.loc[common_index_local]

    # Replace ElecMeter column objects with human-readable appliance labels.
    appliance_labels = [m.label() for m in gt_overall.columns.values]
    gt_overall.columns = appliance_labels
    pred_overall.columns = appliance_labels
    return gt_overall, pred_overall


# The two disaggregation algorithms to compare:
# combinatorial optimisation (CO) and the factorial HMM (FHMM).
classifiers = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()}
predictions = {}
sample_period = 120  # resampling period in seconds (2 minutes)

# Train each algorithm on the top-5 appliances, then disaggregate the test set.
for clf_name, clf in classifiers.items():
    print("*" * 20)
    print(clf_name)
    print("*" * 20)
    clf.train(top_5_train_elec, sample_period=sample_period)  # fit the model
    # Pass the sample_period variable (not a hard-coded 120) so training and
    # prediction always use the same resampling period.
    gt, predictions[clf_name] = predict(clf, test_elec, sample_period,
                                        train.metadata['timezone'])