#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
##############################################################################
#
# MODULE: r.futures.demand
#
# AUTHOR(S): Anna Petrasova (kratochanna gmail.com)
#
# PURPOSE: create demand table for FUTURES
#
# COPYRIGHT: (C) 2015 by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (version 2). Read the file COPYING that comes with GRASS
# for details.
#
###############################################################################%module
#% description: Script for creating demand table which determines the quantity of land change expected.
#% keyword: raster
#% keyword: demand
#%end
#%option G_OPT_R_INPUTS
#% key: development
#% description: Names of input binary raster maps representing development
#% guisection: Input maps
#%end
#%option G_OPT_R_INPUT
#% key: subregions
#% description: Raster map of subregions
#% guisection: Input maps
#%end
#%option G_OPT_F_INPUT
#% key: observed_population
#% description: CSV file with observed population in subregions at certain times
#% guisection: Input population
#%end
#%option G_OPT_F_INPUT
#% key: projected_population
#% description: CSV file with projected population in subregions at certain times
#% guisection: Input population
#%end
#%option
#% type: integer
#% key: simulation_times
#% multiple: yes
#% required: yes
#% description: For which times demand is projected
#% guisection: Output
#%end
#%option
#% type: string
#% key: method
#% multiple: yes
#% required: yes
#% description: Relationship between developed cells (dependent) and population (explanatory)
#% options: linear, logarithmic, exponential, exp_approach, logarithmic2
#% descriptions:linear;y = A + Bx;logarithmic;y = A + Bln(x);exponential;y = Ae^(BX);exp_approach;y = (1 - e^(-A(x - B))) + C (SciPy);logarithmic2;y = A + B * ln(x - C) (SciPy)
#% answer: linear,logarithmic
#% guisection: Optional
#%end
#%option G_OPT_F_OUTPUT
#% key: plot
#% required: no
#% label: Save plotted relationship between developed cells and population into a file
#% description: File type is given by extension (.pfd, .png, .svg)
#% guisection: Output
#%end
#%option G_OPT_F_OUTPUT
#% key: demand
#% description: Output CSV file with demand (times as rows, regions as columns)
#% guisection: Output
#%end
#%option G_OPT_F_SEP
#% label: Separator used in input CSV files
#% guisection: Input population
#% answer: comma
#%endimport sys
import math
import numpy as npimport grass.script.core as gcore
import grass.script.utils as gutilsdef exp_approach(x, a, b, c):return (1 - np.exp(-a * (x - b))) + cdef logarithmic2(x, a, b, c):return a + b * np.log(x - c)def logarithmic(x, a, b):return a + b * np.log(x)def magnitude(x):return int(math.log10(x))def main():developments = options['development'].split(',')#待輸入柵格數據的名稱(文件名)observed_popul_file = options['observed_population']#歷史人口調查數據projected_popul_file = options['projected_population']#未來人口預測數據sep = gutils.separator(options['separator'])#預測發展年之間的間隔subregions = options['subregions']#分區的輸入柵格數據methods = options['method'].split(',')#選擇的回歸模型plot = options['plot']#擬合散點圖simulation_times = [float(each) for each in options['simulation_times'].split(',')]#預測的年份時間#遍歷選擇的方法,確定是否有scipy包的支持(exp_approach和logarithmic2這兩種方法)for each in methods:if each in ('exp_approach', 'logarithmic2'):try:from scipy.optimize import curve_fitexcept ImportError:gcore.fatal(_("Importing scipy failed. Method '{m}' is not available").format(m=each))# 模型需要輸入兩個以上的樣本,而指數方法需要至少三個樣本if len(developments) <= 2 and ('exp_approach' in methods or 'logarithmic2' in methods):gcore.fatal(_("Not enough data for method 'exp_approach'"))if len(developments) == 3 and ('exp_approach' in methods and 'logarithmic2' in methods):gcore.warning(_("Can't decide between 'exp_approach' and 'logarithmic2' methods"" because both methods can have exact solutions for 3 data points resulting in RMSE = 0"))#讀取歷史及預測人口數據,得到年份信息作為第一列observed_popul = np.genfromtxt(observed_popul_file, dtype=float, delimiter=sep, names=True)projected_popul = np.genfromtxt(projected_popul_file, dtype=float, delimiter=sep, names=True)year_col = observed_popul.dtype.names[0]observed_times = observed_popul[year_col]year_col = projected_popul.dtype.names[0]projected_times = projected_popul[year_col]#避免人口和柵格數據的年份不匹配if len(developments) != len(observed_times):gcore.fatal(_("Number of development raster maps doesn't not correspond to the number of observed times"))#計算每個分區中的已發展類型的細胞數gcore.info(_("Computing number of developed cells..."))table_developed = {}subregionIds = set()#遍歷輸入柵格數據for i in range(len(observed_times)):#顯示進度條信息gcore.percent(i, len(observed_times), 1)#分區讀取數據#read_command:將所有參數傳遞給pipe_command,然后等待進程完成,返回其標準輸出#r.univar:對柵格像元數據進行分區統計(Zonal Statistics),包括求和、計數、最大最小值、范圍、算術平均值、總體方差、標準差、變異系數等#g:在shell中打出運行狀態,t:以表格形式輸出而不是以標準流格式#zone:用于分區的的柵格地圖,必須是CELL類型 map:輸入柵格的名稱(路徑)data = gcore.read_command('r.univar', flags='gt', zones=subregions, map=developments[i])#遍歷每一行的數據,即每一年的不同區域的信息for line in data.splitlines():stats = line.split('|')#跳過表頭if stats[0] == 'zone':continue#獲取分區的id,以及已發展細胞數(即sum值)subregionId, developed_cells = stats[0], int(stats[12])#分區id數組subregionIds.add(subregionId)#第一個年份,作為基準不計入if i == 0:table_developed[subregionId] = []#已發展地區的細胞數量計數數組table_developed[subregionId].append(developed_cells)gcore.percent(1, 1, 1)#對分區id進行排序subregionIds = sorted(list(subregionIds))# linear interpolation between population points#在人口數量樣本點之間線性插值,獲取模擬的人口點population_for_simulated_times = {}#以年份進行遍歷for subregionId in table_developed.keys():#interp():一維線性插值函數#x: 要估計坐標點的x坐標值 xp:x坐標值 fp:y坐標值population_for_simulated_times[subregionId] = np.interp(x=simulation_times,xp=np.append(observed_times, projected_times),fp=np.append(observed_popul[subregionId],projected_popul[subregionId]))# regression# 模型的回歸demand = {}i = 0#引入matplotlib畫圖if plot:import matplotlibmatplotlib.use('Agg')import matplotlib.pyplot as pltn_plots = np.ceil(np.sqrt(len(subregionIds)))fig = plt.figure(figsize=(5 * n_plots, 5 * n_plots))#遍歷分區for subregionId in subregionIds:i += 1rmse = dict()predicted = dict()simulated = dict()coeff = dict()#遍歷選擇的每一種回歸模型for method in methods:# observed population points for subregion# 歷史人口數據值reg_pop = observed_popul[subregionId]# 線性插值之后的人口值simulated[method] = np.array(population_for_simulated_times[subregionId])if method in ('exp_approach', 'logarithmic2'):# we have to scale it firsty = np.array(table_developed[subregionId])magn = float(np.power(10, max(magnitude(np.max(reg_pop)), magnitude(np.max(y)))))x = reg_pop / magny = y / magnif method == 'exp_approach':initial = (0.5, np.mean(x), np.mean(y)) # this seems to work best for our data for exp_approachelif method == 'logarithmic2':popt, pcov = curve_fit(logarithmic, x, y)initial = (popt[0], popt[1], 0)with np.errstate(invalid='warn'): # when 'raise' it stops every time on FloatingPointErrortry:popt, pcov = curve_fit(globals()[method], x, y, p0=initial)except (FloatingPointError, RuntimeError):rmse[method] = sys.maxsize # so that other method is selectedgcore.warning(_("Method '{m}' cannot converge for subregion {reg}".format(m=method, reg=subregionId)))if len(methods) == 1:gcore.fatal(_("Method '{m}' failed for subregion {reg},"" please select at least one other method").format(m=method, reg=subregionId))else:predicted[method] = globals()[method](simulated[method] / magn, *popt) * magnr = globals()[method](x, *popt) * magn - table_developed[subregionId]coeff[method] = poptrmse[method] = np.sqrt((np.sum(r * r) / (len(reg_pop) - 2)))else:#簡單對數回歸 轉換xif method == 'logarithmic':reg_pop = np.log(reg_pop)#指數回歸if method == 'exponential':y = np.log(table_developed[subregionId])#線性回歸 獲得y值else:y = table_developed[subregionId]#回歸A = np.vstack((reg_pop, np.ones(len(reg_pop)))).T#擬合 獲取斜率和截距m, c = np.linalg.lstsq(A, y)[0] # y = mx + ccoeff[method] = m, c#對結果進行預測if method == 'logarithmic':predicted[method] = np.log(simulated[method]) * m + c#計算離差r = (reg_pop * m + c) - table_developed[subregionId]elif method == 'exponential':predicted[method] = np.exp(m * simulated[method] + c)r = np.exp(m * reg_pop + c) - table_developed[subregionId]else: # linearpredicted[method] = simulated[method] * m + cr = (reg_pop * m + c) - table_developed[subregionId]# RMSE# 計算均方根誤差 rmse[method] = np.sqrt((np.sum(r * r) / (len(reg_pop) - 2)))#均方根誤差最小的方法method = min(rmse, key=rmse.get)gcore.verbose(_("Method '{meth}' was selected for subregion {reg}").format(meth=method, reg=subregionId))# write demand# 組織需求數據demand[subregionId] = predicted[method]demand[subregionId] = np.diff(demand[subregionId])if np.any(demand[subregionId] < 0):gcore.warning(_("Subregion {sub} has negative numbers"" of newly developed cells, changing to zero".format(sub=subregionId)))demand[subregionId][demand[subregionId] < 0] = 0if coeff[method][0] < 0:demand[subregionId].fill(0)gcore.warning(_("For subregion {sub} population and development are inversely proportional,""will result in zero demand".format(sub=subregionId)))# draw# 做擬合模型的散點圖if plot:ax = fig.add_subplot(n_plots, n_plots, i)ax.set_title("{sid}, RMSE: {rmse:.3f}".format(sid=subregionId, rmse=rmse[method]))ax.set_xlabel('population')ax.set_ylabel('developed cells')# plot known pointsx = np.array(observed_popul[subregionId])y = np.array(table_developed[subregionId])ax.plot(x, y, marker='o', linestyle='', markersize=8)# plot predicted curvex_pred = np.linspace(np.min(x),np.max(np.array(population_for_simulated_times[subregionId])), 30)cf = coeff[method]if method == 'linear':line = x_pred * cf[0] + cf[1]label = "$y = {c:.3f} + {m:.3f} x$".format(m=cf[0], c=cf[1])elif method == 'logarithmic':line = np.log(x_pred) * cf[0] + cf[1]label = "$y = {c:.3f} + {m:.3f} \ln(x)$".format(m=cf[0], c=cf[1])elif method == 'exponential':line = np.exp(x_pred * cf[0] + cf[1])label = "$y = {c:.3f} e^{{{m:.3f}x}}$".format(m=cf[0], c=np.exp(cf[1]))elif method == 'exp_approach':line = exp_approach(x_pred / magn, *cf) * magnlabel = "$y = (1 - e^{{-{A:.3f}(x-{B:.3f})}}) + {C:.3f}$".format(A=cf[0], B=cf[1], C=cf[2])elif method == 'logarithmic2':line = logarithmic2(x_pred / magn, *cf) * magnlabel = "$y = {A:.3f} + {B:.3f} \ln(x-{C:.3f})$".format(A=cf[0], B=cf[1], C=cf[2])ax.plot(x_pred, line, label=label)ax.plot(simulated[method], predicted[method], linestyle='', marker='o', markerfacecolor='None')plt.legend(loc=0)labels = ax.get_xticklabels()plt.setp(labels, rotation=30)if plot:plt.tight_layout()fig.savefig(plot)# write demand# 輸出需求數據with open(options['demand'], 'w') as f:header = observed_popul.dtype.names # the order is kept heref.write('\t'.join(header))f.write('\n')i = 0for time in simulation_times[1:]:f.write(str(int(time)))f.write('\t')# put 0 where there are more counties but are not in regionfor sub in header[1:]: # to keep order of subregionsif sub not in subregionIds:f.write('0')else:f.write(str(int(demand[sub][i])))if sub != header[-1]:f.write('\t')f.write('\n')i += 1if __name__ == "__main__":options, flags = gcore.parser()sys.exit(main())