全國地鐵城市數據分析(數據清洗+可視化分析) 一確定問題: 由題看出其屬于開放問題,沒有明確的目的(即可認為無題),其重點是讓人發現問題(比如過程中分析時發現數據有哪些實在的問題就可以拿出來單獨分析),了解數據處理,數據可視化 但是可以通過該問題比較系統的了解數據分析的過程(實際上這里重點是數據分析中的評估部分)
1.獲取數據-采用爬蟲訪問高德地鐵地圖(map.amap.com)獲取數據的方法
request+xpath爬蟲:
得到url-http://map.amap.com/subway/index.html?&1100 發出請求得到響應對象-requests模塊 獲取響應對象數據-使用text屬性直接獲取網頁文本。有些數據為json字符串的形式,需要用到json轉化 解析數據-通過使用xpath 持久化存儲-在數據分析中一般將數據保存為csv格式,更利于處理
在過程中使用了time模塊-sleep函數防止網站宕機 主要的3個函數
# The three scraping functions.
# NOTE(review): `requests`, `etree` (lxml), `json`, `time` and `webdriver`
# (selenium) are used below but their imports are not part of this listing;
# they are presumably imported at the top of the original script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36 2345Explorer/10.11.0.20694'
}

# Maps a city's display name to its city ID; filled by get_city().
city_ID = {}


def _parse_city_node(node):
    """Return (ID, cityname, name) read from one city <a> element."""
    city_id = ''.join(node.xpath('.//@id'))  # attributes need the double slash
    pinyin = ''.join(node.xpath('.//@cityname'))  # pinyin name of the city
    display = ''.join(node.xpath('./text()'))  # "./" = relative to this node
    return city_id, pinyin, display


def get_city():
    """Fetch the subway index page and scrape every city listed on it.

    For each city link found by the two XPath queries, get_message() is called
    to append that city's stations to ``subway.csv``. Cities from the first
    query are additionally recorded in the module-level ``city_ID`` dict
    (name -> ID); cities from the second query are not, as in the original.

    Raises:
        requests.HTTPError: if the index page answers with an error status.
    """
    url = 'http://map.amap.com/subway/index.html?&1100'
    time.sleep(2)  # throttle requests
    res = requests.get(url=url, headers=headers)
    res.raise_for_status()
    res.encoding = res.apparent_encoding
    Html = etree.HTML(res.text)
    # The two city lists on the page.
    res1 = Html.xpath('/html/body/div[1]/div[1]/div[1]/div[2]/div[1]/a')
    res2 = Html.xpath('/html/body/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/a')
    for node in res1:
        ID, cityname, name = _parse_city_node(node)
        get_message(ID, cityname, name)
        city_ID.update({name: ID})
    for node in res2:
        ID, cityname, name = _parse_city_node(node)
        get_message(ID, cityname, name)


def get_message(ID, cityname, name):
    """Download one city's subway lines and append its stations to subway.csv.

    Each station becomes one line:
    ``name,cityname,poiid,sl,line,station`` where ``line`` is ``ln`` or
    ``ln(la)`` when the line has a non-empty ``la`` (branch) value.
    NOTE(review): ``sl`` appears to hold "lng,lat", which would make the row
    seven comma-separated fields — confirm against the service response.

    Args:
        ID: city ID used in the request URL.
        cityname: pinyin city name used in the request URL.
        name: display name of the city, written as the first CSV field.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    url = ('http://map.amap.com/service/subway?_1555502190153&srhdata='
           + ID + '_drw_' + cityname + '.json')
    response = requests.get(url=url, headers=headers)
    response.raise_for_status()
    time.sleep(2)  # throttle requests
    result = json.loads(response.text)

    rows = []
    for line in result['l']:
        # Lines with a branch name are written as "ln(la)".
        if len(line['la']) > 0:
            line_name = line['ln'] + '(' + line['la'] + ')'
        else:
            line_name = line['ln']
        for station in line['st']:
            rows.append(
                name + ',' + cityname + ',' + station['poiid'] + ','
                + station['sl'] + ',' + line_name + ',' + station['n'] + '\n'
            )

    # Open the file once per city; the context manager closes it.
    with open('subway.csv', 'a+', encoding='utf-8') as f:
        f.writelines(rows)
    print(name + '地鐵站點爬取結束')


def get_district(df_data):
    """Look up the administrative district of every station and store it.

    Loads one search page per (city, station) pair in headless Chrome and
    extracts the text between '市' and '區' from the result table. When the
    lookup fails or yields an empty district, the previously found district is
    reused. The result is written to ``df_data['行政區']`` in place.

    Args:
        df_data: DataFrame with the columns '站點城市' and '地鐵站點名稱'.
    """
    url1 = 'https://www.youbianku.com/SearchResults?address='
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    chrome_options = webdriver.ChromeOptions()
    # NOTE(review): this dict is configured but never passed to
    # webdriver.Chrome below; whether the setting takes effect depends on the
    # Selenium version — confirm before relying on it.
    desired_capabilities = DesiredCapabilities.CHROME
    desired_capabilities["pageLoadStrategy"] = "none"
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(
        options=chrome_options,
        executable_path=r'C:\Users\Dcnightmare\Desktop\chromedriver',
    )
    list_city = []
    last_text = ''
    try:
        for pair in zip(df_data['站點城市'].values, df_data['地鐵站點名稱']):
            driver.get(url=url1 + ''.join(pair))
            html = etree.HTML(driver.page_source)
            try:
                text = html.xpath('//div[@class="mw-parser-output"]/div[1]//table//tr[2]/td/text()')[0]
                text = text.split('市')[1].split('區')[0] + '區'
            except Exception:
                # Best effort: stop the page load and fall back to the
                # previous district.
                driver.execute_script("window.stop()")
                text = '區'
            if text != '區':
                last_text = text
            list_city.append(last_text)
    finally:
        # Always release the browser process, even if a page load fails.
        driver.quit()
    df_data['行政區'] = list_city
2.有了初步數據之后,需要進一步將數據變為我們可以使用的數據
缺失值處理 (標準化,歸一化,離散化…)這里沒有使用因為這些數據處理都是用于分類,回歸等任務中,而本文主要是對地鐵站數據的簡單分析 重復項處理
import pandas as pd

# Load the scraped stations; a DataFrame allows access by row and column name.
df_data = pd.read_csv('subway.csv', sep=',')
print(df_data)

# Missing-value check: report the shape and the non-null count per column.
print('刪除之前的行:', df_data.shape)
# info() prints its report itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
df_data.info()

# Duplicate handling: drop rows that are completely identical.
df_data_1 = df_data.drop_duplicates()
print('刪除之后的行:', df_data_1.shape)
這得到的數據真的就是最終數據集了嗎? 其實不然,如果仔細觀察可以發現其中有很多的站點存在重復(即一個站可能是多條地鐵線路站點的情況),所以在考慮求一個城市總的站點數量時還需要去除其中的重復站點數量。
# Station tallies per city.
number_sum = 0  # total number of distinct (city, station) pairs
num_station_check = {}  # (city, station) pairs already counted
num_station_old = {}  # rows per city before de-duplication
num_station_new = {}  # distinct stations per city

for pair in zip(df_data_3['站點城市'], df_data_3['地鐵站點名稱']):
    city_name, _station_name = pair

    # Raw data: every row counts.
    num_station_old[city_name] = num_station_old.get(city_name, 0) + 1

    # De-duplicated data: a station shared by several lines counts once.
    if pair in num_station_check:
        continue
    num_station_check[pair] = 1
    number_sum += 1
    num_station_new[city_name] = num_station_new.get(city_name, 0) + 1
到此數據集的準備工作就差不多了.
讓我們來試試做最有意思的部分吧!
3.數據分析+可視化 每個人開始分析的入手角度不同,所以看個人
分析各個城市的站點數量,因為他是最直觀的數據 分析城市的地鐵站點在全國分布情況 分析各城市地鐵站點在全國站點中的比率 分析各個城市的具體數據(內部行政區/市)
出于比較所以我還找了2020年的地鐵站點數據,如果有興趣也可這樣做,不過很可能有錯誤項,比如2020某城市地鐵數據量反而比2021年的高,不過找到這種情況之后可以通過自己再查新聞是否如此(還是挺有趣的)
這里主要通過的是pyecharts來進行的圖表繪制,因為其數據可視化效果比較好,matplotlib我用著不好使 一.分析各個城市的站點數量
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.charts import Line
import pandas as pd

attr = list(num_station_new.keys())
v1 = list(num_station_new.values())  # de-duplicated 2021 station counts
v2 = list(num_station_old.values())  # raw 2021 station counts (with repeats)
# Transfer-station ratio per city: (raw - distinct) / distinct, 3 decimals.
# A larger ratio suggests a denser network; this is a rough indicator only.
v1_v2 = [round((old - new) / new, 3) for new, old in zip(v1, v2)]
# The secondary axis below is labelled in percent (0-50 %), so the ratio is
# plotted as a percentage and bound to that axis; otherwise the sub-1 ratios
# are invisible next to the station counts on the primary axis.
v1_v2_percent = [round(ratio * 100, 1) for ratio in v1_v2]

bar1 = (
    # Chart size is configured through init_opts.
    Bar(init_opts=opts.InitOpts(width="1700px", height="800px"))
    .add_xaxis(attr)
    .add_yaxis(
        'station_number_2021_new',
        v1,
        itemstyle_opts=opts.ItemStyleOpts(color='blue'),
        label_opts=opts.LabelOpts(is_show=True, position='top', formatter="{c}", color='black'),
    )
    .add_yaxis(
        'station_number_2021_old',
        v2,
        itemstyle_opts=opts.ItemStyleOpts(color='green'),
        label_opts=opts.LabelOpts(is_show=True, position='top', formatter="{c}", color='blue'),
    )
    .add_yaxis(
        '換乘站點占比',
        v1_v2_percent,
        yaxis_index=1,  # use the secondary (percent) axis added below
        itemstyle_opts=opts.ItemStyleOpts(color='orange'),
        label_opts=opts.LabelOpts(is_show=True, position='top', formatter="{c}", color='green'),
    )
    .extend_axis(
        # Secondary y axis for the transfer-station ratio.
        yaxis=opts.AxisOpts(
            name="換乘站點占比率",
            type_="value",
            min_=0,
            max_=50,
            is_show=True,
            # Axis line hidden; its colour also sets the label colour.
            axisline_opts=opts.AxisLineOpts(
                is_show=False,
                linestyle_opts=opts.LineStyleOpts(color='#f6c065'),
            ),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(formatter="{value}%"),
        )
    )
    .set_global_opts(
        # The legend keeps its default top-centre position.
        xaxis_opts=opts.AxisOpts(
            name='城市',
            name_location='middle',
            name_gap=30,  # distance between the axis name and the axis line
            name_textstyle_opts=opts.TextStyleOpts(
                font_family='Microsoft Yahei',
                font_size=20,
            ),
            axistick_opts=opts.AxisTickOpts(
                is_show=True,
                is_inside=True,  # ticks drawn on the inner side
            ),
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(width=1, color='black')
            ),
            axislabel_opts=opts.LabelOpts(
                rotate=40,
                font_size=12,
                font_family='Arial',
                font_weight='bold',
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            name='station_number',
            name_location='middle',
            name_gap=30,
            name_textstyle_opts=opts.TextStyleOpts(
                font_family='Times New Roman',
                font_size=20,
                color='black',
            ),
            axistick_opts=opts.AxisTickOpts(is_show=False, is_inside=True),
            axislabel_opts=opts.LabelOpts(
                font_size=12,
                font_family='Times New Roman',
                formatter="{value}",  # plain numeric labels
            ),
            splitline_opts=opts.SplitLineOpts(is_show=True),  # horizontal grid lines
            axisline_opts=opts.AxisLineOpts(is_show=False),
        ),
        title_opts=opts.TitleOpts(
            title="城市地鐵站點數量",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20),
            subtitle="hello_data_analysis",
            pos_left='6%',
        ),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
    )
)
bar1.render('bar_2021_and_2020.html')  # write the chart to an HTML file

# Open the rendered page in the default browser.
import webbrowser

webbrowser.open('bar_2021_and_2020.html')
print("直方圖分析結束!")
解釋這里的一個數據-換乘站點占比:由地鐵站點實際分布的地圖,我初步認為,在城市建立地鐵的初期都是在擴大其覆蓋區域,把居民區(郊區)和商業區(市中心)以及火車站、機場等盡可能與客流量掛鉤的地方連接起來,很少會出現站點重合的現象,導致其換乘站點占比比較小。所以如果換乘站點占比大,可以粗略估計其地鐵的發展比較好,規模相對該城市規模而言也比較大,側面反映了當地的經濟發展水平比較高。當然還存在一些其他情況,比如說考慮到地質結構,太容易塌陷的地方或者地震斷裂帶也是不行的。 來看結果: 二.分析城市的地鐵站點在全國分布情況
headers = { 'user-agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36 2345Explorer/10.11.0.20694' }
# Longitude/latitude of each city, as [lng, lat] strings; filled by get_jinwei().
jinwei = {}
# NOTE(review): get_jinwei() fills these two dicts but no definition is
# visible in this listing, so they are created here; drop these two lines if
# they are already defined earlier in the script.
city_provice = {}  # city name -> province name
provice_city = {}  # province name -> list of its subway cities


def _collect_province(name, url_end, wanted):
    """Read one province page and record coordinates of the wanted cities."""
    ans_1 = requests.get(url=url_end, headers=headers)
    ans_1.raise_for_status()
    Html_1 = etree.HTML(ans_1.text)
    res_1 = Html_1.xpath('//div[@class="inner_con_art"]/table//tr')
    # NOTE(review): rows 2 .. len-1 are read, so the final <tr> is skipped;
    # kept as in the original — confirm whether the last row holds data.
    for num_ in range(2, len(res_1)):
        end_end = Html_1.xpath('//div[@class="inner_con_art"]/table//tr[' + str(num_) + ']/td/text()')
        if len(end_end) < 3:
            # Row without city / longitude / latitude cells.
            continue
        if end_end[0] in wanted:
            provice_city.setdefault(name, []).append(end_end[0])
            city_provice.update({end_end[0]: name})
            jinwei.update({end_end[0]: [end_end[1], end_end[2]]})


def get_jinwei(station_city):
    """Scrape province pages and record coordinates for the given cities.

    Follows the province links found in the first four table rows of the index
    page. For every listed city whose name is in ``station_city`` it updates
    the module-level ``jinwei``, ``city_provice`` and ``provice_city`` dicts.
    Hong Kong is added with hard-coded values at the end.

    Args:
        station_city: iterable of city names to look up.

    Raises:
        requests.HTTPError: if a page answers with an error status.
    """
    url = 'https://www.d1xz.net/xp/jingwei/'
    wanted = set(station_city)
    time.sleep(1)  # throttle requests
    ans = requests.get(url=url, headers=headers)
    ans.raise_for_status()
    Html = etree.HTML(ans.text)
    for i in range(1, 5):
        cells = Html.xpath('//div[@class="inner_con_art"]/table//tr[' + str(i) + ']/td')
        for cell in cells:
            # Continue the XPath query relative to the current cell.
            res_end = cell.xpath('./strong/a/@href')
            if not res_end:
                continue
            name = cell.xpath('./strong/a/text()')[0]
            url_end = 'https://www.d1xz.net/' + res_end[0]
            _collect_province(name, url_end, wanted)
    city_provice.update({'香港': '香港'})
    provice_city.update({'香港': ['香港']})
    jinwei.update({'香港': ['114.12', '22.26']})
得到各城市的經緯度數據之后就可以繪制地圖了
# Nationwide map.
from pyecharts import options as opts
# ThemeType is used for the map theme below, so it is imported here as well.
from pyecharts.globals import ChartType, ThemeType

data_yy = list(num_station_new.values())
data_xx = list(num_station_new.keys())
get_jinwei(data_xx)
data = num_station_new

# Sum the station counts of all cities belonging to the same province.
provice = {}
for i in data_xx:
    provice.setdefault(city_provice[i], 0)
    provice[city_provice[i]] += data[i]

from pyecharts import options as opts
from pyecharts.charts import Map

# NOTE(review): china_map is built but not rendered in this listing.
china_map = (
    Map(init_opts=opts.InitOpts(width="1530px", height="684px", theme=ThemeType.CHALK))
    .add("中國地鐵", [list(z) for z in provice.items()], "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="中國地鐵數據"),
        visualmap_opts=opts.VisualMapOpts(
            max_=max(provice.values()),
            is_piecewise=True,
            # 'white' (the original 'write' is not a colour name).
            textstyle_opts=opts.TextStyleOpts(color='white', font_size=20, font_family='Microsoft YaHei'),
        ),
    )
)

# Distribution of subway cities on a 3D map of China.
from pyecharts import options as opts
from pyecharts.charts import Map3D
from pyecharts.globals import ChartType
from pyecharts.commons.utils import JsCode

# Append each city's station count to its [lng, lat] entry; it becomes the
# bar height. This mutates the lists stored in jinwei.
for i in num_station_new:
    jinwei[i].append(num_station_new[i])
example_data = [(p, jinwei[p]) for p in num_station_new.keys()]

c = (
    Map3D(init_opts=opts.InitOpts(width="1500px", height="700px"))
    .add_schema(
        itemstyle_opts=opts.ItemStyleOpts(
            color="rgb(5,101,123)",
            opacity=1,
            border_width=0.8,
            border_color="rgb(62,215,213)",
        ),
        map3d_label=opts.Map3DLabelOpts(
            is_show=False,
            # Single quotes inside the JS keep the Python literal in one piece;
            # with inner double quotes the literal was split and the space
            # separator dropped out of the generated JavaScript.
            formatter=JsCode("function(data){return data.name + ' ' + data.value[2];}"),
        ),
        emphasis_label_opts=opts.LabelOpts(
            is_show=False,
            color="#fff",
            font_size=10,
            background_color="rgba(0,23,11,0)",
        ),
        light_opts=opts.Map3DLightOpts(
            main_color="#fff",
            main_intensity=1.2,
            main_shadow_quality="high",
            is_main_shadow=False,
            main_beta=10,
            ambient_intensity=0.3,
        ),
    )
    .add(
        series_name="數據",
        data_pair=example_data,
        type_=ChartType.BAR3D,
        bar_size=1,
        shading="lambert",
        label_opts=opts.LabelOpts(
            is_show=False,
            formatter=JsCode("function(data){return data.name + ' ' + data.value[2];}"),
        ),
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="城市數據"))
    .render("帶有數據展示地圖.html")
)

import webbrowser

webbrowser.open('帶有數據展示地圖.html')
print("地鐵城市在中國分布分析結束!")
結果如下:
可以看出地鐵城市的分布主要是一些沿海城市,而我們也知道沿海城市的經濟發展相對其他城市要高出一截,也因為經濟發展好,其流動人數也比較多. 查找原因后 發現實際上不是城市想修地鐵就可以修的,需要有高經濟支持(一般都是GDP達到…才行)和人口數量的要求. 三.分析各城市地鐵站點在全國站點中的比率 如果數量不好直接比較的話,那么通過數據所占百分比就可以較好的實現比較各個城市地鐵站點數量
# Share of each city's distinct stations in the national total, as a
# two-decimal percentage string.
station_proportion = [
    "%.2f" % (count / number_sum * 100) for count in num_station_new.values()
]

from pyecharts.charts import Pie
import pyecharts.options as opts

data_pie = tuple(zip(num_station_new.keys(), station_proportion))

pie_canvas = opts.InitOpts(width="1600px", height="1000px")
pie = Pie(init_opts=pie_canvas)
pie = pie.add(
    series_name='城市地鐵站點占比',
    data_pair=data_pie,
    center=[600, 600],
    label_opts=opts.LabelOpts(distance=30, is_show=True),
    tooltip_opts=opts.TooltipOpts(is_show=True),
    radius=None,
    # rosetype ('radius' / 'area') is intentionally left unset.
)
pie.render('station_number_pie.html')

import webbrowser

webbrowser.open('station_number_pie.html')
print('站點數據餅狀圖展示結束!')
結果如下: 四.分析各個城市的具體數據(內部行政區/市) 主要通過前面的3d中國地鐵地圖中的動態數據標簽進行選擇,借助selenium實現對頁面動態數據的爬取,由爬取到的數據通過判斷是否為含有多個地鐵市的省中,如果是則通過彈出的窗口選擇省地圖還是市地圖 主要就是show_city函數
from pyecharts.charts import BMap
from pyecharts.globals import BMapType, ChartType

# JavaScript snippet that opens a URL in a new browser tab.
js = "window.open('{}','_blank');"


def show_city(bro, city):
    """Render the detail pages for one region and open them in the browser.

    Two HTML files are written: ``<region>地鐵線路圖.html`` (subway lines drawn
    on a Baidu map) and ``<region>.html`` (a tab page with bar, funnel, map,
    pie and liquid charts). Both are opened in new tabs through ``bro`` and
    focus returns to the first window afterwards.

    Reads the module-level ``df_data_3``, ``jinwei``, ``num_station_new`` and
    ``num_city_new``.

    Args:
        bro: browser driver providing ``execute_script``, ``switch_to`` and
            ``window_handles``.
        city: indexable pair; ``city[0]`` is the region name used in titles and
            file names, ``city[1]`` the list of city names in that region.
    """
    from collections import Counter

    from pyecharts import options as opts
    from pyecharts.charts import Bar, Funnel, Grid, Liquid, Map, Pie, Tab
    from pyecharts.commons.utils import JsCode
    from pyecharts.globals import SymbolType, ThemeType

    def _legend_text(size):
        # Visual-map text style shared by several charts ('white'; the
        # original 'write' is not a colour name).
        return opts.TextStyleOpts(color='white', font_size=size, font_family='Microsoft YaHei')

    # ---- subway lines on a Baidu map -------------------------------------
    station = []  # points of the line currently being collected
    stations = []  # one list of points per line
    center_x = 0
    center_y = 0
    for i in city[1]:
        # NOTE(review): rows are assumed to be grouped by line already.
        data_ = df_data_3[df_data_3.站點城市 == i].values
        center_x += float(jinwei[i][0])
        center_y += float(jinwei[i][1])
        for num, j in enumerate(data_):
            # NOTE(review): columns 4/5/6 are presumably longitude, latitude
            # and line name — confirm against df_data_3.
            station.append([j[4], j[5], j[6]])
            # Close the current line on the last row or when column 6 changes.
            if num == len(data_) - 1 or data_[num + 1][6] != data_[num][6]:
                stations.append(station)
                station = []
    # Map centre = mean of the city coordinates.
    center_x /= len(city[1])
    center_y /= len(city[1])
    # Zoom out when several cities have to fit on the map.
    if len(city[1]) != 1:
        Zoom = 8
    else:
        Zoom = 10

    (
        BMap(init_opts=opts.InitOpts(width="1500px", height="800px", theme=ThemeType.MACARONS))
        .add_schema(
            baidu_ak='ybGicIBt9c56brfI4alusbE8SfclQcjW',  # Baidu Maps application key
            center=[center_x, center_y],
            zoom=Zoom,
            is_roam=True,  # allow mouse zoom and pan
        )
        .add(
            series_name=city[0] + '地鐵',
            type_=ChartType.LINES,
            data_pair=stations,
            is_polyline=True,  # each entry is a multi-segment line
            linestyle_opts=opts.LineStyleOpts(color="blue", opacity=0.5, width=1.5),
            effect_opts=opts.EffectOpts(symbol=SymbolType.ROUND_RECT, symbol_size=3, color="red"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=city[0] + "的地鐵線路"),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
        .add_control_panel(
            maptype_control_opts=opts.BMapTypeControlOpts(type_=BMapType.MAPTYPE_CONTROL_HORIZONTAL),
            scale_control_opts=opts.BMapScaleControlOpts(),
            overview_map_opts=opts.BMapOverviewMapControlOpts(is_open=True),
            navigation_control_opts=opts.BMapNavigationControlOpts(),
        )
        .set_series_opts(effect_opts=opts.EffectOpts(is_show=True, color='red'))
        .render(city[0] + '地鐵線路圖.html')
    )

    # ---- per-district station counts ------------------------------------
    data_yy = list(num_station_new.values())
    data = num_station_new
    station_sum = 0  # stations of this region
    df_x = []  # district names
    df_y = []  # station count per district
    for i in city[1]:
        station_sum += data[i]
        count = dict(Counter(num_city_new[i]))
        df_x += list(count.keys())
        df_y += list(count.values())
    # Districts ordered by station count, largest first.
    data_xy = dict(sorted(zip(df_x, df_y), key=lambda x: x[1], reverse=True))
    max_count = max(df_y, default=0)  # tolerate a region without district data

    # ---- region map + pie -----------------------------------------------
    if len(city[1]) != 1:
        # Province with several subway cities: one entry per city.
        df_city_x = [i + '市' for i in city[1]]
        df_city_y = [num_station_new[i] for i in city[1]]
        region_map = (
            Map(init_opts=opts.InitOpts(width="1700px", height="760px", theme=ThemeType.CHALK))
            .add(city[0], [list(z) for z in zip(df_city_x, df_city_y)], maptype=city[0])
            .set_global_opts(
                title_opts=opts.TitleOpts(
                    title=city[0] + "地鐵數據",
                    title_textstyle_opts=opts.TextStyleOpts(font_size=20),
                ),
                visualmap_opts=opts.VisualMapOpts(
                    max_=max(df_city_y),
                    is_piecewise=True,
                    range_color=["lightskyblue", "yellow", "orangered"],
                    range_text=["High", "Low"],
                    textstyle_opts=_legend_text(20),
                ),
                legend_opts=opts.LegendOpts(is_show=False),
            )
        )
        show_pie = (
            Pie(init_opts=opts.InitOpts(width="200px", height="200px", theme=ThemeType.DARK))
            .add(
                city[0] + "各城市地鐵占比",
                data_pair=[list(i) for i in zip(df_city_x, df_city_y)],
                radius=["20%", "30%"],
                center=[1200, 450],
                label_opts=opts.LabelOpts(
                    position="outside",
                    # {d} is the ECharts percentage placeholder (restored; the
                    # listing contained a corrupted token in its place).
                    formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c} {per|{d}%} ",
                    background_color="#eee",
                    border_color="#aaa",
                    border_width=1,
                    border_radius=4,
                    rich={
                        "a": {"color": "#999", "lineHeight": 22, "align": "center"},
                        "abg": {
                            "backgroundColor": "#e3e3e3",
                            "width": "100%",
                            "align": "right",
                            "height": 22,
                            "borderRadius": [4, 4, 0, 0],
                        },
                        "hr": {
                            "borderColor": "blue",
                            "width": "100%",
                            "borderWidth": 0.5,
                            "height": 0,
                        },
                        "b": {"fontSize": 16, "lineHeight": 33},
                        "per": {
                            "color": "#eee",
                            "backgroundColor": "#334455",
                            "padding": [2, 4],
                            "borderRadius": 2,
                        },
                    },
                ),
            )
            .set_global_opts(legend_opts=opts.LegendOpts(is_show=True))
        )
    else:
        # Single city: one entry per district.
        region_map = (
            Map(init_opts=opts.InitOpts(width="1700px", height="760px", theme=ThemeType.DARK))
            .add(city[0], [list(z) for z in zip(df_x, df_y)], maptype=city[0])
            .set_global_opts(
                title_opts=opts.TitleOpts(title=city[0] + "地鐵數據"),
                visualmap_opts=opts.VisualMapOpts(
                    max_=max_count,
                    is_piecewise=True,
                    range_color=["lightskyblue", "yellow", "orangered"],
                    range_text=["High", "Low"],
                    textstyle_opts=_legend_text(20),
                ),
                legend_opts=opts.LegendOpts(is_show=False),
            )
        )
        show_pie = (
            Pie(init_opts=opts.InitOpts(width="1530px", height="684px", theme=ThemeType.CHALK))
            .add(
                city[0] + "各區地鐵占比",
                data_pair=[list(i) for i in zip(df_x, df_y)],
                radius=["40%", "60%"],
                center=[1200, 450],
                label_opts=opts.LabelOpts(is_show=True, position="inside", color='black'),
            )
            .set_global_opts(
                legend_opts=opts.LegendOpts(is_show=False),
                title_opts=opts.TitleOpts(
                    title=city[0] + "各區地鐵站點占比",
                    pos_top='top',
                    pos_right='10%',
                    title_textstyle_opts=opts.TextStyleOpts(color='purple'),
                ),
            )
            .set_series_opts(
                tooltip_opts=opts.TooltipOpts(
                    trigger="item",
                    # {d} restored here as well (percentage placeholder).
                    formatter="{a} <br/>{b}: {c} ({d}%)",
                ),
            )
        )

    # ---- share of this region in the national total ----------------------
    national_share = round(station_sum / sum(data_yy), 4)
    liquid = (
        Liquid(init_opts=opts.InitOpts(width="200px", height="200px", theme=ThemeType.CHALK))
        .add(
            city[0] + '占比',
            [national_share, 1 - national_share],
            center=[1200, 450],
            shape=SymbolType.ARROW,
            label_opts=opts.LabelOpts(
                font_size=40,
                formatter=JsCode(
                    """function (param) {
                        return (Math.floor(param.value * 10000) / 100) + '%';
                    }"""
                ),
                position="inside",
            ),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=city[0] + "地鐵站點在全國占比",
                pos_top='top',
                pos_right='10%',
                title_textstyle_opts=opts.TextStyleOpts(color='purple'),
            )
        )
    )

    # ---- funnel + bar of the (at most) 15 largest districts --------------
    copy_x = list(data_xy.keys())[:15]
    copy_y = list(data_xy.values())[:15]
    loudou = (
        Funnel(init_opts=opts.InitOpts(width="1530px", height="684px", theme=ThemeType.CHALK))
        .add(
            city[0] + "地鐵",
            [list(z) for z in zip(copy_x, copy_y)],
            label_opts=opts.LabelOpts(position="inside"),
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=True),
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                type_='color',
                max_=max_count,
                is_piecewise=True,
                textstyle_opts=_legend_text(15),
            ),
            title_opts=opts.TitleOpts(
                title=city[0] + '地鐵分析',
                title_textstyle_opts=opts.TextStyleOpts(font_size=20),
            ),
        )
    )
    show_bar = (
        Bar(init_opts=opts.InitOpts(width="1700px", height="760px", theme=ThemeType.CHALK))
        .add_xaxis(copy_x)
        .add_yaxis(
            '2021' + city[0] + '地鐵分布',
            copy_y,
            itemstyle_opts=opts.ItemStyleOpts(color='blue'),
            label_opts=opts.LabelOpts(is_show=True, position='top', formatter="{c}", color='Magenta4'),
        )
        .set_global_opts(
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                type_='color',
                max_=max_count,
                is_piecewise=True,
                textstyle_opts=_legend_text(15),
            ),
            xaxis_opts=opts.AxisOpts(
                name='City',
                name_location='middle',
                name_gap=30,  # distance between the axis name and the axis line
                name_textstyle_opts=opts.TextStyleOpts(
                    font_family='Microsoft Yahei',
                    font_size=20,
                ),
                axistick_opts=opts.AxisTickOpts(
                    is_show=True,
                    is_inside=True,  # ticks drawn on the inner side
                ),
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(width=1, color='black')
                ),
                axislabel_opts=opts.LabelOpts(
                    rotate=40,
                    font_size=12,
                    font_family='Arial',
                    font_weight='bold',
                ),
            ),
            yaxis_opts=opts.AxisOpts(
                name='station_number',
                name_location='middle',
                name_gap=30,
                name_textstyle_opts=opts.TextStyleOpts(
                    font_family='Times New Roman',
                    font_size=20,
                    color='black',
                ),
                axistick_opts=opts.AxisTickOpts(is_show=False, is_inside=True),
                axislabel_opts=opts.LabelOpts(
                    font_size=12,
                    font_family='Times New Roman',
                    formatter="{value}",  # plain numeric labels
                ),
                splitline_opts=opts.SplitLineOpts(is_show=True),  # horizontal grid lines
                axisline_opts=opts.AxisLineOpts(is_show=False),
            ),
            title_opts=opts.TitleOpts(
                title=city[0] + "地鐵站點數量",
                title_textstyle_opts=opts.TextStyleOpts(font_size=20),
            ),
        )
    )

    # ---- page layout ------------------------------------------------------
    grid1 = (
        Grid(init_opts=opts.InitOpts(width="1530px", height="684px", theme=ThemeType.CHALK))
        .add(region_map, grid_opts=opts.GridOpts(pos_bottom='50%', pos_right='left'))
        .add(show_pie, grid_opts=opts.GridOpts(pos_bottom='50%', pos_left='55%'))
    )
    grid2 = (
        Grid(init_opts=opts.InitOpts(width="1530px", height="684px", theme=ThemeType.CHALK))
        .add(show_bar, grid_opts=opts.GridOpts(pos_right='55%'))
        .add(liquid, grid_opts=opts.GridOpts())
    )
    show_tab = (
        Tab()
        .add(grid2, city[0] + '地鐵數量情況')
        .add(loudou, city[0] + '地鐵(<=15)')
        .add(grid1, city[0] + '地鐵分布情況')
    )
    show_tab.render(city[0] + '.html')

    # Open both pages in new tabs, then return focus to the first window.
    bro.execute_script(js.format(city[0] + '.html'))
    bro.execute_script(js.format(city[0] + '地鐵線路圖' + '.html'))
    bro.switch_to.window(bro.window_handles[0])
結果如下: 成都市數據 江蘇省的數據:(其他省同理) 主要還是說明了其地鐵規模和分布情況 4.總結: 本次項目實際的分析比較少,主要是站點和其城市的分析,比較表層,沒有深入。比如數據集其實可以加入不同年份的數據進行比較,或者加入不同城市的地鐵帶來的收入,不同城市人們對地鐵的滿意度,等等數據內容,出于網上資源有限所以沒有能找到可用數據。本數據集更多側重在可視化上,實際的原因分析比較少。
總結
以上是生活随笔 為你收集整理的全国地铁城市数据分析(python实现) 的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔 網站內容還不錯,歡迎將生活随笔 推薦給好友。