pythonmysql数据分析 tableau_python执行mysql 计算复购率+pyechart+Excel+Tableau绘制双Y轴图...
現有某超市的訂單數據,內容如下:
先求每個會員在每個月的訂單數
sql = ''' --
select MemberID, count(SheetID) AS 每個會員的訂單數,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df.head(8)
給上表新增一列,判斷其是否在本月多次購買
select temp.YearMonth,temp.MemberID, temp.每個會員的訂單數,
case when temp.每個會員的訂單數>1 then 1 else null
end as 是否在本月多次購買
from
(
select MemberID, count(SheetID) AS 每個會員的訂單數,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
;
再對上表進行分組統計多少會員是復購會員
sql = ''' -- 使用 count(temp_out.是否在本月多次購買) 也可以
select temp_out.YearMonth ,count(temp_out.MemberID) AS 本月購物的會員數, sum(temp_out.是否在本月多次購買) as 復購會員人數,
sum(temp_out.是否在本月多次購買)/count(temp_out.MemberID) as 占比
from (
select temp.YearMonth,temp.MemberID, temp.每個會員的訂單數,
case when temp.每個會員的訂單數>1 then 1 else null
end as 是否在本月多次購買
from
(
select MemberID, count(SheetID) AS 每個會員的訂單數,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
) temp_out
group by temp_out.YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df
上面的方法嵌套了三層子表:orderlist本身, temp , temp_out,實際上可以不用產生 是否在本月多次購買的子表,即上表的temp表,可以使用 sum(case when 每個會員的訂單數 >1 else 0 end )統計,這樣就少產生了一個子表。
sql = '''
select temp.YearMonth ,count(temp.MemberID) AS 本月購物的會員數,
sum(case when temp.每個會員的訂單數 >1 then 1 else 0 end) as 復購會員人數,
sum(case when temp.每個會員的訂單數 >1 then 1 else 0 end)/count(temp.MemberID) as 占比
from
(
select MemberID, count(SheetID) AS 每個會員的訂單數,substring(STime,1,7) as YearMonth
from OrderList
where MemberID IS NOT NULL
group by MemberID ,YearMonth
order by YearMonth
) temp
group by temp.YearMonth
;
'''
df = pd.read_sql_query(sql, engine)
df.tail(24).to_csv('每月的復購率.csv',encoding='utf_8_sig')
df.head(8)
使用pyecharts 0.5畫圖
#0.5版本
import pyecharts
from pyecharts import Overlap, Bar, Line, Grid, EffectScatter
grid = Grid()
v1 = list( df.tail(24)['本月購物的會員數'].values)
v2 = list(df.tail(24)['復購會員人數'].values)
v3 = list( df.tail(24)['占比'].values*100) #
my_attr = list(df.tail(24)['YearMonth'].values) # ["{}號".format(i) for i in range(1, len(v1)+1)] #attr =
bar = Bar(title="DeepWind超市(南沙區)", title_pos="20%")
bar.add("會員人數", my_attr, v1)
bar.add("復購人數",my_attr,v2,yaxis_formatter=" 人",
yaxis_max=5200,
legend_pos="25%",
legend_orient="horizontal",
legend_top="15%",
)
overlap = Overlap(width=1200, height=600)
overlap.add(bar)
line = Line()
line.add("復購人數占比", my_attr, v3, yaxis_formatter=" %",yaxis_max=100)
es = EffectScatter()
#overlap = Overlap(width=1200, height=600)
overlap.add(line, is_add_yaxis=True, yaxis_index=1)
#es.add("", my_attr, v3, effect_scale=8,is_add_yaxis=True, yaxis_index=1,yaxis_max=4000)
#overlap.add(es)
grid.add(overlap, grid_right="20%")
grid.render()
overlap.render()
#bar
grid
使用pyecharts1.6 畫圖
import pyecharts.options as opts
from pyecharts.charts import Bar, Line
from pyecharts.globals import ThemeType
v1 = list( df.tail(24)['本月購物的會員數'].values)
v2 = list(df.tail(24)['復購會員人數'].values)
v3 = list( df.tail(24)['占比'].values*100) #
x_data= list(df.tail(24)['YearMonth'].values)
v1 = [int(each) for each in v1]
v2 = [int(each) for each in v2]
v3 = [int(each) for each in v3]
x_data= [str(each) for each in x_data]
bar = (
Bar(init_opts=opts.InitOpts(width="800px", height="400px",theme=ThemeType.DARK))
.add_xaxis(xaxis_data=x_data)
.add_yaxis(
series_name="會員人數",
yaxis_data= v1,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
.add_yaxis(
series_name="復購人數",
yaxis_data= v2,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
.extend_axis(
yaxis=opts.AxisOpts(
name="占比",
type_="value",
min_=50,
max_=100,
interval=10,
axislabel_opts=opts.LabelOpts(formatter="{value} %"),
)
)
.set_global_opts(
tooltip_opts=opts.TooltipOpts(
is_show=True, trigger="axis", axis_pointer_type="cross"
),
xaxis_opts=opts.AxisOpts(
type_="category",
axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
),
yaxis_opts=opts.AxisOpts(
name="人數",
type_="value",
min_=0,
max_=5500,
interval=500,
axislabel_opts=opts.LabelOpts(formatter="{value}人"),
axistick_opts=opts.AxisTickOpts(is_show=True),
splitline_opts=opts.SplitLineOpts(is_show=True),
),
)
)
line = (
Line()
.add_xaxis(xaxis_data=x_data)
.add_yaxis(
series_name="占比",
yaxis_index=1,
y_axis=v3,
label_opts=opts.LabelOpts(is_show=False),
markpoint_opts=opts.MarkPointOpts(
data=[
opts.MarkPointItem(type_="max", name="最大值"),
opts.MarkPointItem(type_="min", name="最小值"),
]
),
)
)
bar.overlap(line).render("復購人數.html")
bar.overlap(line).render_notebook()#render("mixed_bar_and_line.html")
使用Excel畫雙Y軸圖
此時只有一個Y軸,需要對占比重新生成一個列。
設置數據系列格式22.jpg
得到下圖
最終得到
g
使用Tableau畫雙軸圖
將得到
總結
以上是生活随笔為你收集整理的pythonmysql数据分析 tableau_python执行mysql 计算复购率+pyechart+Excel+Tableau绘制双Y轴图...的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: vc设置窗口在另一个窗口前面_日常办公如
- 下一篇: slf4j 如何返回堆栈_重学JS系列