avatar

基于 Jupyter Notebook 和Plotly的交互式COVID-19实时追踪可视化系统(上)

基于 Jupyter Notebook 和Plotly的交互式COVID-19实时追踪可视化系统(上)

2019年末,一种新型冠状病毒在中国湖北武汉爆发,该病毒被命名为严重急性呼吸综合征冠状病毒2(SARS-CoV-2)。疫情已蔓延到中国各个省份以及全球213个国家和地区;截至2020年5月31日,全球累计确诊人数已超过600万。Michael Freeborn 开发了一个在线的交互式仪表盘,用于实时可视化和追踪2019冠状病毒病(COVID-19)的确诊病例。

1.项目准备

1
2
from datetime import datetime, timezone
# Build a "Last updated" string from the current time in UTC
# (day, full month name, year, HH:MM:SS and the timezone name).
f"Last updated: {datetime.now(tz=timezone.utc):%d %B %Y %H:%M:%S %Z}"
1
2
3
4
5
6
7
8
9
10
import re
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from IPython.display import display
from plotly.subplots import make_subplots

# Cap the number of columns pandas shows when displaying a DataFrame.
pd.options.display.max_columns = 12
1
2
3
4
5
6
7
# Matches column labels shaped like "1/22/20" (one- or two-digit month and
# day, two-digit year).
date_pattern = re.compile(r"\d{1,2}/\d{1,2}/\d{2}")


def reformat_dates(col_name: str) -> str:
    """Convert a month/day/year column label to day/month/year.

    Args:
        col_name: A column label, e.g. ``"1/22/20"`` or ``"country"``.

    Returns:
        The label rewritten as zero-padded ``dd/mm/YYYY`` (``"22/01/2020"``)
        when it parses as ``%m/%d/%y``; otherwise the label unchanged.
    """
    try:
        parsed = datetime.strptime(col_name, "%m/%d/%y")
    except ValueError:
        # Not a date column (or not a valid calendar date): keep the label.
        return col_name
    return date_pattern.sub(parsed.strftime("%d/%m/%Y"), col_name, count=1)
1
2
3
# This GitHub repository holds the time-series data for all coronavirus cases:
# https://github.com/CSSEGISandData/COVID-19
confirmed_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"

2.整体图表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Source column name -> shorter name used throughout the notebook.
renamed_columns_map = {
    "Country/Region": "country",
    "Province/State": "location",
    "Lat": "latitude",
    "Long": "longitude"
}

# Columns removed after renaming; only "country" and the date columns remain.
cols_to_drop = ["location", "latitude", "longitude"]


def _load_time_series(csv_url):
    """Read one time-series CSV, rename its columns and drop the unused ones."""
    raw_df = pd.read_csv(csv_url)
    named_df = raw_df.rename(columns=renamed_columns_map)
    # Date-like column labels are rewritten by reformat_dates; others pass through.
    dated_df = named_df.rename(columns=reformat_dates)
    return dated_df.drop(columns=cols_to_drop)


confirmed_cases_df = _load_time_series(confirmed_cases_url)
deaths_df = _load_time_series(deaths_url)

# Show the first rows of each frame as a quick visual check.
for preview_df in (confirmed_cases_df, deaths_df):
    display(preview_df.head())

1
2
3
4
5
6
7
8
9
10
# Keep just the distinct country names, then join them against a second .csv
# that supplies a country code for each name.
# The plotting functions need the country codes to identify countries on the map.
unique_countries_df = confirmed_cases_df[["country"]].drop_duplicates()
country_codes_df = pd.read_csv(
    "country_code_mapping.csv",
    usecols=["country", "alpha-3_code"],
    index_col="country",
)
coded_countries_df = unique_countries_df.join(country_codes_df, how="left", on="country")
geo_data_df = coded_countries_df.set_index("country")
1
2
3
4
5
# The country-code .csv and the COVID-19 data source disagree on some country
# names. This selection should be empty; any row shown here means a country
# name in the .csv needs to be changed so that it matches.
names_without_codes = ["Diamond Princess", "MS Zaandam", "West Bank and Gaza"]
code_is_missing = geo_data_df["alpha-3_code"].isna()
is_known_exception = geo_data_df.index.isin(names_without_codes)
geo_data_df[code_is_missing & ~is_known_exception]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Column labels shaped like dd/mm/YYYY are the date columns; keep them in
# column order.
dates_list = (
    deaths_df.filter(regex=r"(\d{2}/\d{2}/\d{4})", axis=1)
    .columns
    .to_list()
)

# Map each date -> DataFrame holding that day's confirmed-case and death
# counts for every country.
cases_by_date = {}
for date in dates_list:
    # Select the single column for this date and give it a fixed name.
    confirmed_cases_day_df = (
        confirmed_cases_df
        .filter(like=date, axis=1)
        .rename(columns=lambda col: "confirmed_cases")
    )
    deaths_day_df = deaths_df.filter(like=date, axis=1).rename(columns=lambda col: "deaths")
    cases_df = confirmed_cases_day_df.join(deaths_day_df).set_index(confirmed_cases_df["country"])

    # A country can occupy several rows in the source data, so sum them into
    # one row per country and keep that country's code.
    date_df = (
        geo_data_df.join(cases_df)
        .groupby("country")
        .agg({"confirmed_cases": "sum", "deaths": "sum", "alpha-3_code": "first"})
    )
    # Countries with no confirmed cases on this date are left out.
    date_df = date_df[date_df["confirmed_cases"] > 0].reset_index()

    cases_by_date[date] = date_df

# Each day's data frame looks like this:
cases_by_date[dates_list[-1]].head()

1
2
3
4
5
6
7
8
# Helper used when building the frames for the map animation.
def frame_args(duration):
    """Return the options dict passed as the second "animate" argument.

    Args:
        duration: Value used for both the frame duration and the transition
            duration; it is passed through unchanged.

    Returns:
        A dict with "frame", "mode", "fromcurrent" and "transition" entries,
        using immediate mode and a linear transition easing.
    """
    return {
        "frame": {"duration": duration},
        "mode": "immediate",
        "fromcurrent": True,
        "transition": {"duration": duration, "easing": "linear"},
    }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Two stacked subplots: row 1 is a geo subplot for the map, row 2 an xy
# subplot for the totals line chart, with an 80% / 20% height split.
subplot_specs = [[{"type": "scattergeo"}], [{"type": "xy"}]]
fig = make_subplots(rows=2, cols=1, specs=subplot_specs, row_heights=[0.8, 0.2])

# Animation controls: a play button and a stop button.
play_button = {
    "label": "▶",  # play symbol
    "method": "animate",
    "args": [None, frame_args(100)],
}
stop_button = {
    "label": "◼",  # stop symbol
    "method": "animate",
    "args": [[None], frame_args(0)],
}
animation_controls = {
    "type": "buttons",
    "buttons": [play_button, stop_button],
    "showactive": False,
    "direction": "left",
}

# Set up the geo options, the (initially empty) slider, the buttons and the title.
fig.layout.geo = {"showcountries": True}
fig.layout.sliders = [{"active": 0, "steps": []}]
fig.layout.updatemenus = [animation_controls]
fig.layout.title = {"text": "Covid-19 Global Case Tracker", "x": 0.5}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
frames = []
steps = []

# Colour-bar ticks run from 1 up to the largest confirmed-case count of any
# country on the latest date.
max_country_confirmed_cases = cases_by_date[dates_list[-1]]["confirmed_cases"].max()

# Case counts differ hugely between countries, so the colour scale is logarithmic.
high_tick = np.log1p(max_country_confirmed_cases)
low_tick = np.log1p(1)
log_tick_values = np.geomspace(low_tick, high_tick, num=6)

# The tick *labels*, however, should show real case counts (not log(n_cases)).
visual_tick_values = np.expm1(log_tick_values).astype(int)
# Rounding error can leave the top label slightly off the true maximum, so
# set the last colour-bar label explicitly.
visual_tick_values[-1] = max_country_confirmed_cases
visual_tick_values = [f"{val:,}" for val in visual_tick_values]

# Line-chart data: a list of tuples [(confirmed_cases, deaths), ...] holding
# the totals over all countries for each date. The columns are summed directly
# rather than through positional Series indexing, which pandas has deprecated.
cases_deaths_totals = [
    (int(df["confirmed_cases"].sum()), int(df["deaths"].sum()))
    for df in cases_by_date.values()
]

confirmed_cases_totals = [daily_total[0] for daily_total in cases_deaths_totals]
deaths_totals = [daily_total[1] for daily_total in cases_deaths_totals]


# This loop generates the data for each animation frame (one per date).
for i, (date, data) in enumerate(cases_by_date.items(), start=1):
    # NOTE: df is the same object stored in cases_by_date, so the columns
    # added below are also visible through cases_by_date afterwards.
    df = data

    # The z values (used to colour each country) need to be logarithmic.
    df["confirmed_cases_log"] = np.log1p(df["confirmed_cases"])

    # Hover text shown for each country.
    df["text"] = (
        date
        + "<br>"
        + df["country"]
        + "<br>Confirmed cases: "
        + df["confirmed_cases"].apply(lambda x: "{:,}".format(x))
        + "<br>Deaths: "
        + df["deaths"].apply(lambda x: "{:,}".format(x))
    )

    # Create the choropleth trace for this date.
    choro_trace = go.Choropleth(
        **{
            "locations": df["alpha-3_code"],
            "z": df["confirmed_cases_log"],
            "zmax": high_tick,
            "zmin": low_tick,
            "colorscale": "reds",
            "colorbar": {
                "ticks": "outside",
                "ticktext": visual_tick_values,
                "tickmode": "array",
                "tickvals": log_tick_values,
                "title": {"text": "<b>Confirmed Cases</b>"},
                "len": 0.8,
                "y": 1,
                "yanchor": "top"
            },
            "hovertemplate": df["text"],
            "name": "",
            "showlegend": False
        }
    )

    # Create the confirmed-cases trace: totals up to and including this date.
    confirmed_cases_trace = go.Scatter(
        x=dates_list,
        y=confirmed_cases_totals[:i],
        # A single point cannot form a line, so the first frame uses markers.
        mode="markers" if i == 1 else "lines",
        name="Total Confirmed Cases",
        line={"color": "Red"},
        hovertemplate="%{x}<br>Total confirmed cases: %{y:,}<extra></extra>"
    )

    # Create the deaths trace: totals up to and including this date.
    deaths_trace = go.Scatter(
        x=dates_list,
        y=deaths_totals[:i],
        mode="markers" if i == 1 else "lines",
        name="Total Deaths",
        line={"color": "Black"},
        hovertemplate="%{x}<br>Total deaths: %{y:,}<extra></extra>"
    )

    if i == 1:
        # The first frame is what the figure initially shows...
        fig.add_trace(choro_trace, row=1, col=1)
        fig.add_traces([confirmed_cases_trace, deaths_trace], rows=[2, 2], cols=[1, 1])
    # ...and every frame is appended to the `frames` list and to the slider.
    frames.append({"data": [choro_trace, confirmed_cases_trace, deaths_trace], "name": date})

    steps.append(
        {"args": [[date], frame_args(50)], "label": date, "method": "animate",}
    )

# Tidy up the axes and finalise the figure so it is ready for display.
fig.update_xaxes(range=[0, len(dates_list)-1], visible=False)
fig.update_yaxes(range=[0, max(confirmed_cases_totals)])
fig.frames = frames
fig.layout.sliders[0].steps = steps
fig.layout.geo.domain = {"x": [0,1], "y": [0.2, 1]}
fig.update_layout(
    height=650,
    legend={"x": 0.05, "y": 0.175, "yanchor": "top", "bgcolor": "rgba(0, 0, 0, 0)"})
fig

文章作者: CodeHao
文章链接: http://codehao.top/cl1c6w8x40026jkla6avebg73/
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 CodeHao's Blog
打赏
  • 微信
    微信
  • 支付宝
    支付宝

评论
简体中文