python機器學習使數據更鮮活的可視化工具Pandas_Alive
數據動畫可視化製作在日常工作中是非常實用的一項技能。目前支持動畫可視化的庫主要以Matplotlib-Animation為主,其特點為:配置複雜,保存動圖容易報錯。
安裝方法
pip install pandas_alive # or: conda install pandas_alive -c conda-forge
使用說明
pandas_alive 的設計靈感來自 bar_chart_race,為方便快速進行動畫可視化製作,在數據的格式上需要滿足如下條件:
- 每行表示單個時間段
- 每列包含特定類別的值
- 索引包含時間組件(可選)
支持示例展示
水平條形圖
import pandas_alive

# Default sample dataset (no dataset name is passed).
covid_df = pandas_alive.load_dataset()

# Animate the frame and write the result to the given GIF path.
covid_df.plot_animated(
    filename="examples/perpendicular-example.gif",
    perpendicular_bar_func="mean",
)
垂直條形圖比賽
import pandas_alive

# Default sample dataset (no dataset name is passed).
covid_df = pandas_alive.load_dataset()

# orientation="v" requests vertical bars; the animation is written to the GIF path.
covid_df.plot_animated(
    filename="examples/example-barv-chart.gif",
    orientation="v",
)
條形圖
與以時間為 x 軸的折線圖類似
import pandas_alive

covid_df = pandas_alive.load_dataset()

# Collapse all columns into a single per-row total; any missing total becomes 0.
row_totals = covid_df.sum(axis=1).fillna(0)

# Animate the totals as a bar chart and write the result to the GIF path.
row_totals.plot_animated(
    filename="examples/example-bar-chart.gif",
    kind="bar",
    period_label={"x": 0.1, "y": 0.9},
    enable_progress_bar=True,
    steps_per_period=2,
    interpolate_period=True,
    period_length=200,
)
餅圖
import pandas_alive

covid_df = pandas_alive.load_dataset()

# Animate the frame as a pie chart and write the result to the GIF path.
covid_df.plot_animated(
    filename="examples/example-pie-chart.gif",
    kind="pie",
    rotatelabels=True,
    period_label={"x": 0, "y": 0},
)
多邊形地理空間圖
import contextily
import geopandas
import pandas_alive

# Read the geopackage into a GeoDataFrame.
gdf = geopandas.read_file("data/italy-covid-region.gpkg")

# Use the region column as the index, then drop the now-duplicated column.
gdf.index = gdf.region
gdf = gdf.drop("region", axis=1)

# Animate over a basemap taken from the given contextily tile provider.
map_chart = gdf.plot_animated(
    filename="examples/example-geo-polygon-chart.gif",
    basemap_format={"source": contextily.providers.Stamen.Terrain},
)
多個圖表
pandas_alive 支持單個可視化中的多個動畫圖表。
示例1
import pandas_alive

urban_df = pandas_alive.load_dataset("urban_pop")

# Line chart: percentage change of the per-row total across all columns.
animated_line_chart = (
    urban_df.sum(axis=1)
    .pct_change()
    # The first row has no predecessor, so pct_change() yields NaN there;
    # back-fill it from the next row. `.bfill()` replaces the deprecated
    # `.fillna(method='bfill')` spelling.
    .bfill()
    .mul(100)
    .plot_animated(
        kind="line",
        title="Total % Change in Population",
        period_label=False,
        add_legend=False,
    )
)

# Bar chart limited to 10 visible bars, with the period shown as a year.
animated_bar_chart = urban_df.plot_animated(
    n_visible=10,
    title="Top 10 Populous Countries",
    period_fmt="%Y",
)

# Combine both animations into one figure and write it to the GIF path.
pandas_alive.animate_multiple_plots(
    "examples/example-bar-and-line-urban-chart.gif",
    [animated_bar_chart, animated_line_chart],
    title="Urban Population 1977 - 2018",
    adjust_subplot_top=0.85,
    enable_progress_bar=True,
)
示例2
import pandas_alive

covid_df = pandas_alive.load_dataset()

# Row-to-row difference; missing differences (including the first row's) become 0.
row_deltas = covid_df.diff().fillna(0)
animated_line_chart = row_deltas.plot_animated(
    kind="line",
    period_label=False,
    add_legend=False,
)

# Bar chart limited to 10 visible bars.
animated_bar_chart = covid_df.plot_animated(n_visible=10)

# Combine both animations into one figure and write it to the GIF path.
pandas_alive.animate_multiple_plots(
    "examples/example-bar-and-line-chart.gif",
    [animated_bar_chart, animated_line_chart],
    enable_progress_bar=True,
)
示例3
import pandas as pd
import pandas_alive

data_raw = pd.read_csv(
    "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Long%20run%20life%20expectancy%20-%20Gapminder%2C%20UN/Long%20run%20life%20expectancy%20-%20Gapminder%2C%20UN.csv"
)

list_G7 = [
    "Canada",
    "France",
    "Germany",
    "Italy",
    "Japan",
    "United Kingdom",
    "United States",
]

# Reshape to wide form: one row per Year, one column per Entity.
data_raw = data_raw.pivot(
    index="Year", columns="Entity", values="Life expectancy (Gapminder, UN)"
)

# Build a frame holding only the Year column and the G7 countries.
data = pd.DataFrame()
data["Year"] = data_raw.reset_index()["Year"]
for country in list_G7:
    data[country] = data_raw[country].values

# Carry the last known value forward, then zero-fill whatever is still missing
# (i.e. leading gaps). `.ffill()` replaces the deprecated
# `.fillna(method="pad")` spelling.
data = data.ffill()
data = data.fillna(0)

# Keep rows from 1900 onward (label-based slice on the Year index).
data = data.set_index("Year").loc[1900:].reset_index()

# Convert the Year values to datetimes and use them as the index.
data["Year"] = pd.to_datetime(data["Year"].astype(str))
data = data.set_index("Year")

animated_bar_chart = data.plot_animated(
    period_fmt="%Y",
    perpendicular_bar_func="mean",
    period_length=200,
    fixed_max=True,
)

animated_line_chart = data.plot_animated(
    kind="line",
    period_fmt="%Y",
    period_length=200,
    fixed_max=True,
)

# Combine both animations into one figure and write it to the GIF path.
pandas_alive.animate_multiple_plots(
    "examples/life-expectancy.gif",
    plots=[animated_bar_chart, animated_line_chart],
    title="Life expectancy in G7 countries up to 2015",
    adjust_subplot_left=0.2,
    adjust_subplot_top=0.9,
    enable_progress_bar=True,
)
示例4
import json
import urllib.request
from datetime import datetime

import contextily
import geopandas
import matplotlib.pyplot as plt
import pandas as pd
import pandas_alive
from matplotlib import rcParams

# Fetch the package metadata (JSON) from the NSW data API.
with urllib.request.urlopen(
    "https://data.nsw.gov.au/data/api/3/action/package_show?id=aefcde60-3b0c-4bc0-9af1-6fe652944ec2"
) as url:
    data = json.loads(url.read().decode())

# Extract url to csv component
covid_nsw_data_url = data["result"]["resources"][0]["url"]

# Read csv from data API url
nsw_covid = pd.read_csv(covid_nsw_data_url)
postcode_dataset = pd.read_csv("data/postcode-data.csv")

# Prepare data from NSW health dataset
# Missing values are replaced with the placeholder 9999 before the int cast
# (presumably because NaN cannot be cast to int).
nsw_covid = nsw_covid.fillna(9999)
nsw_covid["postcode"] = nsw_covid["postcode"].astype(int)

# Count rows per (notification_date, postcode), then spread postcodes into columns.
grouped_df = nsw_covid.groupby(["notification_date", "postcode"]).size()
grouped_df = pd.DataFrame(grouped_df).unstack()
grouped_df.columns = grouped_df.columns.droplevel().astype(str)
grouped_df = grouped_df.fillna(0)
grouped_df.index = pd.to_datetime(grouped_df.index)

# Keep a handle on the date-indexed frame; grouped_df is transposed next.
cases_df = grouped_df

# Clean data in postcode dataset prior to matching
grouped_df = grouped_df.T
postcode_dataset = postcode_dataset[postcode_dataset["Longitude"].notna()]
postcode_dataset = postcode_dataset[postcode_dataset["Longitude"] != 0]
postcode_dataset = postcode_dataset[postcode_dataset["Latitude"].notna()]
postcode_dataset = postcode_dataset[postcode_dataset["Latitude"] != 0]
postcode_dataset["Postcode"] = postcode_dataset["Postcode"].astype(str)

# Build GeoDataFrame from Lat Long dataset and make map chart
# Index the postcode table once and reuse it for both coordinate lookups.
coords_by_postcode = postcode_dataset.set_index("Postcode")
grouped_df["Longitude"] = grouped_df.index.map(
    coords_by_postcode["Longitude"].to_dict()
)
grouped_df["Latitude"] = grouped_df.index.map(
    coords_by_postcode["Latitude"].to_dict()
)
gdf = geopandas.GeoDataFrame(
    grouped_df,
    geometry=geopandas.points_from_xy(grouped_df.Longitude, grouped_df.Latitude),
    crs="EPSG:4326",
)
# Drop rows with missing values (presumably postcodes with no coordinate match).
gdf = gdf.dropna()

# Prepare GeoDataFrame for writing to geopackage
gdf = gdf.drop(["Longitude", "Latitude"], axis=1)
gdf.columns = gdf.columns.astype(str)
gdf["postcode"] = gdf.index
gdf.to_file(
    "data/nsw-covid19-cases-by-postcode.gpkg",
    layer="nsw-postcode-covid",
    driver="GPKG",
)

# Prepare GeoDataFrame for plotting
gdf.index = gdf.postcode
gdf = gdf.drop("postcode", axis=1)
gdf = gdf.to_crs("EPSG:3857")  # Web Mercator

map_chart = gdf.plot_animated(
    basemap_format={"source": contextily.providers.Stamen.Terrain},
    cmap="cool",
)

cases_df.to_csv("data/nsw-covid-cases-by-postcode.csv")

# Per-date total across all postcodes, drawn as a line with two labelled events.
bar_chart = cases_df.sum(axis=1).plot_animated(
    kind="line",
    label_events={
        "Ruby Princess Disembark": datetime.strptime("19/03/2020", "%d/%m/%Y"),
        "Lockdown": datetime.strptime("31/03/2020", "%d/%m/%Y"),
    },
    fill_under_line_color="blue",
    add_legend=False,
)

map_chart.ax.set_title("Cases by Location")

# Reload the date-indexed case counts from the CSV written above.
grouped_df = pd.read_csv(
    "data/nsw-covid-cases-by-postcode.csv", index_col=0, parse_dates=[0]
)

# Running total of the per-date sums.
line_chart = (
    grouped_df.sum(axis=1)
    .cumsum()
    .fillna(0)
    .plot_animated(
        kind="line",
        period_label=False,
        title="Cumulative Total Cases",
        add_legend=False,
    )
)


def current_total(values):
    """Build the summary label for one animation period.

    Args:
        values: Object exposing ``.sum()``; it is supplied by ``plot_animated``
            through ``period_summary_func``.

    Returns:
        dict with keys ``x``, ``y``, ``s``, ``ha`` and ``size``, where ``s`` is
        the text ``"Total : <int sum of values>"``.
    """
    total = values.sum()
    s = f'Total : {int(total)}'
    return {'x': .85, 'y': .2, 's': s, 'ha': 'right', 'size': 11}


race_chart = grouped_df.cumsum().plot_animated(
    n_visible=5,
    title="Cases by Postcode",
    period_label=False,
    period_summary_func=current_total,
)

plots = [bar_chart, line_chart, map_chart, race_chart]

rcParams.update({"figure.autolayout": False})

# make sure figures are `Figure()` instances
figs = plt.Figure()
gs = figs.add_gridspec(2, 3, hspace=0.5)

# Top row spans all three columns; the bottom row holds one chart per column.
f3_ax1 = figs.add_subplot(gs[0, :])
f3_ax1.set_title(bar_chart.title)
bar_chart.ax = f3_ax1

f3_ax2 = figs.add_subplot(gs[1, 0])
f3_ax2.set_title(line_chart.title)
line_chart.ax = f3_ax2

f3_ax3 = figs.add_subplot(gs[1, 1])
f3_ax3.set_title(map_chart.title)
map_chart.ax = f3_ax3

f3_ax4 = figs.add_subplot(gs[1, 2])
f3_ax4.set_title(race_chart.title)
race_chart.ax = f3_ax4

# Title the figure with the latest date present in the case data.
timestr = cases_df.index.max().strftime("%d/%m/%Y")
figs.suptitle(f"NSW COVID-19 Confirmed Cases up to {timestr}")

pandas_alive.animate_multiple_plots(
    "examples/nsw-covid.gif",
    plots,
    figs,
    enable_progress_bar=True,
)
總結
Pandas_Alive 是一款非常好玩、實用的動畫可視化製圖工具,以上就是python機器學習使數據更鮮活的可視化工具Pandas_Alive的詳細內容,更多關於python機器學習可視化工具Pandas_Alive的資料請關注WalkonNet其它相關文章!
推薦閱讀:
- Python繪制數據動態圖的方法詳解
- Python數據可視化探索實例分享
- Python機器學習三大件之二pandas
- python使用Streamlit庫制作Web可視化頁面
- Pandas高級教程之Pandas中的GroupBy操作