Making choropleths is fun. I will leave at that! Want more explanation? Follow this link.
In this post I will show three way to create choropleths with Python. The Good, the Bad and the Ugly (or in the original: Il buono, il brutto, il cattivo)!
The Ugly: geopandas
Choropleths with geopandas is exactly like plotting with pandas:
very convenient, but hard to customize. There isn't an easy way to make
the plot look good. I had a weird issue when trying to plot with geopandas
over a matplotlib ax
instance. However, if your goal is quick visualization,
geopandas is your friend.
import string
import unicodedata
import geopandas
from pandas import read_csv
def remove_accents(s):
"""Python 3 please!"""
return ''.join(x for x in unicodedata.normalize('NFKD', s)
if x in string.ascii_letters).lower()
geo_path = './data/br_states.json'
gdf = geopandas.read_file(geo_path)
gdf['name'] = gdf['name'].str.encode('utf-8')
gdf['name'] = [remove_accents(s.decode('utf-8')) for s in gdf['name']]
df = read_csv('./data/pfas.csv')
df['name'] = [remove_accents(s.decode('utf-8')) for s in df['name']]
gdf.sort('name', inplace=True)
df.sort('name', inplace=True)
gdf['2013'] = df['2013'].values
kw = dict(column='2013', k=6, colormap='YlGn')
ax = gdf.plot(scheme='QUANTILES', **kw)
That is it! We spent more lines loading and cleaning the data than plotting. Note that we can change the classification schemes. I prefer the Fisher Jenks scheme to enhance the color differences. Be aware that in some cases you have to be conservative and use an equal interval. Note that you will need PySAL to use the classification schemes.
ax = gdf.plot(scheme='fisher_jenks', **kw)
ax = gdf.plot(scheme='equal_interval', **kw)
The Good: If you want to publish that figure in a paper you will need to tweak it a little bit with matplotlib and cartopy.
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from pysal.esda.mapclassify import Fisher_Jenks
def norm_cmap(values, cmap, vmin=None, vmax=None):
"""
Normalize and set colormap
Parameters
----------
values : Series or array to be normalized
cmap : matplotlib Colormap
normalize : matplotlib.colors.Normalize
cm : matplotlib.cm
vmin : Minimum value of colormap. If None, uses min(values).
vmax : Maximum value of colormap. If None, uses max(values).
Returns
-------
n_cmap : mapping of normalized values to colormap (cmap)
"""
mn = vmin or min(values)
mx = vmax or max(values)
norm = Normalize(vmin=mn, vmax=mx)
n_cmap = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
return n_cmap
fj = Fisher_Jenks(df['2013'].values, k=6)
cmap = norm_cmap(fj.yb, cmap='YlGn')
df['color'] = [cmap.to_rgba(value) for value in df['2013'].values]
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from cartopy.io import shapereader
states = df['name'].str.decode('utf-8').tolist()
kw = dict(resolution='50m', category='cultural',
name='admin_1_states_provinces')
states_shp = shapereader.natural_earth(**kw)
shp = shapereader.Reader(states_shp)
subplot_kw = dict(projection=ccrs.PlateCarree())
fig, ax = plt.subplots(figsize=(5, 5),
subplot_kw=subplot_kw)
ax.set_extent([-82, -32, -45, 10])
ax.background_patch.set_visible(False)
ax.outline_patch.set_visible(False)
for record, state in zip(shp.records(), shp.geometries()):
name = record.attributes['name'].decode('latin-1')
name = remove_accents(name)
if name in states:
facecolor = df[df['name'] == name]['color']
ax.add_geometries([state], ccrs.PlateCarree(),
facecolor=facecolor, edgecolor='black')
cmap = plt.cm.ScalarMappable(cmap='YlGn')
cmap.set_array([])
cmap.set_clim(-0.5, 6+0.5)
bounds = [0, 1, 2, 3, 4, 5, 6]
norm = plt.cm.colors.BoundaryNorm(bounds, cmap.cmap.N)
kw = dict(orientation='vertical', extend='both', cmap=cmap,
norm=norm, boundaries=[0]+bounds+[13],ticks=bounds,
spacing='proportional')
cax = fig.add_axes([0.4, 0.35, 0.015, 0.2])
cbar = plt.colorbar(cmap, cax=cax, **kw)
fig.savefig('choropleth.png', dpi=150, transparent=True)
plt.close(fig)
import numpy as np
from PIL import Image, ImageChops
def trim(img):
border = Image.new(img.mode, img.size, img.getpixel((0, 0)))
diff = ImageChops.difference(img, border)
diff = ImageChops.add(diff, diff, 2.0, -100)
bbox = diff.getbbox()
if bbox:
img = img.crop(bbox)
return img
img = Image.open('choropleth.png')
img = trim(img)
After some last trimming we have a much nicer figure to put on a paper! The customizations are natural to matplotlib users.
img
The bad·ass: Want it to be pretty? Easy? Web friendly? And on top of it all interactive? Not a problem! Folium has your back.
import json
import folium
import numpy as np
geo_str = json.dumps(json.load(open(geo_path, 'r')))
threshold_scale = np.linspace(df['2013'].min(),
df['2013'].max(), 6, dtype=int).tolist()
mapa = folium.Map(location=[-15.80, -47.88],
tiles="Mapbox Bright",
zoom_start=3)
mapa.geo_json(geo_str=geo_str,
data=df,
columns=['state', '2013'],
fill_color='YlGn',
key_on='feature.id',
threshold_scale=threshold_scale)
mapa