I'm re-factoring the python-oceans module. The first step was to create another module just for the CTD tools (python-ctd)
The new module is already at PyPI
and the API will remain the same as before.
To install it type:
pip install ctd
Now we can import the module:
from ctd import DataFrame, Series, lp_filter, plot_section, movingaverage
import re
import os
from glob import glob
from collections import OrderedDict
import gsw
import numpy as np
import matplotlib.pyplot as plt
from pandas import Panel
from matplotlib.patches import Polygon
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from oceans.datasets import etopo_subset
from oceans.sw_extras import gamma_GP_from_SP_pt
Let's define a function to streamline the CTD data processing.
def alphanum_key(s):
key = re.split(r"(\d+)", s)
key[1::2] = map(int, key[1::2])
return key
def derive_cnv(self):
"""Compute SP, SA, CT, z, and GP from a cnv pre-processed cast."""
cast = self.copy() # FIXME: Use MetaDataFrame to propagate lon, lat.
p = cast.index.values.astype(float)
cast['SP'] = gsw.SP_from_C(cast['c0S/m'].values * 10., cast['t090C'].values, p)
cast['SA'] = gsw.SA_from_SP(cast['SP'].values, p, self.lon, self.lat)
cast['SR'] = gsw.SR_from_SP(cast['SP'].values)
cast['CT'] = gsw.CT_from_t(cast['SA'].values, cast['t090C'].values, p)
cast['z'] = -gsw.z_from_p(p, self.lat)
cast['sigma0_CT'] = gsw.sigma0_CT_exact(cast['SA'].values, cast['CT'].values)
return cast
def proc_ctd(fname, compression='gzip', below_water=True):
# 00-Split, clean 'bad pump' data, and apply flag.
cast = DataFrame.from_cnv(fname, compression=compression,
below_water=below_water).split()[0]
cast = cast[cast['pumps']]
cast = cast[~cast['flag']] # True for bad values.
name = os.path.basename(fname).split('.')[0]
# Removed unwanted columns.
keep = set(['altM', 'c0S/m', 'dz/dtM', 'wetCDOM', 'latitude',
'longitude', 'sbeox0Mm/Kg', 'sbeox1Mm/Kg', 'oxsolMm/Kg',
'oxsatMm/Kg', 'par', 'pla', 'sva', 't090C', 't190C', 'tsa',
'sbeox0V'])
null = map(cast.pop, keep.symmetric_difference(cast.columns))
del null
# Smooth velocity with a 2 seconds windows.
cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)
# 01-Filter pressure.
kw = dict(sample_rate=24.0, time_constant=0.15)
cast.index = lp_filter(cast.index, **kw)
# 02-Remove pressure reversals.
cast = cast.press_check()
cast = cast.dropna()
# 03-Loop Edit.
cast = cast[cast['dz/dtM'] >= 0.25] # Threshold velocity.
# 04-Remove spikes.
kw = dict(n1=2, n2=20, block=100)
cast = cast.apply(Series.despike, **kw)
# 05-Bin-average.
cast = cast.apply(Series.bindata, **dict(delta=1.))
# 06-interpolate.
cast = cast.apply(Series.interpolate)
if True: # 07-Smooth.
pmax = max(cast.index)
if pmax >= 500.:
window_len = 21
elif pmax >= 100.:
window_len = 11
else:
window_len = 5
kw = dict(window_len=window_len, window='hanning')
cast = cast.apply(Series.smooth, **kw)
# Add metadata to the DataFrame.
cast.lat = cast['latitude'].mean()
cast.lon = cast['longitude'].mean()
cast.name = name
# 08-Derive.
cast = derive_cnv(cast)
return cast
A map for the stations location.
def map_limits(m):
llcrnrlon = min(m.boundarylons)
urcrnrlon = max(m.boundarylons)
llcrnrlat = min(m.boundarylats)
urcrnrlat = max(m.boundarylats)
return llcrnrlon, urcrnrlon, llcrnrlat, urcrnrlat
def make_map(llcrnrlon=-91.1, urcrnrlon=-87.5, llcrnrlat=27.5, urcrnrlat=29.5,
projection='merc', resolution='i', figsize=(6, 6), inset=True):
m = Basemap(llcrnrlon=llcrnrlon, urcrnrlon=urcrnrlon,
llcrnrlat=llcrnrlat, urcrnrlat=urcrnrlat,
projection=projection, resolution=resolution)
fig, ax = plt.subplots(figsize=figsize)
m.drawstates()
m.drawcoastlines()
m.fillcontinents(color='0.85')
meridians = np.arange(llcrnrlon, urcrnrlon + 2, 2)
parallels = np.arange(llcrnrlat, urcrnrlat + 1, 1)
m.drawparallels(parallels, linewidth=0, labels=[1, 0, 0, 0])
m.drawmeridians(meridians, linewidth=0, labels=[0, 0, 0, 1])
m.ax = ax
if inset:
axin = inset_axes(m.ax, width="40%", height="40%", loc=4)
# Global inset map.
inmap = Basemap(lon_0=np.mean(m.boundarylons),
lat_0=np.mean(m.boundarylats),
projection='ortho', ax=axin, anchor='NE')
inmap.drawcountries(color='white')
inmap.fillcontinents(color='gray')
bx, by = inmap(m.boundarylons, m.boundarylats)
xy = list(zip(bx, by))
mapboundary = Polygon(xy, edgecolor='k', linewidth=1, fill=False)
inmap.ax.add_patch(mapboundary)
return fig, m
And finally we can process all the files in a single loop.
lon, lat = [], []
pattern = './data/cruise2/*c.cnv.gz'
fnames = sorted(glob(pattern), key=alphanum_key)
section = OrderedDict()
for fname in fnames:
cast = proc_ctd(fname)
name = os.path.basename(fname).split('.')[0]
section.update({name: cast})
lon.append(cast.longitude.mean())
lat.append(cast.latitude.mean())
# Section.
section = Panel.fromDict(section)
The warning is due to an issue with the data files, where some numbers were
"glued" and the parser ended-up loading then as objects
instead of floats
.
That is not a big deal, usually those values are already flagged as bad data
during the Data Conversion step and will be removed once we apply the flag.
Note that, using the pandas Panel.fromDict
and OrderedDict
, we created
a hydrographic section
where the stations are the items
, the minor
axis
represent the data, and the major
axis is our index (pressure or depth).
To plot a temperature section we needed is to create a cross section
in our
Panel
. Unfortunately, pandas loses the object meta-data when performing
slicing operations. That is why we need to re-attach the longitude and
latitude information before plotting.
# Stations map.
fig, m = make_map(figsize=(6, 6))
x, y, topo = etopo_subset(*map_limits(m), smoo=True, tfile='dap')
topo = np.where(topo > -1., 1.e10, topo)
topo = np.ma.masked_values(topo, 1.e10)
cs = m.contour(x, y, -topo, (100, 200, 500, 1000), colors='k',
latlon=True, alpha=0.5)
m.ax.clabel(cs, fmt='%1.0f m', fontsize=8, inline=1)
m.plot(lon, lat, 'k.', latlon=True)
# Plot consevative temperature section.
CT = section.minor_xs('CT')
CT.lon, CT.lat = lon, lat
fig, ax, cb = plot_section(CT, reverse=True)
fig.set_size_inches((6, 3))
CT
can be thought as a 2-D matrix with the pressure information as its index.
To create a single cast plot just plot one column using either the station name
or python indexing syntax:
fig, ax = CT.icol(0).plot(color='k', linewidth=2)
ax.set_xlabel(u"Temperature [\u00b0C]")
ax.set_ylabel("Pressure [dbar]")
ax.grid()
fig.set_size_inches(3, 4)
HTML(html)