Pandas is an amazing python module to deal with data. It has tons of features worthwhile learning, but the best of all is how quick one can explore a data with just a few code lines once you learn the basics.
I wrote a small module to read CTD (also XBT's EDF and FSI's CTD format) data directly as a pandas DataFrame. Here is an example how to use it:
import gsw
import numpy as np
from oceans.sw_extras import gamma_GP_from_SP_pt
from ctd import DataFrame, Series, movingaverage, rosette_summary, lp_filter
def o2sat(s, pt):
from seawater.library import T68conv
t = T68conv(pt) + 273.16
a = (-177.7888, 255.5907, 146.4813, -22.2040)
b = (-0.037362, 0.016504, -0.0020564)
lnC = (a[0] + a[1] * (100. / t) + a[2] * np.log(t / 100.) + a[3] *
(t / 100.) +
s * (b[0] + b[1] * (t / 100.) + b[2] * (t / 100.) ** 2))
return np.exp(lnC) * 1000. / 22.392
cast = DataFrame.from_cnv('./data/CTD_001.cnv.gz', compression='gzip')
That's it! One line to load the compressed SeaBird cnv file into memory. If you have the Rosette file you can load it as well and print the bottle summary with just two lines:
rossum = rosette_summary('./data/CTD_001.ros')
rossum.groupby(rossum.index)['t090C', 't190C', 'c0S/m', 'c1S/m'].mean()
Metadata and data flags are also easily accessed.
# Read metadata.
hours = cast['timeS'].max() / 60. / 60.
lon, lat = cast.longitude.mean(), cast.latitude.mean()
# Apply 'bad pump' and 'flag'.
cast = cast[cast['pumps']] # True for good values.
cast = cast[~cast['flag']] # True for bad values blame SBE!
print("Cast duration was: %2.2f hours." % hours)
cast[['t090C', 't190C', 'c0S/m', 'c1S/m']].describe() # Statistics for sensors pairs 0, 1.
Cleaning the DataFrame and manipulating the variables:
keep = set(['t090C', 'c0S/m', 'sbeox0Mm/Kg', 'dz/dtM'])
cast = cast.drop(keep.symmetric_difference(cast.columns), axis=1)
The module also contains some rudimentary CTD processing steps akin to those found in the SBE Software:
- Smooth velocity and Oxygen with a 2 seconds window:
cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)
cast['sbeox0Mm/Kg'] = movingaverage(cast['sbeox0Mm/Kg'], window_size=48)
- Filter pressure:
kw = dict(sample_rate=24.0, time_constant=0.15)
cast.index = lp_filter(cast.index, **kw)
- Split:
downcast, upcast = cast.split()
- Loop Edit:
downcast = downcast.press_check() # Remove pressure reversals.
downcast = downcast[downcast['dz/dtM'] >= 0.25] # Threshold velocity.
- Wild Edit:
kw = dict(n1=2, n2=20, block=150)
downcast = downcast.apply(Series.despike, **kw)
- Bin-average:
downcast = downcast.apply(Series.bindata, **dict(delta=1.))
downcast = downcast.apply(Series.interpolate)
- Smooth:
pmax = max(cast.index)
if pmax >= 500:
window_len = 21
elif pmax >= 100:
window_len = 11
else:
window_len = 5
kw = dict(window_len=window_len, window='hanning')
downcast = downcast.apply(Series.smooth, **kw)
- The module also allows for a more "customized"
Derive
step:
p = downcast.index.values.astype(float)
downcast['depth'] = -gsw.z_from_p(p, lat)
# ctm [mS/cm] = ctm [S/m] * 10
downcast['SP'] = gsw.SP_from_C(downcast['c0S/m'].values * 10, downcast['t090C'].values, p)
downcast['SA'] = gsw.SA_from_SP(downcast['SP'].values, p, lon, lat)
downcast['CT'] = gsw.CT_from_t(downcast['SA'].values, downcast['t090C'].values, p)
downcast['sigma0_CT'] = gsw.sigma0_CT_exact(downcast['SA'].values, downcast['CT'].values)
downcast['sound'] = gsw.sound_speed_CT_exact(downcast['SA'].values, downcast['CT'].values, p)
downcast['gamma'] = gamma_GP_from_SP_pt(downcast['SA'].values, downcast['CT'].values, p, lon, lat)
# Does't fit the DataFrame because the index in at the middle (p_mid) point of the original data.
N2, p_mid = gsw.Nsquared(downcast['SA'].values, downcast['CT'].values, p, lat=lat)
downcast['aou'] = o2sat(downcast['SA'].values, downcast['CT'].values) - downcast['sbeox0Mm/Kg']
Last but not least, a handy way for plotting the data:
figsize = (4, 6)
kw_t = dict(linestyle='-', color='#6699cc', linewidth=3, label=r"Temperature [$^\circ$C]")
kw_ct = dict(linestyle='-', color='#ffcc33', alpha=0.75, linewidth=3, label=r"Conservative temperature [$^\circ$C]")
fig, ax = downcast['t090C'].plot(**kw_t)
ax.plot(downcast['CT'], downcast.index, **kw_ct)
ax.grid(True)
ax.set_xlabel("Temperature")
ax.set_ylabel("Pressure [dbar]")
l = ax.legend(loc="lower right")
fig.set_size_inches(figsize)
kw_sp = dict(linestyle='-', color='#339933', linewidth=3, label=r"Practical Salinity")
kw_sa = dict(linestyle='-', color='#999999', linewidth=3, label=r"Absolute Salinity [g kg$^{-1}$]")
fig, ax = downcast['SP'].plot(**kw_sp)
ax.plot(downcast['SA'], downcast.index, **kw_sa)
ax.grid(True)
ax.set_xlabel("Salinity")
ax.set_ylabel("Pressure [dbar]")
l = ax.legend(loc="lower right")
fig.set_size_inches(figsize)
kw_gm = dict(linestyle='-', color='#0000ff', linewidth=3, label=r"Gamma")
kw_sg = dict(linestyle='-', color='#ff3333', linewidth=3, label=r"Sigma")
fig, ax = downcast['gamma'].plot(**kw_sg)
ax.plot(downcast['sigma0_CT'], downcast.index, **kw_gm)
ax.grid(True)
ax.set_xlabel("Density [kg m $^{-3}$]")
ax.set_ylabel("Pressure [dbar]")
l = ax.legend(loc="lower left")
fig.set_size_inches(figsize)
_ = ax.set_xlim(25.3, 28.5)
HTML(html)