Turning ripples into waves


I'm re-factoring the python-oceans module. The first step was to create another module just for the CTD tools (python-ctd)

The new module is already at PyPI and the API will remain the same as before. To install it type:

pip install ctd

Now we can import the module:

In [2]:
from ctd import DataFrame, Series, lp_filter, plot_section, movingaverage
In [3]:
import re
import os

from glob import glob
from collections import OrderedDict

import gsw
import numpy as np
import matplotlib.pyplot as plt

from pandas import Panel
from matplotlib.patches import Polygon
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from oceans.datasets import etopo_subset
from oceans.sw_extras import gamma_GP_from_SP_pt

Let's define a function to streamline the CTD data processing.

In [4]:
def alphanum_key(s):
    key = re.split(r"(\d+)", s)
    key[1::2] = map(int, key[1::2])
    return key

def derive_cnv(self):
    """Compute SP, SA, CT, z, and GP from a cnv pre-processed cast."""
    cast = self.copy()  # FIXME: Use MetaDataFrame to propagate lon, lat.
    p = cast.index.values.astype(float)
    cast['SP'] = gsw.SP_from_C(cast['c0S/m'].values * 10., cast['t090C'].values, p)
    cast['SA'] = gsw.SA_from_SP(cast['SP'].values, p, self.lon,
    cast['SR'] = gsw.SR_from_SP(cast['SP'].values)
    cast['CT'] = gsw.CT_from_t(cast['SA'].values, cast['t090C'].values, p)
    cast['z'] = -gsw.z_from_p(p,
    cast['sigma0_CT'] = gsw.sigma0_CT_exact(cast['SA'].values, cast['CT'].values)
    return cast

def proc_ctd(fname, compression='gzip', below_water=True):
    # 00-Split, clean 'bad pump' data, and apply flag.
    cast = DataFrame.from_cnv(fname, compression=compression,
    cast = cast[cast['pumps']]
    cast = cast[~cast['flag']]  # True for bad values.
    name = os.path.basename(fname).split('.')[0]

    # Removed unwanted columns.
    keep = set(['altM', 'c0S/m', 'dz/dtM', 'wetCDOM', 'latitude',
                'longitude', 'sbeox0Mm/Kg', 'sbeox1Mm/Kg', 'oxsolMm/Kg',
                'oxsatMm/Kg', 'par', 'pla', 'sva', 't090C', 't190C', 'tsa',

    null = map(cast.pop, keep.symmetric_difference(cast.columns))
    del null

    # Smooth velocity with a 2 seconds windows.
    cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)

    # 01-Filter pressure.
    kw = dict(sample_rate=24.0, time_constant=0.15)
    cast.index = lp_filter(cast.index, **kw)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast['dz/dtM'] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    kw = dict(n1=2, n2=20, block=100)
    cast = cast.apply(Series.despike, **kw)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, **dict(delta=1.))

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    if True:  # 07-Smooth.
        pmax = max(cast.index)
        if pmax >= 500.:
            window_len = 21
        elif pmax >= 100.:
            window_len = 11
            window_len = 5
        kw = dict(window_len=window_len, window='hanning')
        cast = cast.apply(Series.smooth, **kw)

    # Add metadata to the DataFrame. = cast['latitude'].mean()
    cast.lon = cast['longitude'].mean() = name

    # 08-Derive.
    cast = derive_cnv(cast)
    return cast

A map for the stations location.

In [5]:
def map_limits(m):
    llcrnrlon = min(m.boundarylons)
    urcrnrlon = max(m.boundarylons)
    llcrnrlat = min(m.boundarylats)
    urcrnrlat = max(m.boundarylats)
    return llcrnrlon, urcrnrlon, llcrnrlat, urcrnrlat

def make_map(llcrnrlon=-91.1, urcrnrlon=-87.5, llcrnrlat=27.5, urcrnrlat=29.5,
             projection='merc', resolution='i', figsize=(6, 6), inset=True):
    m = Basemap(llcrnrlon=llcrnrlon, urcrnrlon=urcrnrlon,
                llcrnrlat=llcrnrlat, urcrnrlat=urcrnrlat,
                projection=projection, resolution=resolution)
    fig, ax = plt.subplots(figsize=figsize)
    meridians = np.arange(llcrnrlon, urcrnrlon + 2, 2)
    parallels = np.arange(llcrnrlat, urcrnrlat + 1, 1)
    m.drawparallels(parallels, linewidth=0, labels=[1, 0, 0, 0])
    m.drawmeridians(meridians, linewidth=0, labels=[0, 0, 0, 1]) = ax

    if inset:
        axin = inset_axes(, width="40%", height="40%", loc=4)
        # Global inset map.
        inmap = Basemap(lon_0=np.mean(m.boundarylons),
                        projection='ortho', ax=axin, anchor='NE')
        bx, by = inmap(m.boundarylons, m.boundarylats)
        xy = list(zip(bx, by))
        mapboundary = Polygon(xy, edgecolor='k', linewidth=1, fill=False)
    return fig, m

And finally we can process all the files in a single loop.

In [6]:
lon, lat = [], []
pattern = './data/cruise2/*c.cnv.gz'
fnames = sorted(glob(pattern), key=alphanum_key)
section = OrderedDict()

for fname in fnames:
    cast = proc_ctd(fname)
    name = os.path.basename(fname).split('.')[0]
    section.update({name: cast})

# Section.
section = Panel.fromDict(section)
/home/filipe/miniconda3/envs/Blog/lib/python2.7/site-packages/IPython/core/ DtypeWarning: Columns (9) have mixed types. Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)
/home/filipe/miniconda3/envs/Blog/lib/python2.7/site-packages/ctd/ UserWarning: Could not convert sbeox1Mm/Kg to float.
  warnings.warn('Could not convert %s to float.' % column)

The warning is due to an issue with the data files, where some numbers were "glued" and the parser ended-up loading then as objects instead of floats. That is not a big deal, usually those values are already flagged as bad data during the Data Conversion step and will be removed once we apply the flag.

Note that, using the pandas Panel.fromDict and OrderedDict, we created a hydrographic section where the stations are the items, the minor axis represent the data, and the major axis is our index (pressure or depth).

To plot a temperature section we needed is to create a cross section in our Panel. Unfortunately, pandas loses the object meta-data when performing slicing operations. That is why we need to re-attach the longitude and latitude information before plotting.

In [7]:
# Stations map.
fig, m = make_map(figsize=(6, 6))
x, y, topo = etopo_subset(*map_limits(m), smoo=True, tfile='dap')
topo = np.where(topo > -1., 1.e10, topo)
topo =, 1.e10)
cs = m.contour(x, y, -topo, (100, 200, 500, 1000), colors='k',
                latlon=True, alpha=0.5), fmt='%1.0f m', fontsize=8, inline=1)
m.plot(lon, lat, 'k.', latlon=True)
[<matplotlib.lines.Line2D at 0x7f7385c65c90>]
In [8]:
# Plot consevative temperature section.
CT = section.minor_xs('CT')
CT.lon, = lon, lat
fig, ax, cb = plot_section(CT, reverse=True)
fig.set_size_inches((6, 3))

CT can be thought as a 2-D matrix with the pressure information as its index. To create a single cast plot just plot one column using either the station name or python indexing syntax:

In [9]:
fig, ax = CT.icol(0).plot(color='k', linewidth=2)
ax.set_xlabel(u"Temperature [\u00b0C]")
ax.set_ylabel("Pressure [dbar]")
fig.set_size_inches(3, 4)
-c:1: FutureWarning: icol(i) is deprecated. Please use .iloc[:,i]

In [10]:

This post was written as an IPython notebook. It is available for download or as a static html.

Creative Commons License
python4oceanographers by Filipe Fernandes is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
Based on a work at