Narrow River kayak trip data processing notebook

This is a Jupyter notebook. They are neat, you can read about them here:http://jupyter.org/.

They are made up of cells containing:

- inline markdown like this (which can contain code snippets or LaTex / MathJax)
- raw code to be run interactively
- example code not to be run
- raw html/CSS/JS
- any output such as figures, tables, console, etc.
- there are kernels / engines for over 40 languages
- converting to script, pdf, html, or slideshow (with reveal.js)

Or to quote from the source "Open source, interactive data science and scientific computing across over 40 programming languages."

This one will outline some routines for processing temperature data logged during the inaugural PODS kayak trip.

In [1]:
# traditional python imports at the beginning
import numpy as np # matrix library
import matplotlib.pyplot as plt # plotting library
import pandas as pd # dataframe / spreadsheet library with some nice extras for timeseries etc. 

# nbagg for interactive plotting within the notebook
# uline for static/inline plots
%matplotlib inline

plt.rcParams['figure.figsize'] = (16, 9)
plt.rcParams['axes.formatter.useoffset'] = False
#plt.rcParams['axes.formatter'] = False
In [2]:
#hideme
for i in plt.rcParams.keys():
    print i
agg.path.chunksize
animation.avconv_args
animation.avconv_path
animation.bitrate
[...]
ytick.minor.width
In [3]:
# function definitions should go here so they are loaded from the beginning...

def readTemp(fname):
    """
    convenience funtion to read in hobo csv's, strip of unwanted data,
    and set index to DateTime parsed from file and trim start to be around
    launch time
    
    returns pandas DataFrame object of single temp record indexed by time
    that can then be merged etc.
    """
    
    temp = pd.read_csv(fname, header=1, parse_dates=[1]) # read the file in
    
    temp.drop(temp.columns[[0]], axis=1, inplace=True) # drop the first index column it is redundant
    
    temp.set_index('Date Time, GMT-04:00', inplace=True) # set index to be time not integer - can use iloc and loc 
    
    # trim to time we were in water, start at 2:30 local to 3:30 local
    temp = temp['2016-09-28 14:45:00':'2016-09-28 15:40:00']
    
    
    return temp
In [4]:
# list out the files in the directory
!ls
gpsallout.gpx			       NR_2016-09-28_T11.hobo
gpsallout.kml			       NR_2016-09-28_T12.csv
gpstrackout.gpx			       NR_2016-09-28_T12.hobo
kayak-track.csv			       NR_2016-09-28_T1.csv
kayak-track-only.csv		       NR_2016-09-28_T1.hobo
narrow river kayak trip.ipynb	       NR_2016-09-28_T2.csv
Narrow River Kayak Trip RP-CJ-TS.docx  NR_2016-09-28_T2.hobo
narrow river.zip		       NR_2016-09-28_T3.csv
NR_2016-09-28_Cast_3.csv	       NR_2016-09-28_T3.hobo
NR_2016-09-28_Cast_3.hobo	       NR_2016-09-28_T6.csv
NR_2016-09-28_Cast_4.csv	       NR_2016-09-28_T6.hobo
NR_2016-09-28_Cast_4.hobo	       NR_2016-09-28_T7.csv
NR_2016-09-28_Cast_5.csv	       NR_2016-09-28_T7.hobo
NR_2016-09-28_Cast_5.hobo	       NR_2016-09-28_T8.csv
NR_2016-09-28_Cast_6.csv	       NR_2016-09-28_T8.hobo
NR_2016-09-28_Cast_6.hobo	       NR_2016-09-28_T9.csv
NR_2016-09-28_T10.csv		       NR_2016-09-28_T9.hobo
NR_2016-09-28_T10.hobo		       rhode_island_coastline
NR_2016-09-28_T11.csv		       rhode_island_water
In [5]:
# lets try importing one of the temp logs

temp1 = readTemp('NR_2016-09-28_T1.csv')
temp2 = readTemp('NR_2016-09-28_T2.csv')

temp2.head()
Out[5]:
Temp, °C (LGR S/N: 11000995, SEN S/N: 11000995)
Date Time, GMT-04:00
2016-09-28 14:45:00 19.08
2016-09-28 14:45:01 19.08
2016-09-28 14:45:02 19.08
2016-09-28 14:45:03 19.08
2016-09-28 14:45:04 19.08
In [6]:
# joining / merging datasets

#mytemps = pd.merge(temp1, temp2, on='Date Time, GMT-04:00', how='outer')
#mytemps = pd.merge(temp1, temp2, how='outer')

mytemps = temp1.join(temp2, how='outer') # outer join keeps everything and fills in blanks with NANs

mytemps.head() # look at the top
Out[6]:
Temp, °C (LGR S/N: 11001004, SEN S/N: 11001004) Temp, °C (LGR S/N: 11000995, SEN S/N: 11000995)
Date Time, GMT-04:00
2016-09-28 14:45:00 18.913 19.08
2016-09-28 14:45:01 18.913 19.08
2016-09-28 14:45:02 18.913 19.08
2016-09-28 14:45:03 18.913 19.08
2016-09-28 14:45:04 18.913 19.08
In [7]:
# renaming column headers
'''
mytemps.rename(columns={'Temp, °C (LGR S/N: 11001004, SEN S/N: 11001004)': 'T1', 
                          'Temp, °C (LGR S/N: 11000995, SEN S/N: 11000995)': 'T2'},
                 inplace=True)
                 '''

mytemps.columns = ['T1','T2']
In [8]:
mytemps['Tavg'] = (mytemps['T1'] + mytemps['T2']) / 2.0 # add an average column
mytemps['Tanom'] = mytemps['Tavg'] - mytemps['Tavg'].mean()
In [9]:
mytemps.describe() # look at some statistics
Out[9]:
T1 T2 Tavg Tanom
count 3301.000000 3301.000000 3301.000000 3.301000e+03
mean 19.185718 19.244263 19.214990 -4.142113e-13
std 0.087947 0.131029 0.104576 1.045761e-01
min 18.913000 19.080000 18.996500 -2.184905e-01
25% 19.151000 19.199000 19.163000 -5.199046e-02
50% 19.175000 19.199000 19.187000 -2.799046e-02
75% 19.199000 19.246000 19.210500 -4.490457e-03
max 19.579000 20.031000 19.686000 4.710095e-01
In [10]:
mytemps.info() # more info on object / data
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3301 entries, 2016-09-28 14:45:00 to 2016-09-28 15:40:00
Data columns (total 4 columns):
T1       3301 non-null float64
T2       3301 non-null float64
Tavg     3301 non-null float64
Tanom    3301 non-null float64
dtypes: float64(4)
memory usage: 128.9 KB
In [11]:
# plot up timeseries
#plt.figure()
mytemps.plot(subplots=True)

plt.legend(loc='best')
plt.ylabel(u'Temp, °C')
plt.title('Tucker towed HOBO time series');
In [12]:
plt.figure()
mytemps['T1'].plot()
mytemps['T2'].plot()
mytemps['Tavg'].plot()

mean = mytemps['Tavg'].mean()
meanlabel = u'Mean = {:4.1f}°C'.format(mean)

plt.axhline(y=mean, xmin=0, xmax=1, hold=None, label=meanlabel, color='teal', lw=4)
plt.legend()
Out[12]:
<matplotlib.legend.Legend at 0x7f75e7043cd0>

Adding GPS and georeferencing temperatures

In [13]:
# read in gps data

gpsTrack = pd.read_csv('kayak-track-only.csv', header=0, parse_dates=[0]) # read and parse headers/dates

gpsTrack.set_index('time GMT', inplace=True) # set index to be time

gpsTrack.index = gpsTrack.index - pd.offsets.Hour(4) # subtract 4 hrs to put in local time

gpsTrack.index.rename('local time', inplace=True) # rename the index to reflect local time change

geotemps = gpsTrack.join(mytemps, how='inner') # inner join = keep common indices to both datasets

geotemps.head() # show the top
Out[13]:
latitude longitude speed (km/h) course distance_interval (m) T1 T2 Tavg Tanom
2016-09-28 14:45:40 41.449340 -71.450023 0.5 249.6 7.19 19.080 19.151 19.1155 -0.09949
2016-09-28 14:46:23 41.449276 -71.449941 0.8 135.9 9.80 19.199 19.318 19.2585 0.04351
2016-09-28 14:46:44 41.449189 -71.449854 2.1 143.5 12.15 19.294 19.389 19.3415 0.12651
2016-09-28 14:47:05 41.449060 -71.449785 2.6 157.9 15.42 19.413 19.460 19.4365 0.22151
2016-09-28 14:47:44 41.448925 -71.449571 2.2 130.2 23.30 19.532 19.555 19.5435 0.32851
In [14]:
geotemps.tail() # look at the end
Out[14]:
latitude longitude speed (km/h) course distance_interval (m) T1 T2 Tavg Tanom
2016-09-28 15:39:13 41.449398 -71.449825 0.3 52.4 2.55 19.127 19.199 19.163 -0.05199
2016-09-28 15:39:31 41.449409 -71.449822 0.3 12.2 1.26 19.127 19.175 19.151 -0.06399
2016-09-28 15:39:35 41.449426 -71.449768 4.4 66.6 4.93 19.127 19.199 19.163 -0.05199
2016-09-28 15:39:40 41.449440 -71.449692 4.7 76.7 6.53 19.127 19.199 19.163 -0.05199
2016-09-28 15:39:59 41.449549 -71.449420 4.9 61.9 25.75 19.127 19.175 19.151 -0.06399
In [15]:
geotemps.describe() # statistics
Out[15]:
latitude longitude speed (km/h) course distance_interval (m) T1 T2 Tavg Tanom
count 161.000000 161.000000 161.000000 161.000000 161.000000 161.000000 161.000000 161.000000 161.000000
mean 41.447038 -71.447632 4.090683 193.837267 20.426273 19.181919 19.237696 19.209807 -0.005183
std 0.003306 0.002798 2.278261 105.758274 13.025238 0.079244 0.122763 0.096515 0.096515
min 41.442823 -71.451352 0.000000 0.900000 0.240000 19.080000 19.151000 19.115500 -0.099490
25% 41.443626 -71.449854 2.100000 114.600000 7.190000 19.151000 19.199000 19.163000 -0.051990
50% 41.447323 -71.448300 4.700000 169.400000 21.980000 19.175000 19.199000 19.186500 -0.028490
75% 41.449409 -71.445444 6.000000 313.000000 31.460000 19.175000 19.222000 19.210500 -0.004490
max 41.453533 -71.442774 8.200000 359.400000 49.870000 19.579000 19.984000 19.662500 0.447510
In [16]:
# plotting timeseries
#plt.figure()
geotemps.plot(subplots=True); # put all on separate subplots
In [23]:
# more plotting
plt.figure()
#fig.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

cm = plt.cm.get_cmap('winter')


x = geotemps['longitude']
y = geotemps['latitude']
c = geotemps['Tavg'] - geotemps['Tavg'].mean()

# color normalization
#c /= c.max()
#colors = cm.seismic(np.linspace(0,1,len(c)))

'''
# plot point by point - probably not best way
for i in range(len(x)):
    plt.scatter(x[i],y[i], color=colors[i], vmin=18, vmax=20, s=55, cmap=cm)
    
'''
# plot whole list at once

sc = plt.scatter(x, y, c=c, s=55, cmap=cm, alpha=0.8, label='T')

c = plt.colorbar(sc, orientation='horizontal')
c.set_label('T')

plt.xlim((x.min(), x.max()))
plt.ylim((y.min(),y.max()))

plt.xlabel('Longitude')
plt.ylabel('Latitude')

#plt.legend()
titlestr = u'Temperature anomaly [°C] at mouth of Narrow River'
plt.title(titlestr);
#plt.show()
In [18]:
# more plotting
plt.figure()
#fig.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

cm = plt.cm.get_cmap('rainbow')


x = geotemps['longitude']
y = geotemps['latitude']
c = geotemps.index

# color normalization
#c /= c.max()
#colors = cm.seismic(np.linspace(0,1,len(c)))

'''
# plot point by point - probably not best way
for i in range(len(x)):
    plt.scatter(x[i],y[i], color=colors[i], vmin=18, vmax=20, s=55, cmap=cm)
    
'''
# plot whole list at once

sc = plt.scatter(x, y, c=c, s=70, cmap=cm, alpha=0.8, label='time')
plt.plot(x,y,'k')
#c = plt.colorbar(sc, orientation='horizontal')
#c.set_label('T')

plt.xlim((x.min(), x.max()))
plt.ylim((y.min(),y.max()))

plt.xlabel('Longitude')
plt.ylabel('Latitude')

plt.legend()
titlestr = 'Track time log, lightens toward end'
plt.title(titlestr);
#plt.show()

plotting on a map... maybe

In [19]:
# basemap plotting
from mpl_toolkits.basemap import Basemap
In [20]:
plt.figure()
m = Basemap()
m.drawcoastlines()
plt.show()
In [21]:
plt.figure(figsize=(9,16))

minlon = geotemps['longitude'].min()
minlat = geotemps['latitude'].min()
maxlon = geotemps['longitude'].max()
maxlat = geotemps['latitude'].max()
meanlon = geotemps['longitude'].mean()
meanlat = geotemps['latitude'].mean()


m = Basemap(projection='merc',
            lat_0=meanlat,
            lon_0=meanlon,
            resolution='h',
            area_thresh=0.1,
            llcrnrlon=-71.5,
            llcrnrlat=41.4,
            urcrnrlon=-71.4,
            urcrnrlat=41.5,
            )


#m.drawmapboundary(fill_color='blue')
#m.fillcontinents(color='coral', lake_color='aqua')
#m.bluemarble()
#m.drawcoastlines()
#m.drawrivers()

m.readshapefile('./rhode_island_water/rhode_island_water', 'water')
m.readshapefile('./rhode_island_coastline/rhode_island_coastline', 'coast')
#wx, wy = zip(*m.water)
#m.plot(wx,wy,'k')


# scatter our data on the map colored by temp anomoly
x1, y1 = m(geotemps['longitude'].values, geotemps['latitude'].values) # cast into map coordinates [m]
c = geotemps['Tanom']

m.scatter(x1, y1, c=c, s=30, cmap='seismic', zorder=5)


plt.show()
In [24]:
plt.figure(figsize=(9,16))

minlon = geotemps['longitude'].min()
minlat = geotemps['latitude'].min()
maxlon = geotemps['longitude'].max()
maxlat = geotemps['latitude'].max()
meanlon = geotemps['longitude'].mean()
meanlat = geotemps['latitude'].mean()


m = Basemap(projection='merc',
            lat_0=meanlat,
            lon_0=meanlon,
            resolution='h',
            area_thresh=0.001,
            llcrnrlon=-71.452,
            llcrnrlat=41.44,
            urcrnrlon=-71.442,
            urcrnrlat=41.455,
            )


#m.drawmapboundary(fill_color='blue')
#m.fillcontinents(color='coral', lake_color='aqua')
#m.bluemarble()
#m.drawcoastlines()
#m.drawrivers()

m.readshapefile('./rhode_island_water/rhode_island_water', 'water')
m.readshapefile('./rhode_island_coastline/rhode_island_coastline', 'coast')
#wx, wy = zip(*m.water)
#m.plot(wx,wy,'k')


# scatter our data on the map colored by temp anomoly
x1, y1 = m(geotemps['longitude'].values, geotemps['latitude'].values) # cast into map coordinates [m]
c = geotemps['Tanom']

m.scatter(x1, y1, c=c, s=70, cmap='winter', zorder=5)


plt.show()
In [38]:
t9 = readTemp('./NR_2016-09-28_T9.csv')
t8 = readTemp('./NR_2016-09-28_T8.csv')
t10 = readTemp('./NR_2016-09-28_T10.csv')
t6 = readTemp('./NR_2016-09-28_T6.csv')
t12 = readTemp('./NR_2016-09-28_T12.csv')
In [40]:
%matplotlib nbagg
In [42]:
plt.figure(figsize=(16,9))
plt.hold(True)
plt.plot(t9['Temp, \xc2\xb0C (LGR S/N: 11001009, SEN S/N: 11001009)'].values, label='t9')
plt.plot(t8['Temp, \xc2\xb0C (LGR S/N: 11000997, SEN S/N: 11000997)'].values, label='t8')
plt.plot(t10['Temp, \xc2\xb0C (LGR S/N: 11001008, SEN S/N: 11001008)'].values, label='t10')
plt.plot(t6['Temp, \xc2\xb0C (LGR S/N: 11000996, SEN S/N: 11000996)'].values, label='t6')
plt.plot(t12['Temp, \xc2\xb0C (LGR S/N: 11001006, SEN S/N: 11001006)'].values, label='t12')
plt.plot(mytemps['T1'].values, label='t1')
plt.plot(mytemps['T2'].values, label='t2')
plt.legend()
Out[42]:
<matplotlib.legend.Legend at 0x7f75e1cdead0>