GEOG 5680 Module 11 Exercise

Author

Vivian Strange

1. Read in the data using fread

library(data.table)
temperature = fread("Temperature.csv")
class(temperature)
[1] "data.table" "data.frame"
(temperature)
             Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
             <char>    <int>     <char> <int> <int> <int>  <char> <char>
   1: DANT.19900110 19900110    10/1/90     7     9     9    DANT     WZ
   2: DANT.19900206 19900206     6/2/90    34    36    36    DANT     WZ
   3: DANT.19900308 19900308     8/3/90    64    66    66    DANT     WZ
   4: DANT.19900404 19900404     4/4/90    91    93    93    DANT     WZ
   5: DANT.19900509 19900509     9/5/90   126   128   128    DANT     WZ
  ---                                                                   
8524: ZUID.20050926 20050926  9/26/2005  5745  5747   268    ZUID     WZ
8525: ZUID.20051012 20051012   12/10/05  5761  5763   284    ZUID     WZ
8526: ZUID.20051027 20051027 10/27/2005  5776  5778   299    ZUID     WZ
8527: ZUID.20051110 20051110   10/11/05  5790  5792   313    ZUID     WZ
8528: ZUID.20051212 20051212   12/12/05  5822  5824   345    ZUID     WZ
      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
          <num>     <num> <int> <int> <char>    <num>       <num> <num>
   1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
   2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
   3:  681379.6   5920571  1990     3 spring    24.99        7.30 21.10
   4:  681379.6   5920571  1990     4 spring    28.79        8.20 25.00
   5:  681379.6   5920571  1990     5 spring    33.28       17.40 10.20
  ---                                                                  
8524:  733386.3   5928197  2005     9 autumn    30.91       15.47 11.40
8525:  733386.3   5928197  2005    10 autumn    31.18       13.45  8.30
8526:  733386.3   5928197  2005    10 autumn    28.67       12.09  4.56
8527:  733386.3   5928197  2005    11 autumn    29.53        9.03  4.94
8528:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

2. Extract all winter observations

winter = temperature[Season == "winter"]
(winter)
             Sample     Date    DateNr dDay1 dDay2 dDay3 Station   Area
             <char>    <int>    <char> <int> <int> <int>  <char> <char>
   1: DANT.19900110 19900110   10/1/90     7     9     9    DANT     WZ
   2: DANT.19900206 19900206    6/2/90    34    36    36    DANT     WZ
   3: DANT.19901212 19901212  12/12/90   343   345   345    DANT     WZ
   4: DANT.19910116 19910116 1/16/1991   378   380    15    DANT     WZ
   5: DANT.19910226 19910226 2/26/1991   419   421    56    DANT     WZ
  ---                                                                  
1702: ZUID.20040216 20040216 2/16/2004  5157  5159    46    ZUID     WZ
1703: ZUID.20041208 20041208   8/12/04  5453  5455   342    ZUID     WZ
1704: ZUID.20050119 20050119 1/19/2005  5495  5497    18    ZUID     WZ
1705: ZUID.20050218 20050218 2/18/2005  5525  5527    48    ZUID     WZ
1706: ZUID.20051212 20051212  12/12/05  5822  5824   345    ZUID     WZ
      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
          <num>     <num> <int> <int> <char>    <num>       <num> <num>
   1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
   2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
   3:  681379.6   5920571  1990    12 winter    31.50        4.20 60.50
   4:  681379.6   5920571  1991     1 winter    20.83       -0.30  2.30
   5:  681379.6   5920571  1991     2 winter    28.06        3.90  3.52
  ---                                                                  
1702:  733386.3   5928197  2004     2 winter    24.70        4.40    NA
1703:  733386.3   5928197  2004    12 winter    28.67        4.72  1.52
1704:  733386.3   5928197  2005     1 winter    28.39        3.57  4.12
1705:  733386.3   5928197  2005     2 winter    29.31        1.31  6.56
1706:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

3. Extract all winter observations for zone NC

winter_NC = temperature[Season == "winter" & Area == "NC"]
(winter_NC)
            Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
            <char>    <int>     <char> <int> <int> <int>  <char> <char>
  1: T100.19900103 19900103     3/1/90     0     2     2    T100     NC
  2: T100.19900205 19900205     5/2/90    33    35    35    T100     NC
  3: T100.19901218 19901218 12/18/1990   349   351   351    T100     NC
  4: T100.19910116 19910116  1/16/1991   378   380    15    T100     NC
  5: T100.19910205 19910205     5/2/91   398   400    35    T100     NC
 ---                                                                   
177: T235.20040209 20040209     9/2/04  5150  5152    39    T235     NC
178: T235.20041214 20041214 12/14/2004  5459  5461   348    T235     NC
179: T235.20050126 20050126  1/26/2005  5502  5504    25    T235     NC
180: T235.20050214 20050214  2/14/2005  5521  5523    44    T235     NC
181: T235.20051220 20051220 12/20/2005  5830  5832   353    T235     NC
     31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
         <num>     <num> <int> <int> <char>    <num>       <num> <num>
  1:  587650.2   6001110  1990     1 winter    34.82        8.50  0.30
  2:  587650.2   6001110  1990     2 winter       NA          NA    NA
  3:  587650.2   6001110  1990    12 winter    34.80        9.20  0.40
  4:  587650.2   6001110  1991     1 winter    34.86        6.10  0.68
  5:  587650.2   6001110  1991     2 winter    34.53        5.20  0.34
 ---                                                                  
177:  510032.3   6114101  2004     2 winter       NA          NA    NA
178:  510032.3   6114101  2004    12 winter    34.96        8.47  0.62
179:  510032.3   6114101  2005     1 winter    35.09        6.44  0.62
180:  510032.3   6114101  2005     2 winter       NA          NA    NA
181:  510032.3   6114101  2005    12 winter    33.87        8.19  1.12

4. Select only the columns Area, Season and Temperature

q4_column_select_area_szn_temp = temperature[, list(Area, Season, Temperature)]
(q4_column_select_area_szn_temp)
        Area Season Temperature
      <char> <char>       <num>
   1:     WZ winter        4.00
   2:     WZ winter        6.00
   3:     WZ spring        7.30
   4:     WZ spring        8.20
   5:     WZ spring       17.40
  ---                          
8524:     WZ autumn       15.47
8525:     WZ autumn       13.45
8526:     WZ autumn       12.09
8527:     WZ autumn        9.03
8528:     WZ winter        5.13

5. Select only the columns Area and Temperature but only for winter observations

q5_column_select_area_temp_by_winter = winter[, list(Area, Temperature)]
(q5_column_select_area_temp_by_winter)
        Area Temperature
      <char>       <num>
   1:     WZ        4.00
   2:     WZ        6.00
   3:     WZ        4.20
   4:     WZ       -0.30
   5:     WZ        3.90
  ---                   
1702:     WZ        4.40
1703:     WZ        4.72
1704:     WZ        3.57
1705:     WZ        1.31
1706:     WZ        5.13

6. Find the total number of observations in winter

winter_obs_count = temperature[Season == "winter", length(Year)]
temperature[Season == "winter", length(Year)]
[1] 1706

The total number of observations in winter is 1,706.

7. Calculate the mean temperature and mean salinity in winter (Note that there are missing values so will have to use na.rm = TRUE)

q7_mean_temp_and_salinity = winter[, list(m_temp = mean(Temperature, na.rm = TRUE), m_salinity = mean(Salinity, na.rm = TRUE))]
winter[, list(m_temp = mean(Temperature, na.rm = TRUE), m_salinity = mean(Salinity, na.rm = TRUE))]
    m_temp m_salinity
     <num>      <num>
1: 5.57162   29.15756

Mean Temperature in Winter = 5.57162

Mean Salinity in Winter = 29.15756

8. Find the number of observations per station in winter

q8_obs_per_station_in_winter = winter[, .(.N), by = .(Station)]
temperature[, list(.N), by = .(Station, Season)]
     Station Season     N
      <char> <char> <int>
  1:    DANT winter    50
  2:    DANT spring    89
  3:    DANT summer    89
  4:    DANT autumn    72
  5:    DREI winter    52
 ---                     
114:    ZIJP autumn    61
115:    ZUID winter    52
116:    ZUID spring    89
117:    ZUID summer    89
118:    ZUID autumn    73

9. Find the number of observations per station per season

q9_obs_per_station_per_season = temperature[, list(.N), by = .(Station, Season)]
temperature[, list(.N), by = .(Station, Season)]
     Station Season     N
      <char> <char> <int>
  1:    DANT winter    50
  2:    DANT spring    89
  3:    DANT summer    89
  4:    DANT autumn    72
  5:    DREI winter    52
 ---                     
114:    ZIJP autumn    61
115:    ZUID winter    52
116:    ZUID spring    89
117:    ZUID summer    89
118:    ZUID autumn    73

10. Estimate average temperatures by month

q10_avg_temp_by_month = temperature[, .(m_temp = mean(Temperature, na.rm = TRUE)), by = Month]
temperature[, .(m_temp = mean(Temperature, na.rm = TRUE)), by = Month]
    Month    m_temp
    <int>     <num>
 1:     1  5.174210
 2:     2  4.737400
 3:     3  6.125961
 4:     4  8.702035
 5:     5 12.293479
 6:     6 15.659933
 7:     7 18.077343
 8:     8 19.388355
 9:     9 16.995974
10:    10 13.619670
11:    11  9.848891
12:    12  6.746339

11. Estimate average temperatures by month by area

q11_avg_temp_by_month_by_area = temperature[, .(m_temp = mean(Temperature, na.rm = TRUE)), by = .(Month, Area)]
temperature[, .(m_temp = mean(Temperature, na.rm = TRUE)), by = .(Month, Area)]
     Month   Area    m_temp
     <int> <char>     <num>
  1:     1     WZ  3.377826
  2:     2     WZ  3.925800
  3:     3     WZ  5.818481
  4:     4     WZ  9.270805
  5:     5     WZ 13.398191
 ---                       
116:     1     NC  6.789808
117:     2     NC  5.682581
118:     3     NC  5.837500
119:    11     NC 10.978269
120:    12     NC  8.716957

12. Plot the output of the previous question using ggplot2 using the geom_line() geometry

library(ggplot2)
q11_avg_temp_by_month_by_area[, Month := factor(month.abb[Month], levels = month.abb)]
ggplot(q11_avg_temp_by_month_by_area, aes(x=Month, y=m_temp, color = Area, group=Area)) + geom_line(linewidth=.5) + labs(x="Month", y="Temperature (Celsius)", title ="Average Temperatures by Month by Area")