GEOG 5680/6680 Module 11: Data Manipulation III

Author

Zach Grube

Setup

library(data.table)
library(ggplot2)

Read In The Data Using fread

# Locate Temperature.csv: prefer a copy found relative to the current working
# directory, and fall back to the original absolute location otherwise.
data_file <- "Temperature.csv"

if (!file.exists(data_file)) {
  data_file <- "C:/Users/yolom/OneDrive/Desktop/GEOG5680/Module 11/Temperature.csv"
}

# Fail early with a readable message when neither location holds the file.
if (!file.exists(data_file)) {
  stop("Temperature.csv was not found at: ", data_file, call. = FALSE)
}

# Pass the path through `file =` instead of positionally. The fallback path
# contains a space ("Module 11"), and fread()'s first argument (`input`)
# interprets a string with a space that is not an existing file as a shell
# command rather than as a file name.
temperature <- fread(file = data_file)

dim(temperature)
[1] 8528   16
names(temperature)
 [1] "Sample"      "Date"        "DateNr"      "dDay1"       "dDay2"      
 [6] "dDay3"       "Station"     "Area"        "31UE_ED50"   "31UN_ED50"  
[11] "Year"        "Month"       "Season"      "Salinity"    "Temperature"
[16] "CHLFa"      
head(temperature)
          Sample     Date    DateNr dDay1 dDay2 dDay3 Station   Area 31UE_ED50
          <char>    <int>    <char> <int> <int> <int>  <char> <char>     <num>
1: DANT.19900110 19900110   10/1/90     7     9     9    DANT     WZ  681379.6
2: DANT.19900206 19900206    6/2/90    34    36    36    DANT     WZ  681379.6
3: DANT.19900308 19900308    8/3/90    64    66    66    DANT     WZ  681379.6
4: DANT.19900404 19900404    4/4/90    91    93    93    DANT     WZ  681379.6
5: DANT.19900509 19900509    9/5/90   126   128   128    DANT     WZ  681379.6
6: DANT.19900620 19900620 6/20/1990   168   170   170    DANT     WZ  681379.6
   31UN_ED50  Year Month Season Salinity Temperature CHLFa
       <num> <int> <int> <char>    <num>       <num> <num>
1:   5920571  1990     1 winter    29.19         4.0   1.3
2:   5920571  1990     2 winter    27.37         6.0    NA
3:   5920571  1990     3 spring    24.99         7.3  21.1
4:   5920571  1990     4 spring    28.79         8.2  25.0
5:   5920571  1990     5 spring    33.28        17.4  10.2
6:   5920571  1990     6 summer    32.69        18.1   6.2

1. Extract All Winter Observations

# Row filter only: keep every column, but just the rows whose Season is
# "winter".
winter_observations <- temperature[Season == "winter", ]
winter_observations
             Sample     Date    DateNr dDay1 dDay2 dDay3 Station   Area
             <char>    <int>    <char> <int> <int> <int>  <char> <char>
   1: DANT.19900110 19900110   10/1/90     7     9     9    DANT     WZ
   2: DANT.19900206 19900206    6/2/90    34    36    36    DANT     WZ
   3: DANT.19901212 19901212  12/12/90   343   345   345    DANT     WZ
   4: DANT.19910116 19910116 1/16/1991   378   380    15    DANT     WZ
   5: DANT.19910226 19910226 2/26/1991   419   421    56    DANT     WZ
  ---                                                                  
1702: ZUID.20040216 20040216 2/16/2004  5157  5159    46    ZUID     WZ
1703: ZUID.20041208 20041208   8/12/04  5453  5455   342    ZUID     WZ
1704: ZUID.20050119 20050119 1/19/2005  5495  5497    18    ZUID     WZ
1705: ZUID.20050218 20050218 2/18/2005  5525  5527    48    ZUID     WZ
1706: ZUID.20051212 20051212  12/12/05  5822  5824   345    ZUID     WZ
      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
          <num>     <num> <int> <int> <char>    <num>       <num> <num>
   1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
   2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
   3:  681379.6   5920571  1990    12 winter    31.50        4.20 60.50
   4:  681379.6   5920571  1991     1 winter    20.83       -0.30  2.30
   5:  681379.6   5920571  1991     2 winter    28.06        3.90  3.52
  ---                                                                  
1702:  733386.3   5928197  2004     2 winter    24.70        4.40    NA
1703:  733386.3   5928197  2004    12 winter    28.67        4.72  1.52
1704:  733386.3   5928197  2005     1 winter    28.39        3.57  4.12
1705:  733386.3   5928197  2005     2 winter    29.31        1.31  6.56
1706:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

2. Extract All Winter Observations For Zone NC

# Two chained row filters: first restrict to winter, then to area "NC".
winter_nc <- temperature[Season == "winter"][Area == "NC"]
winter_nc
            Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
            <char>    <int>     <char> <int> <int> <int>  <char> <char>
  1: T100.19900103 19900103     3/1/90     0     2     2    T100     NC
  2: T100.19900205 19900205     5/2/90    33    35    35    T100     NC
  3: T100.19901218 19901218 12/18/1990   349   351   351    T100     NC
  4: T100.19910116 19910116  1/16/1991   378   380    15    T100     NC
  5: T100.19910205 19910205     5/2/91   398   400    35    T100     NC
 ---                                                                   
177: T235.20040209 20040209     9/2/04  5150  5152    39    T235     NC
178: T235.20041214 20041214 12/14/2004  5459  5461   348    T235     NC
179: T235.20050126 20050126  1/26/2005  5502  5504    25    T235     NC
180: T235.20050214 20050214  2/14/2005  5521  5523    44    T235     NC
181: T235.20051220 20051220 12/20/2005  5830  5832   353    T235     NC
     31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
         <num>     <num> <int> <int> <char>    <num>       <num> <num>
  1:  587650.2   6001110  1990     1 winter    34.82        8.50  0.30
  2:  587650.2   6001110  1990     2 winter       NA          NA    NA
  3:  587650.2   6001110  1990    12 winter    34.80        9.20  0.40
  4:  587650.2   6001110  1991     1 winter    34.86        6.10  0.68
  5:  587650.2   6001110  1991     2 winter    34.53        5.20  0.34
 ---                                                                  
177:  510032.3   6114101  2004     2 winter       NA          NA    NA
178:  510032.3   6114101  2004    12 winter    34.96        8.47  0.62
179:  510032.3   6114101  2005     1 winter    35.09        6.44  0.62
180:  510032.3   6114101  2005     2 winter       NA          NA    NA
181:  510032.3   6114101  2005    12 winter    33.87        8.19  1.12

3. Select Only Area, Season, And Temperature

# Column selection only: all rows, restricted to the three named columns.
area_season_temperature <- temperature[
  ,
  .SD,
  .SDcols = c("Area", "Season", "Temperature")
]
area_season_temperature
        Area Season Temperature
      <char> <char>       <num>
   1:     WZ winter        4.00
   2:     WZ winter        6.00
   3:     WZ spring        7.30
   4:     WZ spring        8.20
   5:     WZ spring       17.40
  ---                          
8524:     WZ autumn       15.47
8525:     WZ autumn       13.45
8526:     WZ autumn       12.09
8527:     WZ autumn        9.03
8528:     WZ winter        5.13

4. Select Area And Temperature For Winter Observations

# Combine a row filter (winter) with a column selection (Area, Temperature)
# in a single data.table call.
winter_area_temperature <- temperature[Season == "winter", list(Area, Temperature)]

winter_area_temperature
        Area Temperature
      <char>       <num>
   1:     WZ        4.00
   2:     WZ        6.00
   3:     WZ        4.20
   4:     WZ       -0.30
   5:     WZ        3.90
  ---                   
1702:     WZ        4.40
1703:     WZ        4.72
1704:     WZ        3.57
1705:     WZ        1.31
1706:     WZ        5.13

5. Total Number Of Observations In Winter

# Count the winter rows by measuring the filtered table.
n_winter <- nrow(temperature[Season == "winter"])
n_winter
[1] 1706

6. Mean Temperature And Mean Salinity In Winter

# Filter to winter first, then summarise the remaining rows into one row of
# means. Missing values are dropped from each mean.
winter_means <- temperature[Season == "winter"][
  ,
  list(
    mean_temperature = mean(Temperature, na.rm = TRUE),
    mean_salinity = mean(Salinity, na.rm = TRUE)
  )
]

winter_means
   mean_temperature mean_salinity
              <num>         <num>
1:          5.57162      29.15756

7. Number Of Observations Per Station In Winter

# Winter row count per station; keyby sorts the result by Station and sets it
# as the key.
winter_station_counts <- temperature[Season == "winter", .N, keyby = "Station"]

winter_station_counts
Key: <Station>
    Station     N
     <char> <int>
 1:    DANT    50
 2:    DREI    52
 3:      G6   101
 4:    GROO    50
 5:    HAMM    55
 6:    HANS    56
 7:    HUIB    50
 8:    LODS    54
 9:    MARS    49
10:     N02   115
11:     N10   131
12:     N20    50
13:     N70    50
14:     R03    32
15:    SOEL    50
16:    T004    97
17:    T010    45
18:    T100    45
19:    T135    46
20:    T175    45
21:    T235    45
22:    VLIS    84
23:     W02    99
24:     W20    47
25:     W70    47
26:    WISS    55
27:    ZIJP    54
28:    ZUID    52
    Station     N
     <char> <int>

8. Number Of Observations Per Station Per Season

# Row count for every Station/Season combination, keyed (and therefore
# sorted) by both grouping columns.
station_season_counts <- temperature[, .N, keyby = c("Station", "Season")]

station_season_counts
Key: <Station, Season>
     Station Season     N
      <char> <char> <int>
  1:    DANT autumn    72
  2:    DANT spring    89
  3:    DANT summer    89
  4:    DANT winter    50
  5:    DREI autumn    61
 ---                     
114:    ZIJP winter    54
115:    ZUID autumn    73
116:    ZUID spring    89
117:    ZUID summer    89
118:    ZUID winter    52

9. Average Temperatures By Month

# Mean temperature for each calendar month, ignoring missing readings.
# The result is keyed by Month.
average_temperature_month <- temperature[
  ,
  list(mean_temperature = mean(Temperature, na.rm = TRUE)),
  keyby = "Month"
]

average_temperature_month
Key: <Month>
    Month mean_temperature
    <int>            <num>
 1:     1         5.174210
 2:     2         4.737400
 3:     3         6.125961
 4:     4         8.702035
 5:     5        12.293479
 6:     6        15.659933
 7:     7        18.077343
 8:     8        19.388355
 9:     9        16.995974
10:    10        13.619670
11:    11         9.848891
12:    12         6.746339

10. Average Temperatures By Month By Area

# Mean temperature for each Area/Month pair, ignoring missing readings.
# Keyed by Area, then Month.
average_temperature_month_area <- temperature[
  ,
  list(mean_temperature = mean(Temperature, na.rm = TRUE)),
  keyby = c("Area", "Month")
]

average_temperature_month_area
Key: <Area, Month>
       Area Month mean_temperature
     <char> <int>            <num>
  1:     ED     1         3.086333
  2:     ED     2         3.306667
  3:     ED     3         5.648000
  4:     ED     4         9.470000
  5:     ED     5        14.025161
 ---                              
116:     WZ     8        19.548851
117:     WZ     9        15.905122
118:     WZ    10        12.288816
119:     WZ    11         8.261600
120:     WZ    12         4.985111

11. Plot Average Temperatures By Month By Area

# One line (with point markers) per area, showing the monthly mean
# temperature; the x axis is labelled at every month number 1 through 12.
ggplot(
  data = average_temperature_month_area,
  mapping = aes(x = Month, y = mean_temperature, colour = Area)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  scale_x_continuous(breaks = seq_len(12)) +
  ggtitle("Average Sea Temperature By Month And Area") +
  xlab("Month") +
  ylab("Average temperature (C)") +
  labs(colour = "Area") +
  theme_minimal()