exercise11

Author

Hangu Lee

0. Setting

# Load the required packages
library(data.table)

Attaching package: 'data.table'
The following object is masked from 'package:base':

    %notin%
library(ggplot2)

1. Read in the data using fread

# Import the temperature CSV as a data.table, then print it for a quick look
# at the columns and their types.
temp_data <- fread(input = "./data/Temperature.csv")
temp_data
             Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
             <char>    <int>     <char> <int> <int> <int>  <char> <char>
   1: DANT.19900110 19900110    10/1/90     7     9     9    DANT     WZ
   2: DANT.19900206 19900206     6/2/90    34    36    36    DANT     WZ
   3: DANT.19900308 19900308     8/3/90    64    66    66    DANT     WZ
   4: DANT.19900404 19900404     4/4/90    91    93    93    DANT     WZ
   5: DANT.19900509 19900509     9/5/90   126   128   128    DANT     WZ
  ---                                                                   
8524: ZUID.20050926 20050926  9/26/2005  5745  5747   268    ZUID     WZ
8525: ZUID.20051012 20051012   12/10/05  5761  5763   284    ZUID     WZ
8526: ZUID.20051027 20051027 10/27/2005  5776  5778   299    ZUID     WZ
8527: ZUID.20051110 20051110   10/11/05  5790  5792   313    ZUID     WZ
8528: ZUID.20051212 20051212   12/12/05  5822  5824   345    ZUID     WZ
      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
          <num>     <num> <int> <int> <char>    <num>       <num> <num>
   1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
   2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
   3:  681379.6   5920571  1990     3 spring    24.99        7.30 21.10
   4:  681379.6   5920571  1990     4 spring    28.79        8.20 25.00
   5:  681379.6   5920571  1990     5 spring    33.28       17.40 10.20
  ---                                                                  
8524:  733386.3   5928197  2005     9 autumn    30.91       15.47 11.40
8525:  733386.3   5928197  2005    10 autumn    31.18       13.45  8.30
8526:  733386.3   5928197  2005    10 autumn    28.67       12.09  4.56
8527:  733386.3   5928197  2005    11 autumn    29.53        9.03  4.94
8528:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

2. Extract all winter observations

# Row filter in the `i` slot of DT[i, j, by]: keep only the winter rows,
# all columns retained.
winter_obs <- temp_data[
  Season == "winter",
]
# Preview the first six rows.
head(winter_obs, n = 6L)
          Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area 31UE_ED50
          <char>    <int>     <char> <int> <int> <int>  <char> <char>     <num>
1: DANT.19900110 19900110    10/1/90     7     9     9    DANT     WZ  681379.6
2: DANT.19900206 19900206     6/2/90    34    36    36    DANT     WZ  681379.6
3: DANT.19901212 19901212   12/12/90   343   345   345    DANT     WZ  681379.6
4: DANT.19910116 19910116  1/16/1991   378   380    15    DANT     WZ  681379.6
5: DANT.19910226 19910226  2/26/1991   419   421    56    DANT     WZ  681379.6
6: DANT.19911219 19911219 12/19/1991   715   717   352    DANT     WZ  681379.6
   31UN_ED50  Year Month Season Salinity Temperature CHLFa
       <num> <int> <int> <char>    <num>       <num> <num>
1:   5920571  1990     1 winter    29.19         4.0  1.30
2:   5920571  1990     2 winter    27.37         6.0    NA
3:   5920571  1990    12 winter    31.50         4.2 60.50
4:   5920571  1991     1 winter    20.83        -0.3  2.30
5:   5920571  1991     2 winter    28.06         3.9  3.52
6:   5920571  1991    12 winter    25.31         3.9  3.50

3. Extract all winter observations for zone NC

# Both conditions must hold for a row to be kept: winter season and area NC.
winter_nc <- temp_data[
  Season == "winter" & Area == "NC",
]
# Preview the first six rows.
head(winter_nc, n = 6L)
          Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area 31UE_ED50
          <char>    <int>     <char> <int> <int> <int>  <char> <char>     <num>
1: T100.19900103 19900103     3/1/90     0     2     2    T100     NC  587650.2
2: T100.19900205 19900205     5/2/90    33    35    35    T100     NC  587650.2
3: T100.19901218 19901218 12/18/1990   349   351   351    T100     NC  587650.2
4: T100.19910116 19910116  1/16/1991   378   380    15    T100     NC  587650.2
5: T100.19910205 19910205     5/2/91   398   400    35    T100     NC  587650.2
6: T100.19911211 19911211   11/12/91   707   709   344    T100     NC  587650.2
   31UN_ED50  Year Month Season Salinity Temperature CHLFa
       <num> <int> <int> <char>    <num>       <num> <num>
1:   6001110  1990     1 winter    34.82         8.5  0.30
2:   6001110  1990     2 winter       NA          NA    NA
3:   6001110  1990    12 winter    34.80         9.2  0.40
4:   6001110  1991     1 winter    34.86         6.1  0.68
5:   6001110  1991     2 winter    34.53         5.2  0.34
6:   6001110  1991    12 winter    34.79         9.7  0.44

4. Select only the columns Area, Season and Temperature

# Column selection in the `j` slot: every row is kept, but only the three
# named columns, in the order listed.
cols_select1 <- temp_data[
  ,
  .SD,
  .SDcols = c("Area", "Season", "Temperature")
]
# Preview the first six rows.
head(cols_select1, n = 6L)
     Area Season Temperature
   <char> <char>       <num>
1:     WZ winter         4.0
2:     WZ winter         6.0
3:     WZ spring         7.3
4:     WZ spring         8.2
5:     WZ spring        17.4
6:     WZ summer        18.1

5. Select only the columns Area and Temperature but only for winter observations

# Combine a row filter (winter only) with a column selection (Area and
# Temperature) in a single DT[i, j] call.
winter_cols <- temp_data[
  Season == "winter",
  list(Area, Temperature)
]
# Preview the first six rows.
head(winter_cols, n = 6L)
     Area Temperature
   <char>       <num>
1:     WZ         4.0
2:     WZ         6.0
3:     WZ         4.2
4:     WZ        -0.3
5:     WZ         3.9
6:     WZ         3.9

6. Find the total number of observations in winter

# `.N` is the number of rows remaining after the `i` filter, so this is the
# count of winter observations as a single integer.
winter_count <- temp_data[
  Season == "winter",
  .N
]
winter_count
[1] 1706

7. Calculate the mean temperature and mean salinity in winter (note that there are missing values, so you will have to use na.rm = TRUE)

# One-row summary of the winter observations. na.rm = TRUE drops missing
# values before averaging; otherwise a single NA would make the mean NA.
winter_means <- temp_data[
  Season == "winter",
  list(
    mean_temp = mean(Temperature, na.rm = TRUE),
    mean_sal = mean(Salinity, na.rm = TRUE)
  )
]
winter_means
   mean_temp mean_sal
       <num>    <num>
1:   5.57162 29.15756

8. Find the number of observations per station in winter

# Count the winter rows separately for each station: filter in `i`,
# count with `.N` in `j`, group with `by`.
station_winter_count <- temp_data[
  Season == "winter",
  .N,
  by = "Station"
]
# Preview the first six stations.
head(station_winter_count, n = 6L)
   Station     N
    <char> <int>
1:    DANT    50
2:    DREI    52
3:      G6   101
4:    GROO    50
5:    HAMM    55
6:    HANS    56

9. Find the number of observations per station per season

# Count rows for every Station x Season combination; no row filter this time.
station_season_count <- temp_data[
  ,
  .N,
  by = c("Station", "Season")
]
# Preview the first six groups.
head(station_season_count, n = 6L)
   Station Season     N
    <char> <char> <int>
1:    DANT winter    50
2:    DANT spring    89
3:    DANT summer    89
4:    DANT autumn    72
5:    DREI winter    52
6:    DREI spring    92

10. Estimate average temperatures by month

# Mean temperature per calendar month, ignoring missing readings.
# With `by`, groups appear in order of first occurrence in the data.
avg_temp_month <- temp_data[
  ,
  list(mean_temp = mean(Temperature, na.rm = TRUE)),
  by = "Month"
]
avg_temp_month
    Month mean_temp
    <int>     <num>
 1:     1  5.174210
 2:     2  4.737400
 3:     3  6.125961
 4:     4  8.702035
 5:     5 12.293479
 6:     6 15.659933
 7:     7 18.077343
 8:     8 19.388355
 9:     9 16.995974
10:    10 13.619670
11:    11  9.848891
12:    12  6.746339

11. Estimate average temperatures by month by area

# Mean temperature for every Month x Area combination, ignoring missing
# readings. `keyby` groups like `by`, and additionally sorts the result by
# the grouping columns and sets them as the table's key.
avg_temp_month_area <- temp_data[
  ,
  list(mean_temp = mean(Temperature, na.rm = TRUE)),
  keyby = c("Month", "Area")
]
# Preview the first six rows.
head(avg_temp_month_area, n = 6L)
Key: <Month, Area>
   Month   Area mean_temp
   <int> <char>     <num>
1:     1     ED  3.086333
2:     1     GM  4.308750
3:     1     KZ  5.296222
4:     1     NC  6.789808
5:     1     NZ  8.122000
6:     1     OS  4.868028

12. Plot the output of the previous question with ggplot2, using the geom_line() geometry

# Line chart of the monthly mean temperature, one coloured line per area.
# Month is an integer from 1 to 12 placed on a continuous x scale; the
# automatically chosen tick marks can land on non-integer values such as 2.5,
# which are meaningless for months, so put one tick on every month instead.
ggplot(avg_temp_month_area, aes(x = Month, y = mean_temp, colour = Area)) +
  geom_line() +
  scale_x_continuous(breaks = 1:12) +
  labs(title = "Average Temperature by Month and Area",
       x = "Month",
       y = "Mean Temperature")