Module11_Exercise: Data Manipulation

Author

Cienna Kim

Published

June 17, 2026

Setup

# Load the packages
library(data.table)

Attaching package: 'data.table'
The following object is masked from 'package:base':

    %notin%
library(ggplot2)

1. Read in the Data Using Fread

# Read the temperature CSV into a data.table, naming the `file` argument
# explicitly, then print the result.
temp <- fread(file = "./data/Temperature.csv")
temp
             Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
             <char>    <int>     <char> <int> <int> <int>  <char> <char>
   1: DANT.19900110 19900110    10/1/90     7     9     9    DANT     WZ
   2: DANT.19900206 19900206     6/2/90    34    36    36    DANT     WZ
   3: DANT.19900308 19900308     8/3/90    64    66    66    DANT     WZ
   4: DANT.19900404 19900404     4/4/90    91    93    93    DANT     WZ
   5: DANT.19900509 19900509     9/5/90   126   128   128    DANT     WZ
  ---                                                                   
8524: ZUID.20050926 20050926  9/26/2005  5745  5747   268    ZUID     WZ
8525: ZUID.20051012 20051012   12/10/05  5761  5763   284    ZUID     WZ
8526: ZUID.20051027 20051027 10/27/2005  5776  5778   299    ZUID     WZ
8527: ZUID.20051110 20051110   10/11/05  5790  5792   313    ZUID     WZ
8528: ZUID.20051212 20051212   12/12/05  5822  5824   345    ZUID     WZ
      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
          <num>     <num> <int> <int> <char>    <num>       <num> <num>
   1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
   2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
   3:  681379.6   5920571  1990     3 spring    24.99        7.30 21.10
   4:  681379.6   5920571  1990     4 spring    28.79        8.20 25.00
   5:  681379.6   5920571  1990     5 spring    33.28       17.40 10.20
  ---                                                                  
8524:  733386.3   5928197  2005     9 autumn    30.91       15.47 11.40
8525:  733386.3   5928197  2005    10 autumn    31.18       13.45  8.30
8526:  733386.3   5928197  2005    10 autumn    28.67       12.09  4.56
8527:  733386.3   5928197  2005    11 autumn    29.53        9.03  4.94
8528:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

2. Extract All Winter Observations

# Keep every column, but only the rows whose Season is "winter".
winter_obs <- temp[Season == "winter", ]

3. Extract All Winter Observations for Area NC

# Filter in two chained steps: winter rows first, then those in area "NC".
winter_nc <- temp[Season == "winter"][Area == "NC"]

4. Select Only the Columns Area, Season and Temperature

# Column subset: all rows, restricted to Area, Season and Temperature.
cols_select1 <- temp[, list(Area, Season, Temperature)]

5. Select Only Area and Temperature but Only for Winter Observations

# Row filter (winter) and column selection (Area, Temperature) in one call.
winter_cols <- temp[
  Season == "winter",
  list(Area, Temperature)
]

6. Find Total Number of Observations in Winter

# Count the winter observations as the row count of the filtered table.
winter_count <- nrow(temp[Season == "winter"])
winter_count
[1] 1706

7. Calculate Mean Temperature and Mean Salinity in Winter (Note: the data contain missing values, so use na.rm = TRUE)

# Winter-only averages of Temperature and Salinity; missing values are
# removed before each mean is computed.
winter_means <- temp[
  Season == "winter",
  list(
    mean_temp = mean(Temperature, na.rm = TRUE),
    mean_sal = mean(Salinity, na.rm = TRUE)
  )
]
winter_means
   mean_temp mean_sal
       <num>    <num>
1:   5.57162 29.15756

8. Find Number of Observations Per Station in Winter

# Number of winter rows for each Station (result column is named N).
station_winter_count <- temp[Season == "winter", .N, by = "Station"]

9. Find Number of Observations Per Station Per Season

# Number of rows for every Station/Season combination present in the data.
station_season_count <- temp[, .N, by = c("Station", "Season")]

10. Estimate Average Temperatures by Month

# Mean Temperature per Month, ignoring missing values, then print it.
month_avg <- temp[,
  list(mean_temp = mean(Temperature, na.rm = TRUE)),
  by = "Month"
]
month_avg
    Month mean_temp
    <int>     <num>
 1:     1  5.174210
 2:     2  4.737400
 3:     3  6.125961
 4:     4  8.702035
 5:     5 12.293479
 6:     6 15.659933
 7:     7 18.077343
 8:     8 19.388355
 9:     9 16.995974
10:    10 13.619670
11:    11  9.848891
12:    12  6.746339

11. Estimate Average Temperatures by Month and Area

# Mean Temperature for each Month/Area pair, ignoring missing values.
# `keyby` groups and also returns the result sorted (keyed) by Month, Area.
month_area_avg <- temp[,
  list(mean_temp = mean(Temperature, na.rm = TRUE)),
  keyby = c("Month", "Area")
]

12. Plot Output of Previous Question

# Line chart of mean temperature per month, one coloured line per Area.
# Month is an integer column on a continuous axis, where the default breaks
# can fall on fractional values (e.g. 2.5); pin them to the twelve months
# and drop the in-between minor gridlines.
ggplot(month_area_avg, aes(x = Month, y = mean_temp, colour = Area)) +
  geom_line() +
  scale_x_continuous(breaks = 1:12, minor_breaks = NULL) +
  labs(
    title = "Average Temperature by Month and Area",
    x = "Month",
    y = "Mean Temperature"
  ) +
  theme_minimal()