Data

Author

Jal Vashi (N0990629)

For Objective 1: Is there a correlation between air temperature and sea surface temperature?

# readxl is used to read the excel file stored in the local computer drive. 
# ggplot2 is used to visualise the data set. 

library(readxl)
library(ggplot2)

height <- read_excel("/Users/jal4510/Desktop/expt/height,leaf.xlsx", sheet = "Jal_30th_1st_20250722_PlantEye")

# head is used to view the first 5-10 data points to ensure the data is loaded correctly
# Summery is used to summarise the variables and provide class, mean, and other basic statistical analyses. 

head(height)

# A tibble: 6 × 8
  unit                timestamp           treatment   Height_max Leaf_Area  SPAD
  <dttm>              <dttm>              <chr>            <dbl>     <dbl> <dbl>
1 1899-12-31 10:01:01 2025-06-30 11:19:17 Control_Me…       177.     2892.  26.9
2 1899-12-31 10:01:01 2025-07-07 15:38:39 Control_Me…       177.     4753.  31.1
3 1899-12-31 10:01:01 2025-07-14 11:54:04 Control_Me…       177.     4533.  35.7
4 1899-12-31 10:01:01 2025-07-21 14:04:57 Control_Me…       177.     6263.  35.3
5 1899-12-31 10:01:02 2025-06-30 11:19:17 Control_Me…       177.     2161.  29.4
6 1899-12-31 10:01:02 2025-07-07 15:38:39 Control_Me…       177.     5982.  28.5
# ℹ 2 more variables: Vit_C <dbl>, Weight <dbl>

summary(height)

      unit                          timestamp                     
 Min.   :1899-12-31 10:01:01.00   Min.   :2025-06-30 11:19:17.00  
 1st Qu.:1899-12-31 11:46:46.25   1st Qu.:2025-07-05 20:57:16.25  
 Median :1899-12-31 13:32:31.50   Median :2025-07-11 02:14:40.50  
 Mean   :1899-12-31 13:32:31.50   Mean   :2025-07-11 01:48:56.80  
 3rd Qu.:1899-12-31 15:18:16.75   3rd Qu.:2025-07-16 07:18:31.50  
 Max.   :1899-12-31 17:04:02.00   Max.   :2025-07-21 15:11:25.00  
                                                                  
  treatment           Height_max       Leaf_Area          SPAD      
 Length:256         Min.   : 28.91   Min.   :    0   Min.   : 2.10  
 Class :character   1st Qu.: 62.16   1st Qu.: 2123   1st Qu.:22.98  
 Mode  :character   Median : 78.81   Median : 3143   Median :29.80  
                    Mean   : 97.78   Mean   : 3637   Mean   :30.72  
                    3rd Qu.:136.15   3rd Qu.: 4742   3rd Qu.:39.27  
                    Max.   :178.01   Max.   :11092   Max.   :55.00  
                                                                    
     Vit_C             Weight     
 Min.   :  43.40   Min.   :0.100  
 1st Qu.:  69.83   1st Qu.:1.350  
 Median :  97.10   Median :2.450  
 Mean   : 153.24   Mean   :2.474  
 3rd Qu.: 163.62   3rd Qu.:3.325  
 Max.   :1174.00   Max.   :5.800  
 NA's   :192       NA's   :192

# sapply is used to identify the data type of the column in a data-set. 
sapply (height, class)

$unit
[1] "POSIXct" "POSIXt" 

$timestamp
[1] "POSIXct" "POSIXt" 

$treatment
[1] "character"

$Height_max
[1] "numeric"

$Leaf_Area
[1] "numeric"

$SPAD
[1] "numeric"

$Vit_C
[1] "numeric"

$Weight
[1] "numeric"

# str is to understand the kind of data, including rows and columns provided in the data set. 
str (height)

tibble [256 × 8] (S3: tbl_df/tbl/data.frame)
 $ unit      : POSIXct[1:256], format: "1899-12-31 10:01:01" "1899-12-31 10:01:01" ...
 $ timestamp : POSIXct[1:256], format: "2025-06-30 11:19:17" "2025-07-07 15:38:39" ...
 $ treatment : chr [1:256] "Control_Medania" "Control_Medania" "Control_Medania" "Control_Medania" ...
 $ Height_max: num [1:256] 177 177 177 177 177 ...
 $ Leaf_Area : num [1:256] 2892 4753 4533 6263 2161 ...
 $ SPAD      : num [1:256] 26.9 31.1 35.7 35.3 29.4 28.5 28.1 39.5 24.7 34.4 ...
 $ Vit_C     : num [1:256] NA NA NA 182 NA ...
 $ Weight    : num [1:256] NA NA NA 1 NA NA NA 1.4 NA NA ...

library(nortest)

# Perform the Anderson-Darling test for normality for different variables
height.norm <- ad.test(height$Height_max)
print(height.norm)


    Anderson-Darling normality test

data:  height$Height_max
A = 21.576, p-value < 2.2e-16

Leaf_Area.norm <- ad.test(height$Leaf_Area)
print(Leaf_Area.norm)


    Anderson-Darling normality test

data:  height$Leaf_Area
A = 5.1314, p-value = 1.036e-12

height$treatment <- as.factor(height$treatment)

result <- aov(Height_max ~ treatment, data = height)
summary(result)

             Df Sum Sq Mean Sq F value Pr(>F)
treatment     7   6881   982.9   0.412  0.894
Residuals   248 591801  2386.3

boxplot(Height_max ~ treatment, data = height,
        main = "Height_max by Treatment",
        xlab = "Treatment",
        ylab = "Maximum Height",
        col = "lightgreen",
        border = "darkgreen",
        las = 2,             # rotate axis labels vertically
        cex.axis = 0.8)      # shrink axis label font size

# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)  

# Run ANOVA
result <- aov(Leaf_Area ~ treatment, data = height)
summary(result)

             Df    Sum Sq  Mean Sq F value   Pr(>F)    
treatment     7 134581492 19225927   5.082 2.07e-05 ***
Residuals   248 938159041  3782899                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Boxplot of Leaf_Area by treatment
boxplot(Leaf_Area ~ treatment, data = height,
        main = "Leaf Area by Treatment",
        xlab = "Treatment",
        ylab = "Leaf Area",
        col = "lightblue",
        border = "darkblue",
        las = 2,            # rotate axis labels vertically
        cex.axis = 0.8)     # adjust axis text size

# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)

# Run ANOVA
result <- aov(SPAD ~ treatment, data = height)
summary(result)

             Df Sum Sq Mean Sq F value   Pr(>F)    
treatment     7   4604   657.7   6.636 3.34e-07 ***
Residuals   248  24581    99.1                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Boxplot of Leaf_Area by treatment
boxplot(SPAD ~ treatment, data = height,
        main = "SPAD by Treatment",           # Updated title to match the variable
        xlab = "Treatment",
        ylab = "SPAD",                        # Updated y-label
        col = "lightcoral",                   # Valid color name
        border = "darkred",
        las = 2,                              # Rotate axis labels vertically
        cex.axis = 0.8)                       # Adjust axis text size

# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)

# Run ANOVA
result <- aov(Vit_C ~ treatment, data = height)
summary(result)

            Df  Sum Sq Mean Sq F value Pr(>F)
treatment    7  308218   44031   1.275  0.279
Residuals   56 1933634   34529               
192 observations deleted due to missingness

# Load required package
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# Set up 1 row and 2 columns for plotting side-by-side
par(mfrow = c(1, 2))  # optional: reset with par(mfrow = c(1, 1)) later

# Boxplot of Vit_C by treatment
boxplot(Vit_C ~ treatment, data = height,
        main = "Vit_C by Treatment",
        xlab = "Treatment",
        ylab = "Vit_C",
        col = "gray",
        border = "black",
        las = 2,
        cex.axis = 0.8)

# Calculate mean Vit_C per treatment
means <- height %>%
  group_by(treatment) %>%
  summarise(mean_vitc = mean(Vit_C, na.rm = TRUE))

# Ensure treatment order matches the factor levels
means$treatment <- factor(means$treatment, levels = levels(height$treatment))

# Line graph of mean Vit_C per treatment
plot(means$treatment, means$mean_vitc,
     type = "b",                    # Both points and lines
     main = "Mean Vit_C per Treatment",
     xlab = "Treatment",
     ylab = "Mean Vit_C",
     col = "darkred",
     pch = 16,                      # Solid circle point
     lwd = 2,                       # Line width
     cex = 1.2)                     # Point size