# readxl is used to read the excel file stored in the local computer drive.
# ggplot2 is used to visualise the data set.
library(readxl)
library(ggplot2)Data
For Objective 1: Is there a correlation between air temperature and sea surface temperature?
height <- read_excel("/Users/jal4510/Desktop/expt/height,leaf.xlsx", sheet = "Jal_30th_1st_20250722_PlantEye")# head is used to view the first 5-10 data points to ensure the data is loaded correctly
# Summery is used to summarise the variables and provide class, mean, and other basic statistical analyses.
head(height)# A tibble: 6 × 8
unit timestamp treatment Height_max Leaf_Area SPAD
<dttm> <dttm> <chr> <dbl> <dbl> <dbl>
1 1899-12-31 10:01:01 2025-06-30 11:19:17 Control_Me… 177. 2892. 26.9
2 1899-12-31 10:01:01 2025-07-07 15:38:39 Control_Me… 177. 4753. 31.1
3 1899-12-31 10:01:01 2025-07-14 11:54:04 Control_Me… 177. 4533. 35.7
4 1899-12-31 10:01:01 2025-07-21 14:04:57 Control_Me… 177. 6263. 35.3
5 1899-12-31 10:01:02 2025-06-30 11:19:17 Control_Me… 177. 2161. 29.4
6 1899-12-31 10:01:02 2025-07-07 15:38:39 Control_Me… 177. 5982. 28.5
# ℹ 2 more variables: Vit_C <dbl>, Weight <dbl>
summary(height) unit timestamp
Min. :1899-12-31 10:01:01.00 Min. :2025-06-30 11:19:17.00
1st Qu.:1899-12-31 11:46:46.25 1st Qu.:2025-07-05 20:57:16.25
Median :1899-12-31 13:32:31.50 Median :2025-07-11 02:14:40.50
Mean :1899-12-31 13:32:31.50 Mean :2025-07-11 01:48:56.80
3rd Qu.:1899-12-31 15:18:16.75 3rd Qu.:2025-07-16 07:18:31.50
Max. :1899-12-31 17:04:02.00 Max. :2025-07-21 15:11:25.00
treatment Height_max Leaf_Area SPAD
Length:256 Min. : 28.91 Min. : 0 Min. : 2.10
Class :character 1st Qu.: 62.16 1st Qu.: 2123 1st Qu.:22.98
Mode :character Median : 78.81 Median : 3143 Median :29.80
Mean : 97.78 Mean : 3637 Mean :30.72
3rd Qu.:136.15 3rd Qu.: 4742 3rd Qu.:39.27
Max. :178.01 Max. :11092 Max. :55.00
Vit_C Weight
Min. : 43.40 Min. :0.100
1st Qu.: 69.83 1st Qu.:1.350
Median : 97.10 Median :2.450
Mean : 153.24 Mean :2.474
3rd Qu.: 163.62 3rd Qu.:3.325
Max. :1174.00 Max. :5.800
NA's :192 NA's :192
# sapply is used to identify the data type of the column in a data-set.
sapply (height, class)$unit
[1] "POSIXct" "POSIXt"
$timestamp
[1] "POSIXct" "POSIXt"
$treatment
[1] "character"
$Height_max
[1] "numeric"
$Leaf_Area
[1] "numeric"
$SPAD
[1] "numeric"
$Vit_C
[1] "numeric"
$Weight
[1] "numeric"
# str is to understand the kind of data, including rows and columns provided in the data set.
str (height)tibble [256 × 8] (S3: tbl_df/tbl/data.frame)
$ unit : POSIXct[1:256], format: "1899-12-31 10:01:01" "1899-12-31 10:01:01" ...
$ timestamp : POSIXct[1:256], format: "2025-06-30 11:19:17" "2025-07-07 15:38:39" ...
$ treatment : chr [1:256] "Control_Medania" "Control_Medania" "Control_Medania" "Control_Medania" ...
$ Height_max: num [1:256] 177 177 177 177 177 ...
$ Leaf_Area : num [1:256] 2892 4753 4533 6263 2161 ...
$ SPAD : num [1:256] 26.9 31.1 35.7 35.3 29.4 28.5 28.1 39.5 24.7 34.4 ...
$ Vit_C : num [1:256] NA NA NA 182 NA ...
$ Weight : num [1:256] NA NA NA 1 NA NA NA 1.4 NA NA ...
library(nortest)
# Perform the Anderson-Darling test for normality for different variables
height.norm <- ad.test(height$Height_max)
print(height.norm)
Anderson-Darling normality test
data: height$Height_max
A = 21.576, p-value < 2.2e-16
Leaf_Area.norm <- ad.test(height$Leaf_Area)
print(Leaf_Area.norm)
Anderson-Darling normality test
data: height$Leaf_Area
A = 5.1314, p-value = 1.036e-12
height$treatment <- as.factor(height$treatment)
result <- aov(Height_max ~ treatment, data = height)
summary(result) Df Sum Sq Mean Sq F value Pr(>F)
treatment 7 6881 982.9 0.412 0.894
Residuals 248 591801 2386.3
boxplot(Height_max ~ treatment, data = height,
main = "Height_max by Treatment",
xlab = "Treatment",
ylab = "Maximum Height",
col = "lightgreen",
border = "darkgreen",
las = 2, # rotate axis labels vertically
cex.axis = 0.8) # shrink axis label font size# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)
# Run ANOVA
result <- aov(Leaf_Area ~ treatment, data = height)
summary(result) Df Sum Sq Mean Sq F value Pr(>F)
treatment 7 134581492 19225927 5.082 2.07e-05 ***
Residuals 248 938159041 3782899
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Boxplot of Leaf_Area by treatment
boxplot(Leaf_Area ~ treatment, data = height,
main = "Leaf Area by Treatment",
xlab = "Treatment",
ylab = "Leaf Area",
col = "lightblue",
border = "darkblue",
las = 2, # rotate axis labels vertically
cex.axis = 0.8) # adjust axis text size# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)
# Run ANOVA
result <- aov(SPAD ~ treatment, data = height)
summary(result) Df Sum Sq Mean Sq F value Pr(>F)
treatment 7 4604 657.7 6.636 3.34e-07 ***
Residuals 248 24581 99.1
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Boxplot of Leaf_Area by treatment
boxplot(SPAD ~ treatment, data = height,
main = "SPAD by Treatment", # Updated title to match the variable
xlab = "Treatment",
ylab = "SPAD", # Updated y-label
col = "lightcoral", # Valid color name
border = "darkred",
las = 2, # Rotate axis labels vertically
cex.axis = 0.8) # Adjust axis text size# Ensure treatment is a factor
height$treatment <- as.factor(height$treatment)
# Run ANOVA
result <- aov(Vit_C ~ treatment, data = height)
summary(result) Df Sum Sq Mean Sq F value Pr(>F)
treatment 7 308218 44031 1.275 0.279
Residuals 56 1933634 34529
192 observations deleted due to missingness
# Load required package
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Set up 1 row and 2 columns for plotting side-by-side
par(mfrow = c(1, 2)) # optional: reset with par(mfrow = c(1, 1)) later
# Boxplot of Vit_C by treatment
boxplot(Vit_C ~ treatment, data = height,
main = "Vit_C by Treatment",
xlab = "Treatment",
ylab = "Vit_C",
col = "gray",
border = "black",
las = 2,
cex.axis = 0.8)
# Calculate mean Vit_C per treatment
means <- height %>%
group_by(treatment) %>%
summarise(mean_vitc = mean(Vit_C, na.rm = TRUE))
# Ensure treatment order matches the factor levels
means$treatment <- factor(means$treatment, levels = levels(height$treatment))
# Line graph of mean Vit_C per treatment
plot(means$treatment, means$mean_vitc,
type = "b", # Both points and lines
main = "Mean Vit_C per Treatment",
xlab = "Treatment",
ylab = "Mean Vit_C",
col = "darkred",
pch = 16, # Solid circle point
lwd = 2, # Line width
cex = 1.2) # Point size