##Data cleaning

# Load the raw data and apply the basic cleaning steps.
# NOTE(review): clean_names() and remove_empty() are janitor functions and
# distinct() is a dplyr function; no library() calls are visible in this part
# of the file -- confirm both packages are loaded before this point.

# Show the current working directory (the paths below are absolute).
getwd()
## [1] "C:/Users/YSELoaner/Desktop"

# Read the raw data set.
A <- read.csv("D:/oct2023/All initial data set.csv")

# Format variable names.
A2 <- clean_names(A)

# Remove empty rows and columns.
A3 <- remove_empty(A2, which = c("rows", "cols"), quiet = FALSE)

# Remove duplicate rows.
A4 <- distinct(A3)

# Remove rows that contain NA.
A5 <- na.omit(A4)

# Derive one value per row for each trait from its five replicate columns
# (fw1..fw5, dw1..dw5, lt1..lt5, la1..la5), then the ratios SLA and LDMC.

# Convert dw1..dw5 and la1..la5 to numeric BEFORE any arithmetic on them, so
# that DW and LA are averaged from numeric columns.
# NOTE(review): as.numeric() can introduce NA for values it cannot parse, and
# na.omit() has already run at this point; such rows will carry NA into
# DW / LA / SLA / LDMC -- confirm whether they should be dropped here.
A5[paste0("dw", 1:5)] <- lapply(A5[paste0("dw", 1:5)], as.numeric)
A5[paste0("la", 1:5)] <- lapply(A5[paste0("la", 1:5)], as.numeric)

# Row-wise mean of the five replicates of each measurement.
A5$FW <- rowMeans(A5[paste0("fw", 1:5)])
A5$DW <- rowMeans(A5[paste0("dw", 1:5)])
A5$LT <- rowMeans(A5[paste0("lt", 1:5)])
A5$LA <- rowMeans(A5[paste0("la", 1:5)])

# Ratios built from the means above.
A5$SLA <- A5$LA / A5$DW
A5$LDMC <- A5$DW / A5$FW

# Save the data frame A5 (including the derived columns) to a CSV file,
# without writing row names.
write.csv(A5, file = "D:/oct2023/average.csv", row.names = FALSE)

# Keep only the columns sp, SLA, LA, LT and LDMC.
# select() needs the data frame as its first argument; A5 is the frame that
# holds these columns.
# NOTE(review): select() here is presumably dplyr::select -- confirm dplyr is
# loaded before this point.
A6 <- select(A5, sp, SLA, LA, LT, LDMC)

# Remove outliers from SLA
#
# Keep only the rows of a data frame whose value in one column lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25% and 75%
# quantiles of that column (NA values are ignored when computing them).
#
# Arguments:
#   A6  - data frame to filter.
#   SLA - the column to screen, given as a string (e.g. "SLA").
#
# Returns: a data frame with the rows of A6 that fall inside the bounds.
#   Rows whose value is NA are dropped. Stops with an error if the column
#   does not exist.
remove_outliers <- function(A6, SLA) {
  values <- A6[[SLA]]
  if (is.null(values)) {
    stop("Column '", SLA, "' not found in the data frame.", call. = FALSE)
  }

  quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  lower_bound <- quartiles[1] - fence
  upper_bound <- quartiles[2] + fence

  # which() discards NA comparisons; indexing with a logical vector that
  # contains NA would otherwise return rows filled with NA.
  keep <- which(values >= lower_bound & values <= upper_bound)

  # drop = FALSE keeps a data frame even when A6 has a single column.
  A6[keep, , drop = FALSE]
}

# Rows of A6 whose SLA lies within the 1.5 * IQR bounds.
A6_cleaned <- remove_outliers(A6, "SLA")

# Remove outliers from LA
#
# Keep only the rows of a data frame whose value in one column lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25% and 75%
# quantiles of that column (NA values are ignored when computing them).
# NOTE(review): this file defines remove_outliers() again before each trait
# with the same logic; a single definition would be enough.
#
# Arguments:
#   A6 - data frame to filter.
#   LA - the column to screen, given as a string (e.g. "LA").
#
# Returns: a data frame with the rows of A6 that fall inside the bounds.
#   Rows whose value is NA are dropped. Stops with an error if the column
#   does not exist.
remove_outliers <- function(A6, LA) {
  values <- A6[[LA]]
  if (is.null(values)) {
    stop("Column '", LA, "' not found in the data frame.", call. = FALSE)
  }

  quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  lower_bound <- quartiles[1] - fence
  upper_bound <- quartiles[2] + fence

  # which() discards NA comparisons; indexing with a logical vector that
  # contains NA would otherwise return rows filled with NA.
  keep <- which(values >= lower_bound & values <= upper_bound)

  # drop = FALSE keeps a data frame even when A6 has a single column.
  A6[keep, , drop = FALSE]
}

# Rows of A6 whose LA lies within the 1.5 * IQR bounds.
A6_cleanedLA <- remove_outliers(A6, "LA")

# Remove outliers from LT
#
# Keep only the rows of a data frame whose value in one column lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25% and 75%
# quantiles of that column (NA values are ignored when computing them).
# NOTE(review): this file defines remove_outliers() again before each trait
# with the same logic; a single definition would be enough.
#
# Arguments:
#   A6 - data frame to filter.
#   LT - the column to screen, given as a string (e.g. "LT").
#
# Returns: a data frame with the rows of A6 that fall inside the bounds.
#   Rows whose value is NA are dropped. Stops with an error if the column
#   does not exist.
remove_outliers <- function(A6, LT) {
  values <- A6[[LT]]
  if (is.null(values)) {
    stop("Column '", LT, "' not found in the data frame.", call. = FALSE)
  }

  quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  lower_bound <- quartiles[1] - fence
  upper_bound <- quartiles[2] + fence

  # which() discards NA comparisons; indexing with a logical vector that
  # contains NA would otherwise return rows filled with NA.
  keep <- which(values >= lower_bound & values <= upper_bound)

  # drop = FALSE keeps a data frame even when A6 has a single column.
  A6[keep, , drop = FALSE]
}

# Rows of A6 whose LT lies within the 1.5 * IQR bounds.
A6_cleanedLT <- remove_outliers(A6, "LT")

# Remove outliers from LDMC
#
# Keep only the rows of a data frame whose value in one column lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], where Q1 and Q3 are the 25% and 75%
# quantiles of that column (NA values are ignored when computing them).
# NOTE(review): this file defines remove_outliers() again before each trait
# with the same logic; a single definition would be enough.
#
# Arguments:
#   A6   - data frame to filter.
#   LDMC - the column to screen, given as a string (e.g. "LDMC").
#
# Returns: a data frame with the rows of A6 that fall inside the bounds.
#   Rows whose value is NA are dropped. Stops with an error if the column
#   does not exist.
remove_outliers <- function(A6, LDMC) {
  values <- A6[[LDMC]]
  if (is.null(values)) {
    stop("Column '", LDMC, "' not found in the data frame.", call. = FALSE)
  }

  quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  fence <- 1.5 * (quartiles[2] - quartiles[1])
  lower_bound <- quartiles[1] - fence
  upper_bound <- quartiles[2] + fence

  # which() discards NA comparisons; indexing with a logical vector that
  # contains NA would otherwise return rows filled with NA.
  keep <- which(values >= lower_bound & values <= upper_bound)

  # drop = FALSE keeps a data frame even when A6 has a single column.
  A6[keep, , drop = FALSE]
}

# Rows of A6 whose LDMC lies within the 1.5 * IQR bounds.
A6_cleanedLDMC <- remove_outliers(A6, "LDMC")

# Save the cleaned data: one CSV file per trait, each holding the rows that
# remain after removing that trait's outliers. Row names are not written.
write.csv(A6_cleaned, file = "D:/oct2023/Cleane_sla.csv", row.names = FALSE)
write.csv(A6_cleanedLA, file = "D:/oct2023/Cleane_la.csv", row.names = FALSE)
write.csv(A6_cleanedLT, file = "D:/oct2023/Cleane_lt.csv", row.names = FALSE)
write.csv(A6_cleanedLDMC, file = "D:/oct2023/Cleane_ldmc.csv",
          row.names = FALSE)

# Range of each trait after outlier removal:
#   SLA  = 32.09648-241.3107
#   LA   = 8.5328-183.8428
#   LT   = 0.0844-0.3372
#   LDMC = 0.15247-0.643027

## Without removing outliers
# Boxplot of SLA per species (sp), drawn from A6.
# NOTE(review): ggplot(), aes() and geom_boxplot() are ggplot2 functions; no
# library(ggplot2) call is visible in this part of the file -- confirm it is
# loaded before this point.
ggplot(data = A6, aes(x = sp, y = SLA, fill = sp)) +
  geom_boxplot()