##Data cleaning
# Print the current working directory (kept from the original session).
getwd()
## [1] "C:/Users/YSELoaner/Desktop"

# Read the raw data set.
A <- read.csv("D:/oct2023/All initial data set.csv")

# Format the variable (column) names.
A2 <- janitor::clean_names(A)

# Remove empty rows and columns; quiet = FALSE reports what was dropped.
A3 <- janitor::remove_empty(A2, which = c("rows", "cols"), quiet = FALSE)

# Remove duplicate rows.
A4 <- dplyr::distinct(A3)

# Remove every row that contains at least one NA.
A5 <- na.omit(A4)
# FW: mean of the five fw columns.
A5$FW <- (A5$fw1 + A5$fw2 + A5$fw3 + A5$fw4 + A5$fw5) / 5

# Convert dw1..dw5 and la1..la5 to numeric BEFORE they are averaged.
# (The original converted la1..la5 only after LA had already been computed.)
# as.numeric() returns NA, with a warning, for values it cannot parse.
numeric_cols <- c(paste0("dw", 1:5), paste0("la", 1:5))
A5[numeric_cols] <- lapply(A5[numeric_cols], as.numeric)

# DW, LT, LA: mean of the five corresponding columns.
A5$DW <- (A5$dw1 + A5$dw2 + A5$dw3 + A5$dw4 + A5$dw5) / 5
A5$LT <- (A5$lt1 + A5$lt2 + A5$lt3 + A5$lt4 + A5$lt5) / 5
A5$LA <- (A5$la1 + A5$la2 + A5$la3 + A5$la4 + A5$la5) / 5

# Derived ratios.
A5$SLA <- A5$LA / A5$DW
A5$LDMC <- A5$DW / A5$FW
# Save the data frame A5 (including the computed means and ratios) to a CSV file.
write.csv(A5, file = "D:/oct2023/average.csv", row.names = FALSE)
# Keep only sp, SLA, LA, LT and LDMC.
# select() needs the data frame as its first argument; the original call
# omitted it. A5 is the frame that holds these columns.
A6 <- dplyr::select(A5, sp, SLA, LA, LT, LDMC)
# Remove outliers from SLA

#' Drop rows whose value in one column lies outside the 1.5 * IQR fences
#'
#' The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, computed from the
#' chosen column with missing values ignored.
#'
#' @param A6 A data frame.
#' @param SLA Name of the column to screen, as a string. Despite the
#'   parameter name, any column name can be passed (e.g. "LA", "LT").
#' @return `A6` restricted to the rows whose value lies within the fences
#'   (inclusive). Rows with a missing value in that column are dropped.
remove_outliers <- function(A6, SLA) {
  values <- A6[[SLA]]

  q1 <- quantile(values, 0.25, na.rm = TRUE)
  q3 <- quantile(values, 0.75, na.rm = TRUE)
  iqr_value <- q3 - q1
  lower_bound <- q1 - 1.5 * iqr_value
  upper_bound <- q3 + 1.5 * iqr_value

  # An NA in a logical row index yields an all-NA row, so exclude missing
  # values explicitly instead of letting the comparison propagate NA.
  keep <- !is.na(values) & values >= lower_bound & values <= upper_bound

  # drop = FALSE keeps the result a data frame even for one-column input.
  A6[keep, , drop = FALSE]
}
# Each trait is screened independently against the same A6, so every result
# differs from A6 only by the outliers of that one trait.
# remove_outliers() is already defined above and takes the column name as an
# argument, so the three identical redefinitions were removed.

# SLA
A6_cleaned <- remove_outliers(A6, "SLA")

# LA
A6_cleanedLA <- remove_outliers(A6, "LA")

# LT
A6_cleanedLT <- remove_outliers(A6, "LT")

# LDMC
A6_cleanedLDMC <- remove_outliers(A6, "LDMC")
# Save one file per trait; in each file only that column's outliers are removed.
write.csv(A6_cleaned, file = "D:/oct2023/Cleane_sla.csv", row.names = FALSE)
write.csv(A6_cleanedLA, file = "D:/oct2023/Cleane_la.csv", row.names = FALSE)
write.csv(A6_cleanedLT, file = "D:/oct2023/Cleane_lt.csv", row.names = FALSE)
write.csv(A6_cleanedLDMC, file = "D:/oct2023/Cleane_ldmc.csv", row.names = FALSE)
# Ranges noted for each trait (lower-upper):
#   SLA  = 32.09648-241.3107
#   LA   = 8.5328-183.8428
#   LT   = 0.0844-0.3372
#   LDMC = 0.15247-0.643027
## Without removing outliers
# Boxplot of SLA per sp, drawn from A6 (before any outlier filtering).
# The original call sat on the comment line itself and so never ran.
ggplot2::ggplot(data = A6, ggplot2::aes(x = sp, y = SLA, fill = sp)) +
  ggplot2::geom_boxplot()