# Part 1: Variation Graphs ----
# Part 1 draws, side by side, one bar plot per site showing the mean stem DBH on
# each transect line. Both sites go through exactly the same steps, so the
# pipeline lives in one helper instead of being copy-pasted per site.

# plot_dbh_variation: read one site workbook, summarise it per Line, plot it.
#   path  - Excel file to read; it needs a `Line` column, an `N(IND)` column and
#           at least one column whose name contains "STEMDBH"
#   title - main title of the bar plot
#   col   - fill colour of the bars
#   ylim  - y-axis limits; defaults to c(0, 14) so both sites share one scale
# Returns (invisibly) a data frame with one row per Line and the columns
# Line, N(IND), DBHsum and DBHmean (DBHmean = DBHsum / N(IND)).
plot_dbh_variation <- function(path, title, col, ylim = c(0, 14)) {
  df <- read_excel(path)

  # Line is coerced to numeric so the lines group and sort as numbers.
  df$Line <- as.numeric(df$Line)

  # cleans up missing values; `[[` yields a plain logical vector for the row
  # mask instead of a one-column table
  df <- df[!is.na(df[["N(IND)"]]), ]
  if (nrow(df) == 0L) {
    stop("No rows with a non-missing N(IND) in ", path, call. = FALSE)
  }

  # read_excel() renames the repeated STEMDBH headers to STEMDBH...15,
  # STEMDBH...16, ... (see the "New names" output below), so the DBH columns
  # are located by pattern rather than by exact name.
  dbh_cols <- grep("STEMDBH", colnames(df))
  if (length(dbh_cols) == 0L) {
    stop("No STEMDBH columns found in ", path, call. = FALSE)
  }

  # set the columns to numeric so the calculations actually work
  df[dbh_cols] <- lapply(df[dbh_cols], as.numeric)

  # calculations (from lab 3): total DBH per row, then totals per Line.
  # One aggregate() call keeps N(IND) and DBHsum on the same rows, instead of
  # aligning two separate aggregations by position.
  df$DBHsum <- rowSums(df[dbh_cols], na.rm = TRUE)
  df_line <- aggregate(
    df[c("N(IND)", "DBHsum")],
    by = list(Line = df$Line),
    FUN = sum
  )
  df_line$DBHmean <- df_line$DBHsum / df_line[["N(IND)"]]

  barplot(DBHmean ~ Line, data = df_line, main = title, col = col, ylim = ylim)
  invisible(df_line)
}

#have to use this to have the plots next to each other; the previous layout is
#saved so it can be put back once both plots are drawn
old_par <- par(mfrow = c(1, 2))

#Esmerald - chose green this time for a more forest-like vibe :)
esmerald_lines <- plot_dbh_variation(
  "ESMERALD.xlsx", "Esmerald Variation", "lightgreen"
)
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`

#Ducke
ducke_lines <- plot_dbh_variation(
  "DUCKE.xlsx", "Ducke Variation", "forestgreen"
)
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`
## • `STEMDBH` -> `STEMDBH...24`

par(old_par)

# Part 2: Ingesting Weather Data ----
# The weather columns are addressed by position instead of by header name:
# name-based access worked on the first run but stopped working when the code
# was re-run and when it was run through source(), so the column numbers are
# used directly.
temp_col <- 2  # position of the temperature column
rain_col <- 3  # position of the rain column

# Esmerald: load the sheet and force both weather columns to numeric
EsmeraldData <- read_excel("ESMWeather.xlsx")
EsmeraldData[[temp_col]] <- as.numeric(EsmeraldData[[temp_col]])
EsmeraldData[[rain_col]] <- as.numeric(EsmeraldData[[rain_col]])

# Esmerald summary: mean of the temperature column, total of the rain column
EsmeraldAnnualTemp <- mean(EsmeraldData[[temp_col]], na.rm = TRUE)
EsmeraldAnnualRain <- sum(EsmeraldData[[rain_col]], na.rm = TRUE)

# Ducke: same treatment as Esmerald
DuckeData <- read_excel("DUCWeather.xlsx")
DuckeData[[temp_col]] <- as.numeric(DuckeData[[temp_col]])
DuckeData[[rain_col]] <- as.numeric(DuckeData[[rain_col]])

# Ducke summary
DuckeAnnualTemp <- mean(DuckeData[[temp_col]], na.rm = TRUE)
DuckeAnnualRain <- sum(DuckeData[[rain_col]], na.rm = TRUE)

# One row per site. Shannon, Simpson and MeanDBH are entered as constants that
# were worked out beforehand (per the author's note, from the DBH data shown in
# the Part 1 bar plots); the temperature and rain figures come from the weather
# sheets read above.
CombinedData <- data.frame(
  Area = c("Esmerald", "Ducke"),
  Shannon = c(2.828, 3.438),
  Simpson = c(0.922, 0.964),
  MeanDBH = c(7.84, 9.00),
  AvgerageAnnTemp = c(EsmeraldAnnualTemp, DuckeAnnualTemp),
  TotalAnnualRain = c(EsmeraldAnnualRain, DuckeAnnualRain)
)

# Printed only for inspection: the weather summaries next to the Shannon and
# Simpson indices.
print(CombinedData)
## Area Shannon Simpson MeanDBH AvgerageAnnTemp TotalAnnualRain
## 1 Esmerald 2.828 0.922 7.84 81.41667 38.5
## 2 Ducke 3.438 0.964 9.00 82.16667 69.3
# Part 3: Finalization of the Climate Profiles ----
# Final climate profile for both areas: the diversity indices are shown next to
# mean DBH, temperature and rainfall to help conclude which area is more
# biodiverse and how those factors play into it.

# 2 x 3 grid so that all of the plots appear in one output
par(mfrow = c(2, 3))

# NOTE: placeholder presentation only. The titles and colours still need to be
# made presentable so that readers who are not familiar with the data can
# understand what they are seeing.
# Maps each CombinedData column to the title of its panel, in drawing order.
panel_titles <- c(
  Shannon = "Shannon",
  Simpson = "Simpson",
  MeanDBH = "DBH",
  AvgerageAnnTemp = "Temp",
  TotalAnnualRain = "Rainfall"
)

# One bar plot per column, with one bar per area
for (metric in names(panel_titles)) {
  barplot(
    CombinedData[[metric]],
    names.arg = CombinedData$Area,
    main = panel_titles[[metric]]
  )
}
