R Final Project Assignment

Part 1 Variation Graphs:

# One row, two columns: the Esmerald and Ducke variation plots are drawn
# side by side on the same device.
par(mfrow = c(1, 2))

# Esmerald: read the field sheet into `df`.
df <- read_excel(path = "ESMERALD.xlsx")

## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`

# Esmerald: mean stem DBH per transect line, drawn as a bar plot.
# Works on `df`, the sheet read from "ESMERALD.xlsx".

# Coerce Line to numeric so the rows sort in numeric order.
df$Line <- as.numeric(df$Line)
df <- df[order(df$Line), ]

# Drop rows that have no individual count.
# The column is extracted as a plain vector with [[ ]]: is.na() applied to the
# one-column result of df[, "N(IND)"] yields a matrix, and using a matrix as a
# row index is deprecated for tibbles (tibble >= 3.0.0).
df <- df[!is.na(df[["N(IND)"]]), ]

# Number of individuals per line.
df_line <- aggregate(df["N(IND)"], list(Line = df$Line), sum)
names(df_line)[2] <- "N(IND)"

# Every stem-diameter column. The repeated STEMDBH header is de-duplicated on
# import (STEMDBH...15, STEMDBH...16, ...), so match on the shared name.
dbh_cols <- grep("STEMDBH", colnames(df))

# Force the DBH columns to numeric; rowSums() needs numeric input.
df[dbh_cols] <- lapply(df[dbh_cols], as.numeric)

# Per-row DBH total (missing stems ignored), then the per-line total.
# Both aggregate() calls group the same rows by the same Line values, so the
# per-line results line up row for row.
df[["DBHsum"]] <- rowSums(df[, dbh_cols], na.rm = TRUE)
df_line[["DBHsum"]] <- aggregate(df$DBHsum, list(df$Line), sum)$x

# Mean DBH per individual on each line.
df_line[["DBHmean"]] <- df_line[["DBHsum"]] / df_line[["N(IND)"]]

# Light green for a forest-like look. The y-range is fixed at 0-14, the same
# range the Ducke plot uses, presumably so the two panels are comparable.
barplot(
  DBHmean ~ Line,
  data = df_line,
  main = "Esmerald Variation",
  col = "lightgreen",
  ylim = c(0, 14)
)

# Ducke: read the field sheet, replacing the Esmerald data held in `df`.
df <- read_excel(path = "DUCKE.xlsx")

## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`
## • `STEMDBH` -> `STEMDBH...24`

# Ducke: mean stem DBH per transect line, drawn as a bar plot.
# Works on `df`, the sheet read from "DUCKE.xlsx".

# Coerce Line to numeric so the rows sort in numeric order.
df$Line <- as.numeric(df$Line)
df <- df[order(df$Line), ]

# Drop rows that have no individual count.
# The column is extracted as a plain vector with [[ ]]: is.na() applied to the
# one-column result of df[, "N(IND)"] yields a matrix, and using a matrix as a
# row index is deprecated for tibbles (tibble >= 3.0.0).
df <- df[!is.na(df[["N(IND)"]]), ]

# Number of individuals per line.
df_line <- aggregate(df["N(IND)"], list(Line = df$Line), sum)
names(df_line)[2] <- "N(IND)"

# Every stem-diameter column. The repeated STEMDBH header is de-duplicated on
# import (STEMDBH...15, STEMDBH...16, ...), so match on the shared name.
dbh_cols <- grep("STEMDBH", colnames(df))

# Force the DBH columns to numeric; rowSums() needs numeric input.
df[dbh_cols] <- lapply(df[dbh_cols], as.numeric)

# Per-row DBH total (missing stems ignored), then the per-line total.
# Both aggregate() calls group the same rows by the same Line values, so the
# per-line results line up row for row.
df[["DBHsum"]] <- rowSums(df[, dbh_cols], na.rm = TRUE)
df_line[["DBHsum"]] <- aggregate(df$DBHsum, list(df$Line), sum)$x

# Mean DBH per individual on each line.
df_line[["DBHmean"]] <- df_line[["DBHsum"]] / df_line[["N(IND)"]]

# Draw the bar plot. The y-range is fixed at 0-14, the same range the Esmerald
# plot uses, presumably so the two panels are comparable.
barplot(
  DBHmean ~ Line,
  data = df_line,
  main = "Ducke Variation",
  col = "forestgreen",
  ylim = c(0, 14)
)

Part 2 Ingesting Weather Data:

# Columns are addressed by position, not by header name. Author's note: the
# first version used the header names, but that only worked on the first run;
# after re-running (and running through "source") it failed, so the
# temperature and rain columns are referenced by column number instead.
# Column 2 is treated as temperature and column 3 as rainfall.

# Esmerald
EsmeraldData <- read_excel(path = "ESMWeather.xlsx")

# Coerce both measurement columns to numeric in one pass.
EsmeraldData[2:3] <- lapply(EsmeraldData[2:3], as.numeric)

# Annual figures for Esmerald: mean temperature, total rainfall.
EsmeraldAnnualTemp <- mean(EsmeraldData[[2]], na.rm = TRUE)
EsmeraldAnnualRain <- sum(EsmeraldData[[3]], na.rm = TRUE)


# Ducke
DuckeData <- read_excel(path = "DUCWeather.xlsx")

# Coerce both measurement columns to numeric in one pass.
DuckeData[2:3] <- lapply(DuckeData[2:3], as.numeric)

# Annual figures for Ducke: mean temperature, total rainfall.
DuckeAnnualTemp <- mean(DuckeData[[2]], na.rm = TRUE)
DuckeAnnualRain <- sum(DuckeData[[3]], na.rm = TRUE)

# One row per site. Shannon, Simpson and MeanDBH are entered as literal values
# (per the author's note, worked out beforehand from the DBH data behind the
# Part 1 bar plots); temperature and rainfall come from the weather sheets.
# The column name "AvgerageAnnTemp" is kept exactly as spelled because the
# plotting code looks the column up by that name.
CombinedData <- data.frame(
  Area = c("Esmerald", "Ducke"),
  Shannon = c(2.828, 3.438),
  Simpson = c(0.922, 0.964),
  MeanDBH = c(7.84, 9.00),
  AvgerageAnnTemp = c(EsmeraldAnnualTemp, DuckeAnnualTemp),
  TotalAnnualRain = c(EsmeraldAnnualRain, DuckeAnnualRain)
)

# Show the diversity indices next to the climate figures (for inspection only).
print(CombinedData)

##       Area Shannon Simpson MeanDBH AvgerageAnnTemp TotalAnnualRain
## 1 Esmerald   2.828   0.922    7.84        81.41667            38.5
## 2    Ducke   3.438   0.964    9.00        82.16667            69.3

Part 3 Finalization of the Climate Profiles:

# Climate profiles for both areas, used to judge which one is more biodiverse
# and how rainfall, temperature and mean DBH relate to that.

# 2 x 3 grid so all five panels appear in a single output.
par(mfrow = c(2, 3))

# PLACEHOLDER: the panel titles and default colours are not presentable yet.
# They are meant to be replaced with labels and colours that readers who do
# not know the data can understand.
# One bar plot per metric, in this order: column name paired with its title.
invisible(Map(
  function(column, title) {
    barplot(CombinedData[[column]], names.arg = CombinedData$Area, main = title)
  },
  c("Shannon", "Simpson", "MeanDBH", "AvgerageAnnTemp", "TotalAnnualRain"),
  c("Shannon", "Simpson", "DBH", "Temp", "Rainfall")
))

R Final Project Assignment - Update 1

Maximilian Luetz

4/27/2026

Part 1 Variation Graphs:

Part 2 Ingesting Weather Data:

Part 3 Finalization of the Climate Profiles: