#Part 1 Variation Graphs:
#have to use this to have the two barplots next to each other
par(mfrow = c(1, 2))

#summarise_transects: reads one study area's transect workbook and returns the
#per-line (transect) summary that the plots and tables are built from
#  path       - Excel workbook with a Line column, an N(IND) column and one or
#               more columns whose names contain "STEMDBH"
#  study_area - label stored in the StudyArea column of the result
#returns a data.frame with one row per Line and the columns Line, N(IND),
#DBHsum, DBHmean, D, Simpson, Shannon and StudyArea (in that order)
#both sites go through this one function so the calculations cannot drift apart
summarise_transects <- function(path, study_area) {
  records <- read_excel(path)
  records$Line <- as.numeric(records$Line)
  records <- records[order(records$Line), ]
  #cleans up missing values: rows without an individual count are dropped
  #read_excel() gives a tibble, where [ ,"col"] stays a one-column table, so the
  #column is pulled out as a plain vector with [[ ]]
  n_ind <- records[["N(IND)"]]
  records <- records[!is.na(n_ind), ]
  n_ind <- records[["N(IND)"]]
  line_id <- records$Line

  #every per-line total uses the same grouping vector, so the results line up
  #row for row (aggregate() names the summed column "x" for vector input)
  sum_by_line <- function(values) {
    aggregate(values, list(Line = line_id), sum)
  }

  per_line <- sum_by_line(n_ind)
  names(per_line)[2] <- "N(IND)"

  #set the DBH columns to numeric so the calculations actually work
  dbh_cols <- grep("STEMDBH", colnames(records))
  records[dbh_cols] <- lapply(records[dbh_cols], as.numeric)
  #calculations - given from lab 3: DBH summed over all stem columns of a row,
  #then per line, then divided by the number of individuals on that line
  dbh_sum <- rowSums(records[dbh_cols], na.rm = TRUE)
  per_line$DBHsum <- sum_by_line(dbh_sum)$x
  per_line$DBHmean <- per_line$DBHsum / per_line[["N(IND)"]]

  #diversity calculations, following the Lab 3 slides instead of hardcoding
  #prop = a row's share of the individuals counted on its line
  #assumes each row is a distinct species within its line - TODO confirm
  line_total <- per_line[["N(IND)"]][match(line_id, per_line$Line)]
  prop <- n_ind / line_total

  #simpson index: 1 - sum(p^2)
  per_line$D <- sum_by_line(prop^2)$x
  per_line$Simpson <- 1 - per_line$D

  #shannon index: -sum(p * log(p)); rows with p = 0 contribute 0 instead of NaN
  shannon_terms <- ifelse(prop > 0, prop * log(prop), 0)
  per_line$Shannon <- -sum_by_line(shannon_terms)$x

  #identifies which study area the rows belong to
  per_line$StudyArea <- study_area
  per_line
}

#esmerald Data
Esmerald_df_line <- summarise_transects("ESMERALD.xlsx", "Esmerald")
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`
#chose green this time for a more forest-like vibe :)
#both barplots use the same 0-14 y range
barplot(DBHmean ~ Line, data = Esmerald_df_line, main = "Esmerald Variation",
        col = "lightgreen", ylim = c(0, 14))
#stores the averages for Esmerald to use in the final table
EsmeraldShannon <- mean(Esmerald_df_line$Shannon)
EsmeraldSimpson <- mean(Esmerald_df_line$Simpson)
EsmeraldDBHmean <- mean(Esmerald_df_line$DBHmean)

#ducke Data
Ducke_df_line <- summarise_transects("DUCKE.xlsx", "Ducke")
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
## • `STEMDBH` -> `STEMDBH...23`
## • `STEMDBH` -> `STEMDBH...24`
#outputs the barplot
barplot(DBHmean ~ Line, data = Ducke_df_line, main = "Ducke Variation",
        col = "darkgreen", ylim = c(0, 14))
#saves the averages for Ducke to use in the final table
DuckeShannon <- mean(Ducke_df_line$Shannon)
DuckeSimpson <- mean(Ducke_df_line$Simpson)
DuckeDBHmean <- mean(Ducke_df_line$DBHmean)
#transect and dbh comparison
#stacks the per-transect diversity columns of both study areas with base R
#rbind, Esmerald rows first and Ducke rows after
CombinedTransectTable <- do.call(
  rbind,
  lapply(
    list(Esmerald_df_line, Ducke_df_line),
    function(site_table) site_table[, c("StudyArea", "Line", "Shannon", "Simpson")]
  )
)
print(CombinedTransectTable)
## StudyArea Line Shannon Simpson
## 1 Esmerald 1 3.092075 0.9464575
## 2 Esmerald 2 2.954003 0.9355102
## 3 Esmerald 3 3.407904 0.9600000
## 4 Esmerald 4 3.004227 0.9437500
## 5 Esmerald 5 2.375988 0.8731098
## 6 Esmerald 6 3.080476 0.9464923
## 7 Esmerald 7 2.846720 0.9323621
## 8 Esmerald 8 2.593866 0.8975069
## 9 Esmerald 9 2.531012 0.9005102
## 10 Esmerald 10 2.397267 0.8798186
## 11 Ducke 1 3.401197 0.9666667
## 12 Ducke 2 3.429486 0.9660494
## 13 Ducke 3 3.728961 0.9756625
## 14 Ducke 4 3.691537 0.9732911
## 15 Ducke 5 3.704663 0.9750567
## 16 Ducke 6 3.344549 0.9635796
## 17 Ducke 7 3.122425 0.9519890
## 18 Ducke 8 3.431665 0.9479384
## 19 Ducke 9 3.271689 0.9607610
## 20 Ducke 10 3.258097 0.9615385
#back to a single plot per page after the side-by-side barplots
par(mfrow = c(1, 1))
#as per (Geoff's :D) recommendation, a box plot comparing the per-transect mean
#DBH of the two sites; the list names become the box labels
boxplot(
  list(Esmerald = Esmerald_df_line$DBHmean, Ducke = Ducke_df_line$DBHmean),
  col = c("lightgreen", "darkgreen"),
  main = "Distribution of Mean DBH by Transect",
  ylab = "Mean DBH in cm"
)

#Part 2 Ingesting Weather Data:
#the weather columns are referenced by position, not by header name: column 2
#holds the temperature data and column 3 the rain data. the first iteration
#used the header names, but that only worked once and failed when the code was
#re-run or run through "source", so the column numbers are used directly.

#Esmerald
EsmeraldData <- read_excel("ESMWeather.xlsx")
#makes the temperature and rain columns strictly numerical
EsmeraldData[2:3] <- lapply(EsmeraldData[2:3], as.numeric)
#Esmerald: mean of the temperature column, total of the rain column
EsmeraldAnnualTemp <- mean(EsmeraldData[[2]], na.rm = TRUE)
EsmeraldAnnualRain <- sum(EsmeraldData[[3]], na.rm = TRUE)

#Ducke
DuckeData <- read_excel("DUCWeather.xlsx")
#makes the temperature and rain columns strictly numerical
DuckeData[2:3] <- lapply(DuckeData[2:3], as.numeric)
#Ducke: mean of the temperature column, total of the rain column
DuckeAnnualTemp <- mean(DuckeData[[2]], na.rm = TRUE)
DuckeAnnualRain <- sum(DuckeData[[3]], na.rm = TRUE)
#one row per study area: the Shannon, Simpson and mean DBH values are the
#averages calculated dynamically in the first part, the temperature and rain
#values come from the weather data above
CombinedData <- data.frame(
  Area = c("Esmerald", "Ducke"),
  Shannon = c(EsmeraldShannon, DuckeShannon),
  Simpson = c(EsmeraldSimpson, DuckeSimpson),
  MeanDBH = c(EsmeraldDBHmean, DuckeDBHmean),
  #was misspelled "AvgerageAnnTemp"; the columns are renamed by position
  #further down, so the corrected name only changes this printed table
  AverageAnnTemp = c(EsmeraldAnnualTemp, DuckeAnnualTemp),
  TotalAnnualRain = c(EsmeraldAnnualRain, DuckeAnnualRain)
)
#prints out the combined summary table for esmerald and ducke
print(CombinedData)
## Area Shannon Simpson MeanDBH AverageAnnTemp TotalAnnualRain
## 1 Esmerald 2.828354 0.9215518 7.841486 81.41667 38.5
## 2 Ducke 3.438427 0.9642533 9.001136 82.16667 69.3
#Part 3 Finalization of the Graphs:
#final climate profiles for both areas, used to conclude which one is more
#biodiverse and how rainfall, temperature and mean DBH play into this
#this portion uses tidyr (reshaping) and ggplot2 (plotting) to make fun plots
#gives the six columns display-ready names, assigned by position
names(CombinedData) <- c(
  "StudyArea",
  "Shannon Diversity Index",
  "Simpson Diversity Index",
  "Mean DBH in cm",
  "Average Annual Temp in F",
  "Total Annual Rain in Inches"
)
#long format: one row per study area and metric, so each metric can be faceted
#into its own panel
plot_data <- pivot_longer(
  CombinedData,
  cols = -StudyArea,
  names_to = "Metric",
  values_to = "Value"
)
#basic lollipop plot (named Lolliplot because it sounds fun), one panel per
#metric, colored by study area so it can be read and interpreted easily
Lolliplot <- ggplot(plot_data, aes(x = StudyArea, y = Value, color = StudyArea))
#the stick: a segment from 0 up to the value; the tip: a large point on top
Lolliplot <- Lolliplot +
  geom_segment(
    aes(x = StudyArea, xend = StudyArea, y = 0, yend = Value),
    linewidth = 2
  ) +
  geom_point(size = 6)
#facet_wrap keeps each metric in a separate panel, and the free y scale lets
#each panel's own values define how high its y axis goes
Lolliplot <- Lolliplot + facet_wrap(~Metric, scales = "free_y")
#lollipop-adjacent colors for visualization
Lolliplot <- Lolliplot +
  scale_color_manual(values = c("Esmerald" = "pink", "Ducke" = "lightblue"))
#title and subtitle to make it more informational; x and y labels are left
#blank since they would only add unnecessary identifiers to the page
#liked the minimal theme as opposed to other ones like light, dark, etc.
Lolliplot <- Lolliplot +
  labs(
    title = "Biodiversity & Climate Profiles",
    subtitle = "Esmerald, Ecuador as Compared to Ducke, Brazil",
    x = "",
    y = ""
  ) +
  theme_minimal()
#prints the lollipop plots
print(Lolliplot)

#I edited the output in the report since it looks nicer that way but the output data itself is the same