Week 3 Homework
# Load the Isler et al. 2008 specimen table; the first row of the CSV holds
# the column names.
Isler <- read.csv(
  file = "C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Isler et al 2008.csv",
  header = TRUE
)
Question 1
a.
# Number of rows (specimens) in the raw table
dim(Isler)[1L]
## [1] 3813
# Number of columns (variables) in the raw table
dim(Isler)[2L]
## [1] 22
b.
# Combine genus and species into one space-separated label per specimen
Isler$gen_sp <- with(Isler, paste(Genus, Species))
# Count the distinct labels: every non-duplicated entry is a new species
unique_species <- sum(!duplicated(Isler$gen_sp))
unique_species
## [1] 257
Question 2
a.
# List every origin category recorded before cleaning
sort(unique(Isler$Wild_captive))
## [1] "Captive" "Captive bred"
## [3] "Died in captivity" "Unknown"
## [5] "Wild" "Wild born/died in captivity"
# Fold captive-bred specimens into the general "Captive" category
Isler$Wild_captive[Isler$Wild_captive == "Captive bred"] <- "Captive"
# Drop specimens whose origin is mixed or unknown. %in% compares whole
# values, so only these exact categories are removed; an unanchored regular
# expression would also remove any value that merely contains one of them.
Isler <- Isler[
  !(Isler$Wild_captive %in% c(
    "Died in captivity",
    "Unknown",
    "Wild born/died in captivity"
  )),
]
# Confirm that only the two intended categories remain
unique(Isler$Wild_captive)
## [1] "Wild" "Captive"
b.
# Specimen counts per origin category (missing values are not tabulated)
table(Isler$Wild_captive, useNA = "no")
##
## Captive Wild
## 252 3363
c.
# Keep only rows whose Sex entry is exactly "f" or "m"
Isler <- Isler[grepl(pattern = "^(f|m)$", x = Isler$Sex), , drop = FALSE]
# Confirm the remaining sex codes
unique(Isler$Sex)
## [1] "m" "f"
d.
# Specimen counts per sex (missing values are not tabulated)
table(Isler$Sex, useNA = "no")
##
## f m
## 1666 1848
e.
# Rows remaining after the origin and sex filters
NROW(Isler)
## [1] 3514
# Columns: one more than the raw file because gen_sp was added
NCOL(Isler)
## [1] 23
# Distinct genus-species labels remaining
sum(!duplicated(Isler$gen_sp))
## [1] 241
Question 3
a.
library(ggplot2)
# Restrict the data to specimens of the genus Papio
papio_data <- Isler[which(Isler$Genus == "Papio"), ]
# Cross-tabulate species by sex, then convert to long form with one row per
# species/sex combination so ggplot can map the count to bar height
sex_species_table <- table(papio_data$Species, papio_data$Sex)
df <- setNames(
  as.data.frame(sex_species_table),
  c("Species", "Sex", "Count")
)
# Side-by-side bars: one bar per sex within each species
ggplot(data = df, mapping = aes(x = Species, y = Count, fill = Sex)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("f" = "lightgreen", "m" = "lightblue2")) +
  labs(
    title = "Distribution of Male and Female Specimens within Papio Species",
    x = "Species",
    y = "Count"
  ) +
  theme_minimal()

Question 4
a.
library(ggplot2)
# Restrict the data to specimens of the genus Papio
papio_data <- Isler[which(Isler$Genus == "Papio"), ]
# One box per species summarising its endocranial volume values
ggplot(data = papio_data, mapping = aes(x = Species, y = ECV_cc)) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  labs(
    title = "Distribution of Endocranial Volume (ECV) within Papio Species",
    x = "Species",
    y = "Endocranial Volume (cc)"
  ) +
  theme_minimal()

b.
# I used the mean and standard deviation because they show the average ECV
# for each species and how much the values vary. The mean helps show the
# typical value, and the standard deviation tells me how spread out the ECV
# values are around that average.

# Per-species mean and standard deviation of ECV, ignoring missing values
mean_ECV <- tapply(papio_data$ECV_cc, papio_data$Species, mean, na.rm = TRUE)
sd_ECV <- tapply(papio_data$ECV_cc, papio_data$Species, sd, na.rm = TRUE)
# tapply() returns vectors named by species. Without row.names = NULL,
# data.frame() reuses those names as row labels, so every species would be
# printed twice (once as the row label and once in the Species column).
location_spread_measures <- data.frame(
  Species = names(mean_ECV),
  Mean_ECV = mean_ECV,
  SD_ECV = sd_ECV,
  row.names = NULL
)
location_spread_measures
##        Species Mean_ECV    SD_ECV
## 1       anubis 169.1115 19.768789
## 2 cynocephalus 165.0348 21.815230
## 3    hamadryas 153.1000 17.214012
## 4      ursinus 188.6433  3.954698
Question 5
a.
# I'm going to stop using ggplot here, because I am running out of time to do
# this assignment, and figuring out how to use it is taking up too much time.
# I will work on learning it better next week.

# Histogram of body mass over every remaining specimen
hist(
  x = Isler$Body.mass_g,
  xlab = "Body Mass (g)",
  main = "Distribution of Body Mass Across All Species",
  border = "black",
  col = "lightblue"
)

# Histogram of endocranial volume over every remaining specimen
hist(
  x = Isler$ECV_cc,
  xlab = "Endocranial Volume (cc)",
  main = "Distribution of Endocranial Volume Across All Species",
  border = "black",
  col = "lightpink"
)

b.
# Scatterplot of the natural log of ECV against the natural log of body mass
plot(
  x = log(Isler$Body.mass_g),
  y = log(Isler$ECV_cc),
  main = "Log-transformed ECV_cc vs Body Mass",
  xlab = "Log Body Mass (g)",
  ylab = "Log Endocranial Volume (cc)",
  col = "lightgreen",
  pch = 16
)

c.
# Median and interquartile range of body mass, ignoring missing values
median_body_mass <- median(x = Isler$Body.mass_g, na.rm = TRUE)
iqr_body_mass <- IQR(x = Isler$Body.mass_g, na.rm = TRUE)
# Median and interquartile range of endocranial volume, ignoring missing values
median_ecv <- median(x = Isler$ECV_cc, na.rm = TRUE)
iqr_ecv <- IQR(x = Isler$ECV_cc, na.rm = TRUE)
median_body_mass
## [1] 3629
iqr_body_mass
## [1] 5583.75
median_ecv
## [1] 62.3
iqr_ecv
## [1] 75.1475
d.
# Scatterplot of ECV against body mass on the original (untransformed) scale
plot(
  x = Isler$Body.mass_g,
  y = Isler$ECV_cc,
  main = "ECV_cc vs Body Mass",
  xlab = "Body Mass (g)",
  ylab = "Endocranial Volume (cc)",
  col = "lightgreen",
  pch = 16
)
