Week 3 Homework

# Load the dataset from CSV; the first row of the file supplies column names.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where the file exists at exactly this location.
Isler <- read.csv(
  file = "C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Isler et al 2008.csv",
  header = TRUE
)

Question 1

a.

# Size of the raw data frame: row count first, then column count.
dim(Isler)[1L]
## [1] 3813
dim(Isler)[2L]
## [1] 22

b.

# Label every row with "Genus Species" and count how many distinct labels occur.
Isler$gen_sp <- paste(Isler$Genus, Isler$Species, sep = " ")
unique_species <- sum(!duplicated(Isler$gen_sp))
unique_species
## [1] 257

Question 2

a.

# List the raw provenance labels before recoding anything.
sort(unique(Isler$Wild_captive))
## [1] "Captive"                     "Captive bred"               
## [3] "Died in captivity"           "Unknown"                    
## [5] "Wild"                        "Wild born/died in captivity"

# Fold captive-bred specimens into the "Captive" category.
Isler$Wild_captive[Isler$Wild_captive == "Captive bred"] <- "Captive"

# Drop rows with ambiguous provenance. The labels are compared by exact match
# with %in% rather than a regular expression, so a value that merely contains
# one of these strings is not removed by accident and no label is interpreted
# as a pattern. Rows with a missing label are kept, as before.
ambiguous_origin <- c(
  "Died in captivity",
  "Unknown",
  "Wild born/died in captivity"
)
Isler <- Isler[!(Isler$Wild_captive %in% ambiguous_origin), ]

# Confirm that only the two intended categories remain.
unique(Isler$Wild_captive)
## [1] "Wild"    "Captive"

b.

# Number of specimens in each provenance category after cleaning.
table(Isler[["Wild_captive"]])
## 
## Captive    Wild 
##     252    3363

c. 

# Keep only rows whose sex is recorded as exactly "f" or "m"; any other value,
# including a missing one, is dropped.
Isler <- Isler[Isler$Sex %in% c("f", "m"), ]
unique(Isler$Sex)
## [1] "m" "f"

d. 

# Number of specimens of each sex after filtering.
table(Isler[["Sex"]])
## 
##    f    m 
## 1666 1848

e.

# Size of the cleaned data frame (rows, then columns) and the number of
# distinct "Genus Species" labels that survive the filters.
dim(Isler)[1L]
## [1] 3514
dim(Isler)[2L]
## [1] 23
sum(!duplicated(Isler$gen_sp))
## [1] 241

Question 3

a.

library(ggplot2)

# Restrict to the genus Papio; which() discards rows where the comparison is
# NA, matching what subset() does.
papio_data <- Isler[which(Isler$Genus == "Papio"), ]

# Cross-tabulate species by sex, then flatten to one row per combination with
# descriptive column names for plotting.
sex_species_table <- table(papio_data$Species, papio_data$Sex)
df <- setNames(
  as.data.frame(sex_species_table),
  c("Species", "Sex", "Count")
)

# Side-by-side bars: one bar per sex within each species, bar height = count.
ggplot(data = df, mapping = aes(x = Species, y = Count, fill = Sex)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("f" = "lightgreen", "m" = "lightblue2")) +
  theme_minimal() +
  labs(
    title = "Distribution of Male and Female Specimens within Papio Species",
    x = "Species",
    y = "Count"
  )

Question 4

a.

library(ggplot2)

# Restrict to the genus Papio; which() discards rows where the comparison is
# NA, matching what subset() does.
papio_data <- Isler[which(Isler$Genus == "Papio"), ]

# One box per species showing the distribution of the ECV_cc column.
ggplot(data = papio_data, mapping = aes(x = Species, y = ECV_cc)) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  theme_minimal() +
  labs(
    title = "Distribution of Endocranial Volume (ECV) within Papio Species",
    x = "Species",
    y = "Endocranial Volume (cc)"
  )

b.

# I used the mean and standard deviation because they show the average ECV for
# each species and how much the values vary. The mean helps show the typical
# value, and the standard deviation tells me how spread out the ECV values are
# around that average.

# Per-species mean and standard deviation of ECV_cc, skipping missing values.
mean_ECV <- tapply(
  papio_data[["ECV_cc"]], papio_data[["Species"]],
  mean,
  na.rm = TRUE
)
sd_ECV <- tapply(
  papio_data[["ECV_cc"]], papio_data[["Species"]],
  sd,
  na.rm = TRUE
)

# Combine both summaries into one table with a row per species.
location_spread_measures <- data.frame(
  Species = names(mean_ECV),
  Mean_ECV = mean_ECV,
  SD_ECV = sd_ECV
)
location_spread_measures
##                   Species Mean_ECV    SD_ECV
## anubis             anubis 169.1115 19.768789
## cynocephalus cynocephalus 165.0348 21.815230
## hamadryas       hamadryas 153.1000 17.214012
## ursinus           ursinus 188.6433  3.954698

Question 5

a.

# I'm going to stop using ggplot here, because I am running out of time to do
# this assignment, and figuring out how to use it is taking up too much time.
# I will work on learning it better next week.

# Histogram of body mass over every row of the cleaned data.
hist(
  x = Isler$Body.mass_g,
  xlab = "Body Mass (g)",
  main = "Distribution of Body Mass Across All Species",
  border = "black",
  col = "lightblue"
)

# Histogram of endocranial volume over every row of the cleaned data.
hist(
  x = Isler$ECV_cc,
  xlab = "Endocranial Volume (cc)",
  main = "Distribution of Endocranial Volume Across All Species",
  border = "black",
  col = "lightpink"
)

b.

# Scatter plot of natural-log ECV against natural-log body mass, one filled
# point per specimen.
plot(
  x = log(Isler$Body.mass_g),
  y = log(Isler$ECV_cc),
  main = "Log-transformed ECV_cc vs Body Mass",
  xlab = "Log Body Mass (g)",
  ylab = "Log Endocranial Volume (cc)",
  col = "lightgreen",
  pch = 16
)

c. 

# Median (location) and interquartile range (spread) for body mass and ECV,
# each computed with missing values removed.
median_body_mass <- median(Isler$Body.mass_g, na.rm = TRUE)
median_ecv <- median(Isler$ECV_cc, na.rm = TRUE)

iqr_body_mass <- IQR(Isler$Body.mass_g, na.rm = TRUE)
iqr_ecv <- IQR(Isler$ECV_cc, na.rm = TRUE)

# Print the four summaries: body mass first, then ECV.
median_body_mass
## [1] 3629
iqr_body_mass
## [1] 5583.75
median_ecv
## [1] 62.3
iqr_ecv
## [1] 75.1475

d. 

# Scatter plot of ECV against body mass on the untransformed scales, one
# filled point per specimen.
plot(
  x = Isler$Body.mass_g,
  y = Isler$ECV_cc,
  main = "ECV_cc vs Body Mass",
  xlab = "Body Mass (g)",
  ylab = "Endocranial Volume (cc)",
  col = "lightgreen",
  pch = 16
)