# Install the packages this analysis depends on, but only the ones that are
# not already available, so re-running the script does not reinstall
# everything each time.
# tidyverse already attaches ggplot2 and dplyr; they stay listed so the
# original set of requested packages is preserved.
required_pkgs <- c("ggplot2", "ggthemes", "tidyverse", "dplyr")

# requireNamespace() returns FALSE (instead of erroring) for a missing package.
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]

if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library("ggthemes")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library("ggplot2")
library("dplyr")
# Load the msleep dataset into the workspace and print per-column summary
# statistics (five-number summaries and NA counts for the numeric columns).
data(msleep)
summary(object = msleep)
##      name              genus               vore              order          
##  Length:83          Length:83          Length:83          Length:83         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  conservation        sleep_total      sleep_rem      sleep_cycle    
##  Length:83          Min.   : 1.90   Min.   :0.100   Min.   :0.1167  
##  Class :character   1st Qu.: 7.85   1st Qu.:0.900   1st Qu.:0.1833  
##  Mode  :character   Median :10.10   Median :1.500   Median :0.3333  
##                     Mean   :10.43   Mean   :1.875   Mean   :0.4396  
##                     3rd Qu.:13.75   3rd Qu.:2.400   3rd Qu.:0.5792  
##                     Max.   :19.90   Max.   :6.600   Max.   :1.5000  
##                                     NA's   :22      NA's   :51      
##      awake          brainwt            bodywt        
##  Min.   : 4.10   Min.   :0.00014   Min.   :   0.005  
##  1st Qu.:10.25   1st Qu.:0.00290   1st Qu.:   0.174  
##  Median :13.90   Median :0.01240   Median :   1.670  
##  Mean   :13.57   Mean   :0.28158   Mean   : 166.136  
##  3rd Qu.:16.15   3rd Qu.:0.12550   3rd Qu.:  41.750  
##  Max.   :22.10   Max.   :5.71200   Max.   :6654.000  
##                  NA's   :27
# Compact column-wise overview: row/column counts, column types, first values.
dplyr::glimpse(msleep)
## Rows: 83
## Columns: 11
## $ name         <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Greater sho…
## $ genus        <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bos", "Br…
## $ vore         <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi", "car…
## $ order        <chr> "Carnivora", "Primates", "Rodentia", "Soricomorpha", "Ar…
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", NA, "dom…
## $ sleep_total  <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1, 3.0, …
## $ sleep_rem    <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0.6, 0.8,…
## $ sleep_cycle  <dbl> NA, NA, NA, 0.1333333, 0.6666667, 0.7666667, 0.3833333, …
## $ awake        <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9, 21.0, …
## $ brainwt      <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0.07000, …
## $ bodywt       <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.490, 0.0…

2 How many mammals and variables are in the dataset?

The summary() output reports a length of 83 for every column, and glimpse() confirms "Rows: 83"; since each row is one mammal, the dataset contains 83 mammals. glimpse() also reports "Columns: 11", and each column is one variable, so the dataset has 11 variables.

3

# Shared base plot: body weight against total sleep, with points and the
# default smoother (loess here, per the message echoed below). The four
# variants that follow differ only in which axes are log10-transformed, so
# the common layers are defined once instead of being repeated four times.
sleep_weight_plot <- ggplot(
  data = msleep,
  mapping = aes(x = sleep_total, y = bodywt)
) +
  geom_point() +
  geom_smooth()

# Variant 1: both axes on their original scales.
sleep_weight_plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 2: log10-transformed x axis (total sleep) only.
sleep_weight_plot +
  scale_x_log10()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 3: log10-transformed y axis (body weight) only.
sleep_weight_plot +
  scale_y_log10()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 4: both axes log10-transformed.
sleep_weight_plot +
  scale_x_log10() +
  scale_y_log10()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

4.4 Which plot seems the best (most linear)?

The plot in which only the response variable (body weight, on the y-axis) is log-transformed appears to be the most linear.

# Body weight vs. total sleep on log10-log10 axes, coloured by diet (vore).
# Because colour is mapped in the top-level aes(), geom_smooth() fits one
# linear model per diet group; confidence bands are left at their default.
ggplot(
  data = msleep,
  mapping = aes(x = sleep_total, y = bodywt, color = vore)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Relationship Between Body Weight and Time Slept",
    x = "Total Sleep (hrs)",
    y = "Body weight"
  ) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

# Same log10-log10 plot of body weight vs. total sleep, coloured by diet
# (vore), with one linear fit per diet group but without confidence bands.
ggplot(
  data = msleep,
  mapping = aes(x = sleep_total, y = bodywt, color = vore)
) +
  geom_point() +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw() +
  labs(
    title = "Relationship Between Body Weight and Time Slept",
    x = "Total Sleep (hrs)",
    y = "Body weight"
  )
## `geom_smooth()` using formula 'y ~ x'

# Distribution of total sleep for each diet category (vore), one box per diet.
# Fill duplicates the x axis, so the legend is hidden as redundant.
ggplot(
  data = msleep,
  mapping = aes(x = vore, y = sleep_total, fill = vore)
) +
  geom_boxplot() +
  scale_fill_colorblind() +
  labs(x = "Diet", y = "Sleep Total") +
  theme(legend.position = "none")

# Total sleep against body weight, one panel per diet category (vore), with a
# linear fit per panel and no confidence bands.
# NOTE(review): the title says "Logged Scales", but only the x axis (body
# weight) is log10-transformed here -- confirm whether y should be logged too
# or the title adjusted.
ggplot(data = msleep, mapping = aes(x = bodywt, y = sleep_total)) +
  geom_point() +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~vore) +
  scale_x_log10() +
  labs(
    title = "Sleep Total vs Body Weight by Diet (Logged Scales)",
    y = "Sleep Total",
    x = "Body Weight"
  ) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'