# Install the packages this analysis depends on, but only those that are not
# already available. Calling install.packages() unconditionally re-downloads
# every package each time the script runs and fails when there is no network
# access, even if everything is already installed.
required_pkgs <- c("ggplot2", "ggthemes", "tidyverse", "dplyr")
pkg_available <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!pkg_available]
if (length(missing_pkgs) > 0) {
  # As before, 'lib' is left unspecified, so R installs into the default
  # library and reports the chosen path on the console.
  install.packages(missing_pkgs)
}
library("ggthemes")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("ggplot2")
library("dplyr")
# Load the msleep data set into the workspace, then print a per-column
# summary (type information for character columns, five-number summary,
# mean and NA counts for numeric columns).
data(msleep)
summary(object = msleep)
## name genus vore order
## Length:83 Length:83 Length:83 Length:83
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## conservation sleep_total sleep_rem sleep_cycle
## Length:83 Min. : 1.90 Min. :0.100 Min. :0.1167
## Class :character 1st Qu.: 7.85 1st Qu.:0.900 1st Qu.:0.1833
## Mode :character Median :10.10 Median :1.500 Median :0.3333
## Mean :10.43 Mean :1.875 Mean :0.4396
## 3rd Qu.:13.75 3rd Qu.:2.400 3rd Qu.:0.5792
## Max. :19.90 Max. :6.600 Max. :1.5000
## NA's :22 NA's :51
## awake brainwt bodywt
## Min. : 4.10 Min. :0.00014 Min. : 0.005
## 1st Qu.:10.25 1st Qu.:0.00290 1st Qu.: 0.174
## Median :13.90 Median :0.01240 Median : 1.670
## Mean :13.57 Mean :0.28158 Mean : 166.136
## 3rd Qu.:16.15 3rd Qu.:0.12550 3rd Qu.: 41.750
## Max. :22.10 Max. :5.71200 Max. :6654.000
## NA's :27
# Compact column-wise overview: row count, column count, and the type and
# first few values of every column.
dplyr::glimpse(msleep)
## Rows: 83
## Columns: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Greater sho…
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bos", "Br…
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi", "car…
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorpha", "Ar…
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", NA, "dom…
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1, 3.0, …
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0.6, 0.8,…
## $ sleep_cycle <dbl> NA, NA, NA, 0.1333333, 0.6666667, 0.7666667, 0.3833333, …
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9, 21.0, …
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0.07000, …
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.490, 0.0…
Using the summary function, I found the total number of rows in the dataset, which corresponds to the number of mammals: 83. Using the glimpse function, I found the number of columns, which corresponds to the number of variables: 11.
Does total time slept affect mammal body weight?
The response variable is the body weight of the mammal, and it is a quantitative variable.
The explanatory variable is the total time slept, and it is also a quantitative variable.
Since both are quantitative variables, I would use a scatter plot to graph the relationship between time slept and body weight.
# Baseline scatter plot: body weight against total sleep, both on their raw
# scales, with geom_smooth() at its default settings drawn on top.
ggplot(data = msleep, mapping = aes(x = sleep_total, y = bodywt)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Same scatter plot, with only the x-axis (total sleep) on a log10 scale.
ggplot(msleep, aes(sleep_total, bodywt)) +
  scale_x_log10() +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Same scatter plot, with only the y-axis (body weight) on a log10 scale.
ggplot(data = msleep) +
  aes(x = sleep_total, y = bodywt) +
  geom_point() +
  geom_smooth() +
  scale_y_log10()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Same scatter plot, with both axes on a log10 scale.
ggplot(msleep, mapping = aes(y = bodywt, x = sleep_total)) +
  geom_point() +
  geom_smooth() +
  scale_x_log10() +
  scale_y_log10()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
I would say that the plot in which only the response variable (body weight) is log-transformed appears to be the most linear.
# Log-log scatter plot coloured by diet (vore). Because colour is mapped to
# vore, geom_smooth(method = "lm") draws a separate linear fit, with its
# confidence band, for each diet group.
ggplot(data = msleep, mapping = aes(x = sleep_total, y = bodywt, colour = vore)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw() +
  ggtitle("Relationship Between Body Weight and Time Slept") +
  xlab("Total Sleep (hrs)") +
  ylab("Body weight")
## `geom_smooth()` using formula 'y ~ x'
# Log-log scatter plot coloured by diet (vore), with one linear fit per diet
# group and the confidence band switched off.
ggplot(msleep, mapping = aes(x = sleep_total, y = bodywt, color = vore)) +
  geom_point() +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned,
  # whereas FALSE is a reserved constant.
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw() +
  labs(
    title = "Relationship Between Body Weight and Time Slept",
    x = "Total Sleep (hrs)",
    y = "Body weight"
  )
## `geom_smooth()` using formula 'y ~ x'
# Box plot of total sleep for each diet group. Boxes are filled by diet using
# the colour-blind-friendly palette from ggthemes; the legend is hidden
# because the x-axis already names each group.
ggplot(data = msleep, mapping = aes(x = vore, y = sleep_total, fill = vore)) +
  geom_boxplot() +
  xlab("Diet") +
  ylab("Sleep Total") +
  scale_fill_colorblind() +
  theme(legend.position = "none")
# Total sleep against body weight, one panel per diet group (vore), each with
# its own linear fit and no confidence band.
# NOTE(review): the title says "Logged Scales", but only the x-axis (body
# weight) is log-transformed here. Confirm whether the y-axis should be
# logged too or the title should be reworded.
ggplot(msleep, aes(x = bodywt, y = sleep_total)) +
  geom_point() +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned,
  # whereas FALSE is a reserved constant.
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~vore) +
  scale_x_log10() +
  labs(
    title = "Sleep Total vs Body Weight by Diet (Logged Scales)",
    y = "Sleep Total",
    x = "Body Weight"
  ) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'