In this file I will conduct a basic analysis of the Old Faithful dataset in R Studio
- First I will load my data and summarize.
data("faithful")
str(faithful)
## 'data.frame': 272 obs. of 2 variables:
## $ eruptions: num 3.6 1.8 3.33 2.28 4.53 ...
## $ waiting : num 79 54 74 62 85 55 88 85 51 85 ...
summary(faithful)
## eruptions waiting
## Min. :1.600 Min. :43.0
## 1st Qu.:2.163 1st Qu.:58.0
## Median :4.000 Median :76.0
## Mean :3.488 Mean :70.9
## 3rd Qu.:4.454 3rd Qu.:82.0
## Max. :5.100 Max. :96.0
head(faithful)
## eruptions waiting
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
tail(faithful)
## eruptions waiting
## 267 4.750 75
## 268 4.117 81
## 269 2.150 46
## 270 4.417 90
## 271 1.817 46
## 272 4.467 74
- Now I will load libaries necessary.
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
- And will display visually in a few simple examples of plots.
ggplot(faithful, aes(x=waiting, y=eruptions, color=eruptions)) +
geom_point() +
ggtitle("Old Faithful Data")

ggplot(faithful, aes(x=eruptions)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(faithful, aes(x=waiting)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(faithful, aes(x = eruptions, y = waiting)) +
geom_density_2d() +
geom_point()
