summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readxl)
overdosedata <- read_excel("excel overdose data/overdosedata.xlsx")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
head(overdosedata)
## # A tibble: 6 × 7
## ...1 ...2 ...3 Source: 2011-2023 De…¹ ...5 ...6 ...7
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Demographic Drug Type <NA> 2019 2020 2021 2022
## 2 All Deaths All Drugs Deaths p… 10.0 10.7… 13.7… 17.7…
## 3 <NA> <NA> Total wi… 254 278 356 444
## 4 <NA> All Opioids Deaths p… 4.51 3.97… 6.57… 9.91…
## 5 <NA> <NA> Total wi… 128 123 181 257
## 6 <NA> Benzodiazepines Deaths p… -1 3.10… -1 -0.6…
## # ℹ abbreviated name:
## # ¹​`Source: 2011-2023 Death Certificate Data, Texas Center for Health Statistics`
summary(overdosedata)
## ...1 ...2 ...3
## Length:23 Length:23 Length:23
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Source: 2011-2023 Death Certificate Data, Texas Center for Health Statistics
## Min. : -2.0000
## 1st Qu.: 0.6027
## Median : 10.0000
## Mean : 120.0584
## 3rd Qu.: 40.5000
## Max. :2019.0000
## ...5 ...6 ...7
## Length:23 Length:23 Length:23
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
betteroverdosedata <- overdosedata %>% select(...5,...6,...7) %>% na.omit (.)
str(betteroverdosedata)
## tibble [23 × 3] (S3: tbl_df/tbl/data.frame)
## $ ...5: chr [1:23] "2020" "10.753749459" "278" "3.9708564882999999" ...
## $ ...6: chr [1:23] "2021" "13.794426864" "356" "6.5767955967000002" ...
## $ ...7: chr [1:23] "2022" "17.777471253000002" "444" "9.9124768709999991" ...
head(betteroverdosedata)
## # A tibble: 6 × 3
## ...5 ...6 ...7
## <chr> <chr> <chr>
## 1 2020 2021 2022
## 2 10.753749459 13.794426864 17.777471253000002
## 3 278 356 444
## 4 3.9708564882999999 6.5767955967000002 9.9124768709999991
## 5 123 181 257
## 6 3.103889999999998E-3 -1 -0.66617054340000004
tibble(betteroverdosedata)
## # A tibble: 23 × 3
## ...5 ...6 ...7
## <chr> <chr> <chr>
## 1 2020 2021 2022
## 2 10.753749459 13.794426864 17.777471253000002
## 3 278 356 444
## 4 3.9708564882999999 6.5767955967000002 9.9124768709999991
## 5 123 181 257
## 6 3.103889999999998E-3 -1 -0.66617054340000004
## 7 20 18 27
## 8 2.2450420395999999 2.0464028075999998 3.7255236544999999
## 9 47 64 81
## 10 -0.32816018330000007 -0.64083567050000001 0.3457000788000002
## # ℹ 13 more rows
summary(betteroverdosedata)
## ...5 ...6 ...7
## Length:23 Length:23 Length:23
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
summary(betteroverdosedata$...5,...6,...7)
## Length Class Mode
## 23 character character
summary(betteroverdosedata$...5)
## Length Class Mode
## 23 character character
summary(betteroverdosedata$...6)
## Length Class Mode
## 23 character character
summary(betteroverdosedata$...7)
## Length Class Mode
## 23 character character
betteroverdosedata$...5<-as.numeric(betteroverdosedata$...5)
hist(betteroverdosedata$...5)
betteroverdosedata$...6 <-as.numeric(betteroverdosedata$...6)
hist(betteroverdosedata$...6)
betteroverdosedata$...7<-as.numeric(betteroverdosedata$...7)
hist(betteroverdosedata$...7)
plot(betteroverdosedata$...5,betteroverdosedata$...6)
plot(betteroverdosedata$...6,betteroverdosedata$...7)
plot(betteroverdosedata$...5,betteroverdosedata$...7)
cor(betteroverdosedata$...5,betteroverdosedata$...6)
## [1] 0.9977561
cor(betteroverdosedata$...6,betteroverdosedata$...7)
## [1] 0.9970425
cor(betteroverdosedata$...5,betteroverdosedata$...7)
## [1] 0.9898489
homework;Exploratory statistics and their commands:
summary(data$x)
hist(data$x) - for continuous variables
plot(data\(x,data\)y) - to compare variables
(ggplot is fine if you are comfortable with it)