# Summary statistics for `cars` (columns `speed` and `dist`, per the output
# below).
# NOTE(review): `cars` is presumably R's built-in example dataset and is
# unrelated to the overdose analysis that follows -- likely left over from the
# default R Markdown template; confirm whether it can be removed.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readxl)
# Read the overdose workbook. No `sheet`, `skip` or `col_names` arguments are
# given, so readxl's defaults apply and the first spreadsheet row is taken as
# the header. That row only contains a source note (in the fourth column), so
# the other six columns come back unnamed and are auto-named `...1`, `...2`,
# `...3`, `...5`, `...6`, `...7` (see the "New names" message below).
# The real labels (Demographic, Drug Type, 2019 ... 2022) end up in data row 1.
# NOTE(review): the path is relative, so it resolves against the working
# directory at knit time -- confirm the file is where the document expects it.
overdosedata <- read_excel("excel overdose data/overdosedata.xlsx")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Peek at the first six rows. Row 1 holds the real column labels
# ("Demographic", "Drug Type", 2019, "2020", "2021", "2022"), not data.
head(overdosedata)
## # A tibble: 6 × 7
##   ...1        ...2            ...3      Source: 2011-2023 De…¹ ...5  ...6  ...7 
##   <chr>       <chr>           <chr>                      <dbl> <chr> <chr> <chr>
## 1 Demographic Drug Type       <NA>                     2019    2020  2021  2022 
## 2 All Deaths  All Drugs       Deaths p…                  10.0  10.7… 13.7… 17.7…
## 3 <NA>        <NA>            Total wi…                 254    278   356   444  
## 4 <NA>        All Opioids     Deaths p…                   4.51 3.97… 6.57… 9.91…
## 5 <NA>        <NA>            Total wi…                 128    123   181   257  
## 6 <NA>        Benzodiazepines Deaths p…                  -1    3.10… -1    -0.6…
## # ℹ abbreviated name:
## #   ¹​`Source: 2011-2023 Death Certificate Data, Texas Center for Health Statistics`
# Column-wise summary of the raw import. Only the fourth column is numeric;
# its Max of 2019 is the year label sitting in row 1, not a measurement.
# The other six columns are character, so only Length / Class / Mode is shown.
summary(overdosedata)
##      ...1               ...2               ...3          
##  Length:23          Length:23          Length:23         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  Source: 2011-2023 Death Certificate Data, Texas Center for Health Statistics
##  Min.   :  -2.0000                                                           
##  1st Qu.:   0.6027                                                           
##  Median :  10.0000                                                           
##  Mean   : 120.0584                                                           
##  3rd Qu.:  40.5000                                                           
##  Max.   :2019.0000                                                           
##      ...5               ...6               ...7          
##  Length:23          Length:23          Length:23         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
# Keep only the three columns whose row-1 labels are 2020, 2021 and 2022
# (all still character at this point) and drop any row with a missing value
# in one of them.
betteroverdosedata <- overdosedata %>%
  select(...5, ...6, ...7) %>%
  na.omit()

# Inspect the structure (dimensions and column types) of the reduced table.
str(betteroverdosedata)
## tibble [23 × 3] (S3: tbl_df/tbl/data.frame)
##  $ ...5: chr [1:23] "2020" "10.753749459" "278" "3.9708564882999999" ...
##  $ ...6: chr [1:23] "2021" "13.794426864" "356" "6.5767955967000002" ...
##  $ ...7: chr [1:23] "2022" "17.777471253000002" "444" "9.9124768709999991" ...
# First six rows of the reduced table. Row 1 still carries the labels
# "2020" / "2021" / "2022" rather than a measurement.
head(betteroverdosedata)
## # A tibble: 6 × 3
##   ...5                 ...6               ...7                
##   <chr>                <chr>              <chr>               
## 1 2020                 2021               2022                
## 2 10.753749459         13.794426864       17.777471253000002  
## 3 278                  356                444                 
## 4 3.9708564882999999   6.5767955967000002 9.9124768709999991  
## 5 123                  181                257                 
## 6 3.103889999999998E-3 -1                 -0.66617054340000004
# Print the reduced table (first ten of 23 rows are shown below).
# NOTE(review): the object is already a tibble (see the str() output), so the
# tibble() wrapper looks redundant -- printing the object directly should give
# the same display; confirm before simplifying.
tibble(betteroverdosedata)
## # A tibble: 23 × 3
##    ...5                 ...6                 ...7                
##    <chr>                <chr>                <chr>               
##  1 2020                 2021                 2022                
##  2 10.753749459         13.794426864         17.777471253000002  
##  3 278                  356                  444                 
##  4 3.9708564882999999   6.5767955967000002   9.9124768709999991  
##  5 123                  181                  257                 
##  6 3.103889999999998E-3 -1                   -0.66617054340000004
##  7 20                   18                   27                  
##  8 2.2450420395999999   2.0464028075999998   3.7255236544999999  
##  9 47                   64                   81                  
## 10 -0.32816018330000007 -0.64083567050000001 0.3457000788000002  
## # ℹ 13 more rows
# Summary of the reduced table. All three columns are still character here,
# so summary() reports only Length / Class / Mode instead of quartiles.
summary(betteroverdosedata)
##      ...5               ...6               ...7          
##  Length:23          Length:23          Length:23         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Summarise the three selected columns in one call. summary() describes a
# single object, so the columns are passed together as one table; listing
# further columns as extra arguments does not summarise them. The columns are
# still character at this point, hence Length / Class / Mode.
summary(betteroverdosedata[c("...5", "...6", "...7")])
##      ...5               ...6               ...7          
##  Length:23          Length:23          Length:23         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Per-column summaries. Each column is still character at this point, so only
# Length / Class / Mode is reported for each of them.
summary(betteroverdosedata$...5)
##    Length     Class      Mode 
##        23 character character
summary(betteroverdosedata$...6)
##    Length     Class      Mode 
##        23 character character
summary(betteroverdosedata$...7)
##    Length     Class      Mode 
##        23 character character
# Convert the three year columns from character to numeric.
#
# Row 1 of the table holds the column labels ("2020", "2021", "2022"), not
# measurements. If it is converted along with the data it becomes an outlier
# in every histogram and scatterplot and can inflate the correlations, so it
# is dropped before converting. The row is removed by position: run this
# script once, top to bottom.
# NOTE(review): the remaining rows appear to alternate between rates and
# counts (see column `...3` of `overdosedata`), and some values are negative
# (e.g. -1), possibly suppression codes -- confirm before interpreting.
year_cols <- c("...5", "...6", "...7")
betteroverdosedata <- betteroverdosedata %>%
  slice(-1) %>%
  mutate(across(all_of(year_cols), as.numeric))

# Numeric summaries (quartiles, mean) now that the columns are numeric.
summary(betteroverdosedata)

# Distribution of the values recorded under each year label.
hist(betteroverdosedata$...5, main = "2020", xlab = "Value")

hist(betteroverdosedata$...6, main = "2021", xlab = "Value")

hist(betteroverdosedata$...7, main = "2022", xlab = "Value")

# Pairwise scatterplots comparing the years.
plot(betteroverdosedata$...5, betteroverdosedata$...6,
     xlab = "2020", ylab = "2021")

plot(betteroverdosedata$...6, betteroverdosedata$...7,
     xlab = "2021", ylab = "2022")

plot(betteroverdosedata$...5, betteroverdosedata$...7,
     xlab = "2020", ylab = "2022")

# Pairwise correlations between the years.
cor(betteroverdosedata$...5, betteroverdosedata$...6)
cor(betteroverdosedata$...6, betteroverdosedata$...7)
cor(betteroverdosedata$...5, betteroverdosedata$...7)
## (Recorded output removed: the earlier values 0.9977561, 0.9970425 and
## 0.9898489 were computed with the label row included. Re-knit to refresh.)

Homework: exploratory statistics and their commands:

  1. summary(data$x)

  2. hist(data$x) - for continuous variables

  3. plot(data$x, data$y) - to compare variables

(ggplot is fine if you are comfortable with it)

  4. cor(data$x, data$y) - to see a correlation between two variables