library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(skimr)
## 
## Attaching package: 'skimr'
## The following object is masked from 'package:stats':
## 
##     filter
library(knitr)
## 
## Attaching package: 'knitr'
## The following object is masked from 'package:skimr':
## 
##     kable
library(ggfortify)
library(visdat)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(broom)
library(corrplot)
## corrplot 0.84 loaded
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(esquisse)
USArrests <- read_csv("USArrests.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   Murder = col_double(),
##   Assault = col_double(),
##   UrbanPop = col_double(),
##   Rape = col_double()
## )
df <- USArrests %>% 
  rename(state = X1) %>% 
  rename (other = Rape) %>% 
  clean_names()

Let’s look at crime rate by state

df %>% 
  select(-urban_pop) %>% 
  gather(crime, rate, -state) %>% 
  ggplot() +
  geom_col(aes(state, rate, fill = crime)) + 
  facet_wrap( ~ crime) +
  coord_flip() +
  theme_light() + 
  theme(legend.position = "none")

Just murder

example <- df %>% 
  mutate(state = fct_reorder(state, murder) %>% 
    fct_rev()) %>%
    ggplot() +
    geom_col(aes(state, murder)) +
   theme(axis.text.x = element_text(angle = 90)) +
  labs(x= "State", y = "Rate")
  

ggplotly(example)

Linear plot. Does urban population impact crime rates. Faceted by crime

df %>% 
  gather(crime, rate, -state, -urban_pop) %>%
  ggplot(aes(urban_pop, rate, colour = crime)) +
  geom_point() + 
  geom_smooth(aes(urban_pop, rate), method = "lm", se = F, colour =  "black") +
  facet_wrap( ~ crime, ncol = 1, scales = "free_y") +
  theme_light()

How about using plotly?

example2 <- df %>% 
  gather(crime, rate, -state, -urban_pop) %>%
  ggplot() +
  ###note the use of tooltip addition with text =
  geom_point(aes(urban_pop, rate, colour = crime, text = paste("State:", state))) + 
    facet_wrap( ~ crime, ncol = 1, scales = "free_y") +
  theme_light()
## Warning: Ignoring unknown aesthetics: text
###plot and only display the state (our "text" as defined above)
ggplotly(example2, tooltip = "text")

To add another geom, eg a trendline I need to add it serperately like so:

ggplotly(example2 + geom_smooth(aes(urban_pop, rate), method = "lm", se = F, colour =  "black"))

geom_smooth(aes(urban_pop, rate), method = “lm”, se = F, colour = “black”) +

Does urban population assault rates? For assaults the relationship is not significant

test <- df %>%
  ### create long form data by stacking all crimes in one column
  gather(crime, rate, -state, -urban_pop) %>% 
  ### look at assaults only
  filter(crime == "assault")

###simple linear model. Using "test" dataset, is the crime rate impacted by the urban population
fit <- lm(rate ~ urban_pop, test)

### use broom to give a tidy output
tidy(fit)
## # A tibble: 2 x 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)    73.1     53.9        1.36  0.181 
## 2 urban_pop       1.49     0.803      1.86  0.0695

How would we test all crimes in one model? By using “nest”

blah <- df %>%
  ###long form as before
 gather(crime, rate, -state, -urban_pop) %>%
  ## group by the crime
  group_by(crime) %>% 
  ### nest the rest of the data by the groups
  nest() %>% 
  ### make new column called fit. use the nested data (default of new nested column is "data", is the crime rate impacted by the urban population?, data = . -  need to understand why)
  mutate(fit = map(data, ~lm(rate ~ urban_pop, data = .))) %>% 
  ### make a new column called results, take the list from the "fit" column and convert to nested cell. The list/new cell contains "glance"d summary results
  mutate(results = map(fit,glance)) %>% 
  ### convert nested cells to new columns
  unnest(results)

Using broom

blah2 <- df %>% 
 gather(crime, rate, -state, -urban_pop) %>% 
  group_by(crime) %>% 
  nest() %>% 
  mutate(fit = map(data, ~lm(rate ~ urban_pop, data = .))) %>% 
  mutate(results = map(fit,tidy)) %>% 
  unnest(results) 

or this way

blah3 <- df %>% 
 gather(crime, rate, -state, -urban_pop) %>% 
  group_by(crime) %>% 
  nest() %>% 
  mutate(fit = map(data, ~lm(rate ~ urban_pop, data = .) %>% tidy)) %>% 
    unnest(fit) 

Example of corrplot

###remove character column
data <- df %>% 
  select(-state)

corrplot(cor(data), order = "hclust")

Using plotly directly, do a scatter plot and add regression line

####make new df
df2 <-df %>% 
  ####calculate the lm of murder based on assaults and extract the fitted values to a new column called slope
    mutate(slope = lm(murder ~assault) %>% fitted.values())

###plot df2
  plot_ly(df2, x = ~assault, y = ~murder)  %>% 
    ###as a scatter plot
  add_markers() %>% 
    ###and add a regression line using assault and the calculated slope
  add_lines(x = ~assault, y = ~slope) %>% 
    layout(showlegend = FALSE)