# Fix the RNG seed so any randomised step later in the script is repeatable.
set.seed(1324)

# Location of the raw CSV; it is read through a URL connection.
urldata <- "https://raw.githubusercontent.com/kglan/MSDS/main/DATA607/Data%20Transformation/GunViolence/guns.csv"
gundata.r <- urldata %>%
  url() %>%
  read_csv()
## New names:
## Rows: 1173 Columns: 14
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (2): state, law dbl (12): ...1, year, violent, murder, robbery, prisoners,
## afam, cauc, male,...
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
# Drop the auto-named `...1` column and `male`, then give the remaining
# variables their display names. The new names are non-syntactic, so they
# must be wrapped in backticks wherever they are referenced later.
gundata_clean <- gundata.r %>%
  select(-c("...1", "male")) %>%
  rename(
    `%Black` = afam,
    `%White` = cauc,
    `Crime/100k` = violent,
    `Murder/100k` = murder,
    `Robbery/100k` = robbery,
    `Prisoners/100k` = prisoners,
    `Income/capitaofstate` = income,
    `Density(sqrmi/1000)` = density,
    ShallCarryLaw = law,
    `population/mil` = population
  )

# Print the cleaned tibble.
gundata_clean
## # A tibble: 1,173 x 12
## year Crime~1 Murde~2 Robbe~3 Priso~4 %Blac~5 %Whit~6 popul~7 Incom~8 Densi~9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1977 414. 14.2 96.8 83 8.38 55.1 3.78 9563. 0.0746
## 2 1978 419. 13.3 99.1 94 8.35 55.1 3.83 9932 0.0756
## 3 1979 413. 13.2 110. 144 8.33 55.1 3.87 9877. 0.0762
## 4 1980 448. 13.2 132. 141 8.41 54.9 3.90 9541. 0.0768
## 5 1981 470. 11.9 126. 149 8.48 54.9 3.92 9548. 0.0772
## 6 1982 448. 10.6 112 183 8.51 54.9 3.93 9479. 0.0773
## 7 1983 416 9.2 98.4 215 8.55 54.8 3.93 9783 0.0775
## 8 1984 431. 9.4 96.1 243 8.56 54.8 3.95 10357. 0.0778
## 9 1985 458. 9.8 105. 256 8.56 54.7 3.97 10726. 0.0782
## 10 1986 558 10.1 112. 267 8.57 54.5 3.99 11092. 0.0786
## # ... with 1,163 more rows, 2 more variables: state <chr>, ShallCarryLaw <chr>,
## # and abbreviated variable names 1: `Crime/100k`, 2: `Murder/100k`,
## # 3: `Robbery/100k`, 4: `Prisoners/100k`, 5: `%Black`, 6: `%White`,
## # 7: `population/mil`, 8: `Income/capitaofstate`, 9: `Density(sqrmi/1000)`
# Per-column summary of the cleaned data.
gundata_clean %>% summary()
## year Crime/100k Murder/100k Robbery/100k
## Min. :1977 Min. : 47.0 Min. : 0.200 Min. : 6.4
## 1st Qu.:1982 1st Qu.: 283.1 1st Qu.: 3.700 1st Qu.: 71.1
## Median :1988 Median : 443.0 Median : 6.400 Median : 124.1
## Mean :1988 Mean : 503.1 Mean : 7.665 Mean : 161.8
## 3rd Qu.:1994 3rd Qu.: 650.9 3rd Qu.: 9.800 3rd Qu.: 192.7
## Max. :1999 Max. :2921.8 Max. :80.600 Max. :1635.1
## Prisoners/100k %Black %White population/mil
## Min. : 19.0 Min. : 0.2482 Min. :21.78 Min. : 0.4027
## 1st Qu.: 114.0 1st Qu.: 2.2022 1st Qu.:59.94 1st Qu.: 1.1877
## Median : 187.0 Median : 4.0262 Median :65.06 Median : 3.2713
## Mean : 226.6 Mean : 5.3362 Mean :62.95 Mean : 4.8163
## 3rd Qu.: 291.0 3rd Qu.: 6.8507 3rd Qu.:69.20 3rd Qu.: 5.6856
## Max. :1913.0 Max. :26.9796 Max. :76.53 Max. :33.1451
## Income/capitaofstate Density(sqrmi/1000) state ShallCarryLaw
## Min. : 8555 Min. : 0.000707 Length:1173 Length:1173
## 1st Qu.:11935 1st Qu.: 0.031911 Class :character Class :character
## Median :13402 Median : 0.081569 Mode :character Mode :character
## Mean :13725 Mean : 0.352038
## 3rd Qu.:15271 3rd Qu.: 0.177718
## Max. :23647 Max. :11.102120
# Recode ShallCarryLaw as a 0/1 indicator: "no" becomes 0, any other value 1.
gundata_pairs <- gundata_clean %>%
  mutate(ShallCarryLaw = ifelse(ShallCarryLaw != "no", 1, 0))

# Scatterplot matrix of the selected variables.
# NOTE(review): the recoded ShallCarryLaw column is not part of this formula.
pairs(
  ~ year + `Crime/100k` + `Murder/100k` + `Robbery/100k` +
    `Prisoners/100k` + `Density(sqrmi/1000)` + `Income/capitaofstate`,
  data = gundata_pairs
)
library(corrplot)
## corrplot 0.92 loaded
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Keep everything except the two character columns so the remaining data
# can be passed to the correlation helpers.
gundata_corplot <- gundata_clean %>%
  select(-c("state", "ShallCarryLaw"))

# Correlation chart using Pearson correlations, with histograms enabled
# and solid points (pch 16).
chart.Correlation(
  gundata_corplot,
  histogram = TRUE,
  method = "pearson",
  pch = 16
)
# Pearson correlation matrix of the columns in gundata_corplot.
gundata_cor <- cor(gundata_corplot, method = "pearson")

# Palette generator interpolating between five anchor colours.
col_gd <- colorRampPalette(
  c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)

# Colour-tile correlogram: upper triangle only, variables ordered by
# hierarchical clustering, coefficients printed in black, and black
# labels rotated 45 degrees.
corrplot(
  gundata_cor,
  method = "color",
  col = col_gd(200),
  type = "upper",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)
library(ggplot2)
# One box per year of `Crime/100k`, with the individual observations
# overlaid as horizontally jittered orange points.
gundata_clean %>%
  ggplot(aes(x = year, y = `Crime/100k`, group = year)) +
  geom_boxplot(fill = "skyblue", notch = FALSE) +
  geom_jitter(size = 1, color = "orange", width = 0.3)
# Keep only the columns needed for the per-state plot. Columns are selected
# by name rather than by position so a change in the upstream column order
# cannot silently pick the wrong variables.
gunstate <- gundata_clean %>%
  select(year, `Crime/100k`, state)

# Print the reduced tibble.
gunstate
## # A tibble: 1,173 x 3
## year `Crime/100k` state
## <dbl> <dbl> <chr>
## 1 1977 414. Alabama
## 2 1978 419. Alabama
## 3 1979 413. Alabama
## 4 1980 448. Alabama
## 5 1981 470. Alabama
## 6 1982 448. Alabama
## 7 1983 416 Alabama
## 8 1984 431. Alabama
## 9 1985 458. Alabama
## 10 1986 558 Alabama
## # ... with 1,163 more rows
library(lattice)
# Line plot of log(`Crime/100k`) against year, one panel per state.
# `year` is already numeric, so it is used directly; this also keeps the
# default x-axis label readable.
xyplot(
  log(`Crime/100k`) ~ year | state,
  data = gunstate,
  type = "l"
)