# install.packages(c("readxl", "tidyverse", "ggplot2", "GGally", "DescTools", "plm", "table1", "compareGroups", "plotly", "lme4", "nlme", "lmerTest", "ggrepel", "magrittr"), dependencies = TRUE)
# Load the crime dataset from a local CSV file. The path is machine-specific
# and must be adjusted before running on another computer.
crime <- read.csv(
  "C:\\Thach\\VN trips\\VN trip 5 (Apr 2023)\\Datasets\\Crime dataset reduced.csv",
  header = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
)
# Quick sanity checks: dimensions first, then the leading rows.
dim(crime)
## [1] 110 9
head(crime)
## city year police robbery population blackpct femheadpct welfarepercap
## 1 fresno 71 1.748210 2.287489 168744 9.5 14.96 424.6913
## 2 fresno 72 1.736392 2.459889 172772 9.5 14.72 449.7607
## 3 fresno 73 1.764706 3.371041 176800 9.5 14.48 423.4234
## 4 fresno 74 1.849548 3.484163 176800 9.5 14.24 383.3671
## 5 fresno 75 1.722380 3.212465 176500 9.5 14.00 351.3011
## 6 fresno 76 1.781900 4.064081 177900 9.5 13.76 376.0984
## edexppercap
## 1 762.9630
## 2 811.0048
## 3 797.2973
## 4 811.3589
## 5 847.5837
## 6 934.9736
# Inspect the final six rows (six is the default `n`).
tail(crime, n = 6L)
## city year police robbery population blackpct femheadpct
## 105 sanfran 87 2.435108 6.060425 767933 11.44 9.9
## 106 sanfran 88 2.391478 6.455532 753927 11.26 9.9
## 107 sanfran 89 2.355639 6.664767 750964 11.08 9.9
## 108 sanfran 90 2.462847 9.742264 723959 10.90 9.9
## 109 sanfran 91 2.489720 9.498822 739039 10.72 9.9
## 110 sanfran 92 2.459764 11.024325 750885 10.54 9.9
## welfarepercap edexppercap
## 105 359.4338 859.8782
## 106 366.2318 852.3222
## 107 376.6129 878.2258
## 108 383.3206 909.7169
## 109 422.8782 936.5314
## 110 462.4359 963.3459
# Derived columns, base-R style: `yr` is the two-digit year shifted by 1900,
# `pop` is the population expressed in thousands.
crime[["yr"]] <- crime[["year"]] + 1900
crime[["pop"]] <- crime[["population"]] / 1000
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add both derived columns in one dplyr call: `yr` is the two-digit year
# shifted by 1900, `pop` is the population divided by 1000.
crime <- dplyr::mutate(
  crime,
  yr = year + 1900,
  pop = population / 1000
)
# Label each observation with its period by assigning into `period` through a
# logical row mask; rows whose year lies outside 71-92 are left as NA.
crime$period[with(crime, year >= 71 & year <= 79)] <- "1970-1979"
crime$period[with(crime, year >= 80 & year <= 89)] <- "1980-1989"
crime$period[with(crime, year >= 90 & year <= 92)] <- "1990-1992"
# Recode `period` as a factor with cut(). Intervals are right-closed, i.e.
# (-Inf, 79], (79, 89] and (89, 92]; a year above 92 would become NA.
crime <- dplyr::mutate(
  crime,
  period = cut(
    year,
    breaks = c(-Inf, 79, 89, 92),
    labels = c("1970-1979", "1980-1989", "1990-1992"),
    right = TRUE
  )
)
# Three equivalent ways to keep only city, year, police and robbery (the first
# four columns): by name, by position, and with dplyr::select().
crime_new1 <- crime[c("city", "year", "police", "robbery")]
crime_new1 <- crime[seq_len(4)]
crime_new1 <- dplyr::select(crime, city, year, police, robbery)
# Row filter: the Los Angeles observation(s) for year 90. which() drops NA
# comparisons, matching what subset() does.
crime_new2 <- crime[which(crime$year == 90 & crime$city == "losangel"), ]
# Per-city medians of police and robbery. across() replaces the superseded
# summarise_all(); everything() skips the grouping column, so the result is
# still one row per city with columns city, police and robbery.
crime %>%
  dplyr::select(city, police, robbery) %>%
  group_by(city) %>%
  summarise(across(everything(), median))
## # A tibble: 5 × 3
## city police robbery
## <chr> <dbl> <dbl>
## 1 fresno 1.57 4.58
## 2 losangel 2.30 8.26
## 3 oakland 1.77 8.88
## 4 sacramen 1.78 5.32
## 5 sanfran 2.55 8.37
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table of police, robbery, population and blackpct; the term
# after `|` (city) defines the strata.
table1(
  ~ police + robbery + population + blackpct | city,
  data = crime
)
## |  | fresno (N=22) | losangel (N=22) | oakland (N=22) | sacramen (N=22) | sanfran (N=22) | Overall (N=110) |
## |---|---|---|---|---|---|---|
## | police |  |  |  |  |  |  |
## | Mean (SD) | 1.52 (0.238) | 2.36 (0.191) | 1.83 (0.150) | 1.79 (0.137) | 2.59 (0.165) | 2.02 (0.432) |
## | Median [Min, Max] | 1.57 [1.12, 1.85] | 2.30 [2.12, 2.76] | 1.77 [1.60, 2.09] | 1.78 [1.56, 2.00] | 2.55 [2.36, 2.89] | 1.94 [1.12, 2.89] |
## | robbery |  |  |  |  |  |  |
## | Mean (SD) | 4.59 (1.32) | 7.71 (2.14) | 9.28 (1.20) | 5.10 (1.18) | 8.49 (1.70) | 7.03 (2.41) |
## | Median [Min, Max] | 4.58 [2.29, 8.42] | 8.26 [4.78, 11.2] | 8.88 [8.03, 12.5] | 5.32 [2.78, 6.91] | 8.37 [6.06, 11.2] | 6.73 [2.29, 12.5] |
## | population |  |  |  |  |  |  |
## | Mean (SD) | 241000 (66400) | 3090000 (290000) | 356000 (15600) | 300000 (41400) | 707000 (35400) | 938000 (1100000) |
## | Median [Min, Max] | 224000 [169000, 367000] | 3070000 [2730000, 3620000] | 358000 [328000, 386000] | 285000 [260000, 383000] | 707000 [659000, 768000] | 364000 [169000, 3620000] |
## | blackpct |  |  |  |  |  |  |
## | Mean (SD) | 9.07 (0.506) | 16.1 (1.44) | 43.3 (3.17) | 13.5 (1.47) | 12.2 (0.901) | 18.8 (12.6) |
## | Median [Min, Max] | 9.32 [8.06, 9.50] | 16.6 [13.4, 17.8] | 44.3 [35.7, 46.9] | 13.7 [10.9, 15.7] | 12.4 [10.5, 13.3] | 13.5 [8.06, 46.9] |
# Keep the two cities being compared, then test the difference in mean police
# between them; t.test() defaults to the Welch (unequal-variance) version.
set.1 <- dplyr::filter(crime, city %in% c("fresno", "losangel"))
t.test(police ~ city, data = set.1)
##
## Welch Two Sample t-test
##
## data: police by city
## t = -12.863, df = 40.085, p-value = 8.199e-16
## alternative hypothesis: true difference in means between group fresno and group losangel is not equal to 0
## 95 percent confidence interval:
## -0.9675795 -0.7048258
## sample estimates:
## mean in group fresno mean in group losangel
## 1.524691 2.360894
library(compareGroups)
# For a two-level response compareGroups() by default also computes an odds
# ratio from a logistic fit. Here police separates the two cities completely
# (every fresno value is below every losangel value), so that fit raises
# glm.fit "did not converge" / "fitted probabilities 0 or 1" warnings. The
# ratio is not displayed in this table, so skip computing it.
createTable(compareGroups(city ~ police, data = set.1, compute.ratio = FALSE))
##
## --------Summary descriptives table by 'city'---------
##
## ________________________________________
## fresno losangel p.overall
## N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Three-city subset, followed by a one-way ANOVA of police on city.
set.2 <- dplyr::filter(crime, city %in% c("fresno", "losangel", "sacramen"))
aov(police ~ city, data = set.2) %>% summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## city 2 8.024 4.012 107.7 <2e-16 ***
## Residuals 63 2.347 0.037
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Descriptive table of police by city for the three-city subset, with the
# overall p-value.
compareGroups(city ~ police, data = set.2) %>% createTable()
##
## --------Summary descriptives table by 'city'---------
##
## ____________________________________________________
## fresno losangel sacramen p.overall
## N=22 N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) 1.79 (0.14) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# One-way ANOVA of police on city using every city in the dataset.
aov(police ~ city, data = crime) %>% summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## city 4 16.986 4.246 131.5 <2e-16 ***
## Residuals 105 3.392 0.032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Descriptive table of police by city across all cities, with the overall
# p-value.
compareGroups(city ~ police, data = crime) %>% createTable()
##
## --------Summary descriptives table by 'city'---------
##
## ____________________________________________________________________________
## fresno losangel oakland sacramen sanfran p.overall
## N=22 N=22 N=22 N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) 1.83 (0.15) 1.79 (0.14) 2.59 (0.16) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯