1. Import dataset Crime
# Read the reduced crime dataset from disk, then preview its first ten rows.
crime <- read.csv(
  file = "~/Desktop/R-dir/R studying/dataset/Crime dataset reduced.csv"
)
head(crime, n = 10)
## city year police robbery population blackpct femheadpct welfarepercap
## 1 fresno 71 1.748210 2.287489 168744 9.5 14.96 424.6913
## 2 fresno 72 1.736392 2.459889 172772 9.5 14.72 449.7607
## 3 fresno 73 1.764706 3.371041 176800 9.5 14.48 423.4234
## 4 fresno 74 1.849548 3.484163 176800 9.5 14.24 383.3671
## 5 fresno 75 1.722380 3.212465 176500 9.5 14.00 351.3011
## 6 fresno 76 1.781900 4.064081 177900 9.5 13.76 376.0984
## 7 fresno 77 1.823369 5.245607 182629 9.5 13.52 391.0891
## 8 fresno 78 1.689137 5.657839 194774 9.5 13.28 409.5092
## 9 fresno 79 1.755205 5.178369 194849 9.5 13.04 370.5234
## 10 fresno 80 1.615629 5.719698 215396 9.5 12.80 347.0874
## edexppercap
## 1 762.9630
## 2 811.0048
## 3 797.2973
## 4 811.3589
## 5 847.5837
## 6 934.9736
## 7 920.7921
## 8 943.2515
## 9 789.2562
## 10 771.8447
# Preview the last twenty rows of the dataset.
tail(crime, n = 20)
## city year police robbery population blackpct femheadpct
## 91 sanfran 73 2.888191 7.105422 677933 13.19 14.52
## 92 sanfran 74 2.888204 6.543448 677930 13.12 13.86
## 93 sanfran 75 2.679196 8.488351 669977 13.05 13.20
## 94 sanfran 76 2.506767 9.966917 665000 12.98 12.54
## 95 sanfran 77 2.451875 8.014779 676625 12.91 11.88
## 96 sanfran 78 2.517419 9.882920 658611 12.84 11.22
## 97 sanfran 79 2.357489 10.155103 659176 12.77 10.56
## 98 sanfran 80 2.578061 11.165171 674150 12.70 9.90
## 99 sanfran 81 2.599365 10.671992 692092 12.52 9.90
## 100 sanfran 82 2.782806 10.099142 708278 12.34 9.90
## 101 sanfran 83 2.666508 8.243572 721168 12.16 9.90
## 102 sanfran 84 2.676639 7.257223 719559 11.98 9.90
## 103 sanfran 85 2.630015 6.968380 733456 11.80 9.90
## 104 sanfran 86 2.530165 6.775086 750544 11.62 9.90
## 105 sanfran 87 2.435108 6.060425 767933 11.44 9.90
## 106 sanfran 88 2.391478 6.455532 753927 11.26 9.90
## 107 sanfran 89 2.355639 6.664767 750964 11.08 9.90
## 108 sanfran 90 2.462847 9.742264 723959 10.90 9.90
## 109 sanfran 91 2.489720 9.498822 739039 10.72 9.90
## 110 sanfran 92 2.459764 11.024325 750885 10.54 9.90
## welfarepercap edexppercap
## 91 423.4234 797.2973
## 92 383.3671 811.3589
## 93 351.3011 847.5837
## 94 376.0984 934.9736
## 95 391.0891 920.7921
## 96 409.5092 943.2515
## 97 370.5234 789.2562
## 98 347.0874 771.8447
## 99 382.8383 787.6788
## 100 375.0273 748.5074
## 101 355.5932 723.0884
## 102 343.9584 726.2258
## 103 350.4481 785.3521
## 104 353.3180 826.7067
## 105 359.4338 859.8782
## 106 366.2318 852.3222
## 107 376.6129 878.2258
## 108 383.3206 909.7169
## 109 422.8782 936.5314
## 110 462.4359 963.3459
Variable explanation
- city: one of fresno, losangel, oakland, sacramen, sanfran
- year: two-digit year, from 71 (1971) to 92 (1992)
- police: number of police officers per 1,000 inhabitants
- robbery: number of robberies per 1,000 inhabitants
- population: city population
- blackpct: percentage of the population that is Black
2. Create new variables
# Derive convenience columns from the raw data.
# `yr`: four-digit calendar year (the raw `year` is two-digit, 71 to 92).
crime$yr <- crime$year + 1900
# `pop`: population expressed in thousands, rounded to a whole number.
crime$pop <- round(crime$population / 1000, 0)
# `period`: decade label for each row. Start from an all-NA character column
# so that any row falling outside the ranges below is explicitly left as NA.
crime$period <- NA_character_
crime$period[crime$year >= 71 & crime$year <= 79] <- "1970 - 1979"
crime$period[crime$year >= 80 & crime$year <= 89] <- "1980 - 1989"
crime$period[crime$year >= 90] <- "1990 - 1992"
3. Create a new subset of the dataset with 4 variables (city, year, police,
robbery)
suppressMessages(library(dplyr))
# Keep only the four columns of interest, using base R indexing.
crime_new1 <- crime[, c("city", "year", "police", "robbery")]
# Same column selection, written as a dplyr pipeline.
new <- crime %>%
  select(city, year, police, robbery) # alternative way using pipe operator
# Rows for Los Angeles with a (two-digit) year of 90 or earlier.
crime_new2 <- crime %>%
  filter(year <= 90 & city == "losangel")
#
# Median of the police rate over every city-year row.
median(crime[["police"]])
## [1] 1.943653
# Median of the robbery rate over every city-year row.
median(crime[["robbery"]])
## [1] 6.731714
4. Compare
suppressMessages(library(compareGroups))
4.1 Compare between Fresno & Los Angeles
# Restrict the data to Fresno and Los Angeles, then print a descriptive
# table of the four variables broken down by city with an overall p-value.
two_cities <- crime %>%
  filter(city %in% c("fresno", "losangel"))
# NOTE(review): the glm.fit warnings recorded in the output presumably come
# from compareGroups fitting odds ratios on a two-level response whose groups
# are perfectly separated; `compute.ratio = FALSE` may avoid them - verify
# against the package documentation before changing the call.
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = two_cities
  )
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## --------Summary descriptives table by 'city'---------
##
## ____________________________________________________
## fresno losangel p.overall
## N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) <0.001
## robbery 4.59 (1.32) 7.71 (2.14) <0.001
## population 241421 (66407) 3086460 (290100) <0.001
## blackpct 9.07 (0.51) 16.1 (1.44) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
4.2 Compare between Fresno, Los Angeles and Sacramento
# Restrict the data to Fresno, Los Angeles and Sacramento, then print a
# descriptive table of the four variables by city with an overall p-value.
three_cities <- crime %>%
  filter(city %in% c("fresno", "losangel", "sacramen"))
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = three_cities
  )
)
##
## --------Summary descriptives table by 'city'---------
##
## ___________________________________________________________________
## fresno losangel sacramen p.overall
## N=22 N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) 1.79 (0.14) <0.001
## robbery 4.59 (1.32) 7.71 (2.14) 5.10 (1.18) <0.001
## population 241421 (66407) 3086460 (290100) 299897 (41403) <0.001
## blackpct 9.07 (0.51) 16.1 (1.44) 13.5 (1.47) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
4.3 Compare across all five cities
# Count how many rows each city contributes.
table(crime[["city"]])
##
## fresno losangel oakland sacramen sanfran
## 22 22 22 22 22
# Descriptive table of the four variables by city, using the full dataset.
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = crime
  )
)
##
## --------Summary descriptives table by 'city'---------
##
## _________________________________________________________________________________________________
## fresno losangel oakland sacramen sanfran p.overall
## N=22 N=22 N=22 N=22 N=22
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## police 1.52 (0.24) 2.36 (0.19) 1.83 (0.15) 1.79 (0.14) 2.59 (0.16) <0.001
## robbery 4.59 (1.32) 7.71 (2.14) 9.28 (1.20) 5.10 (1.18) 8.49 (1.70) <0.001
## population 241421 (66407) 3086460 (290100) 355641 (15635) 299897 (41403) 707330 (35363) <0.001
## blackpct 9.07 (0.51) 16.1 (1.44) 43.3 (3.17) 13.5 (1.47) 12.2 (0.90) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯