1. Import the Crime dataset

# Load the reduced crime dataset from CSV into a data frame.
crime <- read.csv(
  file = "~/Desktop/R-dir/R studying/dataset/Crime dataset reduced.csv"
)
# Preview the first 10 rows.
head(crime, n = 10)
##      city year   police  robbery population blackpct femheadpct welfarepercap
## 1  fresno   71 1.748210 2.287489     168744      9.5      14.96      424.6913
## 2  fresno   72 1.736392 2.459889     172772      9.5      14.72      449.7607
## 3  fresno   73 1.764706 3.371041     176800      9.5      14.48      423.4234
## 4  fresno   74 1.849548 3.484163     176800      9.5      14.24      383.3671
## 5  fresno   75 1.722380 3.212465     176500      9.5      14.00      351.3011
## 6  fresno   76 1.781900 4.064081     177900      9.5      13.76      376.0984
## 7  fresno   77 1.823369 5.245607     182629      9.5      13.52      391.0891
## 8  fresno   78 1.689137 5.657839     194774      9.5      13.28      409.5092
## 9  fresno   79 1.755205 5.178369     194849      9.5      13.04      370.5234
## 10 fresno   80 1.615629 5.719698     215396      9.5      12.80      347.0874
##    edexppercap
## 1     762.9630
## 2     811.0048
## 3     797.2973
## 4     811.3589
## 5     847.5837
## 6     934.9736
## 7     920.7921
## 8     943.2515
## 9     789.2562
## 10    771.8447
# Preview the last 20 rows.
tail(crime, n = 20)
##        city year   police   robbery population blackpct femheadpct
## 91  sanfran   73 2.888191  7.105422     677933    13.19      14.52
## 92  sanfran   74 2.888204  6.543448     677930    13.12      13.86
## 93  sanfran   75 2.679196  8.488351     669977    13.05      13.20
## 94  sanfran   76 2.506767  9.966917     665000    12.98      12.54
## 95  sanfran   77 2.451875  8.014779     676625    12.91      11.88
## 96  sanfran   78 2.517419  9.882920     658611    12.84      11.22
## 97  sanfran   79 2.357489 10.155103     659176    12.77      10.56
## 98  sanfran   80 2.578061 11.165171     674150    12.70       9.90
## 99  sanfran   81 2.599365 10.671992     692092    12.52       9.90
## 100 sanfran   82 2.782806 10.099142     708278    12.34       9.90
## 101 sanfran   83 2.666508  8.243572     721168    12.16       9.90
## 102 sanfran   84 2.676639  7.257223     719559    11.98       9.90
## 103 sanfran   85 2.630015  6.968380     733456    11.80       9.90
## 104 sanfran   86 2.530165  6.775086     750544    11.62       9.90
## 105 sanfran   87 2.435108  6.060425     767933    11.44       9.90
## 106 sanfran   88 2.391478  6.455532     753927    11.26       9.90
## 107 sanfran   89 2.355639  6.664767     750964    11.08       9.90
## 108 sanfran   90 2.462847  9.742264     723959    10.90       9.90
## 109 sanfran   91 2.489720  9.498822     739039    10.72       9.90
## 110 sanfran   92 2.459764 11.024325     750885    10.54       9.90
##     welfarepercap edexppercap
## 91       423.4234    797.2973
## 92       383.3671    811.3589
## 93       351.3011    847.5837
## 94       376.0984    934.9736
## 95       391.0891    920.7921
## 96       409.5092    943.2515
## 97       370.5234    789.2562
## 98       347.0874    771.8447
## 99       382.8383    787.6788
## 100      375.0273    748.5074
## 101      355.5932    723.0884
## 102      343.9584    726.2258
## 103      350.4481    785.3521
## 104      353.3180    826.7067
## 105      359.4338    859.8782
## 106      366.2318    852.3222
## 107      376.6129    878.2258
## 108      383.3206    909.7169
## 109      422.8782    936.5314
## 110      462.4359    963.3459

Variable explanation

2. Create new variables

# Derived columns:
#   yr     - four-digit calendar year (`year` stores two digits, e.g. 71)
#   pop    - population in thousands, rounded to a whole number
#   period - decade label for each observation
crime$yr <- crime$year + 1900
crime$pop <- round(crime$population / 1000)
# Bin on decade boundaries so the first interval really starts at 1970, as its
# label says (a year-70 row previously fell through to NA). The last bin is
# open-ended, matching the original `year >= 90` rule. as.character() keeps
# `period` a character column instead of the factor that cut() returns.
crime$period <- as.character(cut(
  crime$year,
  breaks = c(69, 79, 89, Inf),
  labels = c("1970 - 1979", "1980 - 1989", "1990 - 1992")
))

3. Create a new subset of the dataset with 4 variables (city, year, police, robbery)

suppressMessages(library(dplyr))
# Keep only the four columns of interest (base R, list-style column indexing).
crime_new1 <- crime[c("city", "year", "police", "robbery")]
# Same selection with dplyr, written with the pipe operator.
new <- crime %>%
  select(city, year, police, robbery)

# Rows for city "losangel" with year <= 90; comma-separated filter()
# conditions are combined with a logical AND.
crime_new2 <- filter(crime, year <= 90, city == "losangel")
# Median of the `police` column over the full dataset.
median(crime[["police"]])
## [1] 1.943653
# Median of the `robbery` column over the full dataset.
median(crime[["robbery"]])
## [1] 6.731714

4. Compare

suppressMessages(library(compareGroups))

4.1 Compare Fresno and Los Angeles

# Restrict the data to the two cities being compared.
two_cities <- crime %>%
  filter(city %in% c("fresno", "losangel"))
# With a two-level grouping variable, compareGroups() also fits a logistic
# regression per row variable to obtain odds ratios. On this data those fits
# do not converge (fitted probabilities of 0 or 1), producing glm.fit
# warnings. The ratios are not displayed by createTable() here, so skip them.
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = two_cities,
    compute.ratio = FALSE
  )
)
## 
## --------Summary descriptives table by 'city'---------
## 
## ____________________________________________________ 
##                fresno         losangel     p.overall 
##                 N=22            N=22                 
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## police      1.52 (0.24)     2.36 (0.19)     <0.001   
## robbery     4.59 (1.32)     7.71 (2.14)     <0.001   
## population 241421 (66407) 3086460 (290100)  <0.001   
## blackpct    9.07 (0.51)     16.1 (1.44)     <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

4.2 Compare between Fresno, Los Angeles and Sacramento

# Restrict the data to the three cities being compared.
three_cities <- filter(crime, city %in% c("fresno", "losangel", "sacramen"))
# Descriptive table of the four variables, grouped by city.
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = three_cities
  )
)
## 
## --------Summary descriptives table by 'city'---------
## 
## ___________________________________________________________________ 
##                fresno         losangel        sacramen    p.overall 
##                 N=22            N=22            N=22                
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## police      1.52 (0.24)     2.36 (0.19)     1.79 (0.14)    <0.001   
## robbery     4.59 (1.32)     7.71 (2.14)     5.10 (1.18)    <0.001   
## population 241421 (66407) 3086460 (290100) 299897 (41403)  <0.001   
## blackpct    9.07 (0.51)     16.1 (1.44)     13.5 (1.47)    <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

4.3 Compare all five cities

# Number of rows per city.
table(crime[["city"]])
## 
##   fresno losangel  oakland sacramen  sanfran 
##       22       22       22       22       22
# Descriptive table of the four variables across every city in the dataset.
createTable(
  compareGroups(
    city ~ police + robbery + population + blackpct,
    data = crime
  )
)
## 
## --------Summary descriptives table by 'city'---------
## 
## _________________________________________________________________________________________________ 
##                fresno         losangel        oakland        sacramen       sanfran     p.overall 
##                 N=22            N=22            N=22           N=22           N=22                
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## police      1.52 (0.24)     2.36 (0.19)     1.83 (0.15)    1.79 (0.14)    2.59 (0.16)    <0.001   
## robbery     4.59 (1.32)     7.71 (2.14)     9.28 (1.20)    5.10 (1.18)    8.49 (1.70)    <0.001   
## population 241421 (66407) 3086460 (290100) 355641 (15635) 299897 (41403) 707330 (35363)  <0.001   
## blackpct    9.07 (0.51)     16.1 (1.44)     43.3 (3.17)    13.5 (1.47)    12.2 (0.90)    <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯