# Load the state-level crime data set into `cdata`.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() reads
# strings as character by default, whereas the summary()/str() output recorded
# below shows `state` as a factor with 51 levels.
# NOTE(review): confirm this URL is still reachable; the host may have moved.
cdata <- read.csv(
  "http://www.ats.ucla.edu/stat/data/crime.csv",
  stringsAsFactors = TRUE
)
# Description of data: list the column names
colnames(cdata)
## [1] "sid" "state" "crime" "murder" "pctmetro" "pctwhite"
## [7] "pcths" "poverty" "single"
##[1] "sid" "state" "crime" "murder" "pctmetro"
##[6] "pctwhite" "pcths" "poverty" "single"
# Summary statistics for every column of cdata
summary(cdata)
## sid state crime murder
## Min. : 1.0 ak : 1 Min. : 82.0 Min. : 1.600
## 1st Qu.:13.5 al : 1 1st Qu.: 326.5 1st Qu.: 3.900
## Median :26.0 ar : 1 Median : 515.0 Median : 6.800
## Mean :26.0 az : 1 Mean : 612.8 Mean : 8.727
## 3rd Qu.:38.5 ca : 1 3rd Qu.: 773.0 3rd Qu.:10.350
## Max. :51.0 co : 1 Max. :2922.0 Max. :78.500
## (Other):45
## pctmetro pctwhite pcths poverty
## Min. : 24.00 Min. :31.80 Min. :64.30 Min. : 8.00
## 1st Qu.: 49.55 1st Qu.:79.35 1st Qu.:73.50 1st Qu.:10.70
## Median : 69.80 Median :87.60 Median :76.70 Median :13.10
## Mean : 67.39 Mean :84.12 Mean :76.22 Mean :14.26
## 3rd Qu.: 83.95 3rd Qu.:92.60 3rd Qu.:80.10 3rd Qu.:17.40
## Max. :100.00 Max. :98.50 Max. :86.60 Max. :26.40
##
## single
## Min. : 8.40
## 1st Qu.:10.05
## Median :10.90
## Mean :11.33
## 3rd Qu.:12.05
## Max. :22.10
##
# Structure of data: dimensions, plus the type and first values of each column
str(cdata)
## 'data.frame': 51 obs. of 9 variables:
## $ sid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ state : Factor w/ 51 levels "ak","al","ar",..: 1 2 3 4 5 6 7 9 10 11 ...
## $ crime : int 761 780 593 715 1078 567 456 686 1206 723 ...
## $ murder : num 9 11.6 10.2 8.6 13.1 5.8 6.3 5 8.9 11.4 ...
## $ pctmetro: num 41.8 67.4 44.7 84.7 96.7 81.8 95.7 82.7 93 67.7 ...
## $ pctwhite: num 75.2 73.5 82.9 88.6 79.3 92.5 89 79.4 83.5 70.8 ...
## $ pcths : num 86.6 66.9 66.3 78.7 76.2 84.4 79.2 77.5 74.4 70.9 ...
## $ poverty : num 9.1 17.4 20 15.4 18.2 9.9 8.5 10.2 17.8 13.5 ...
## $ single : num 14.3 11.5 10.7 12.1 12.5 12.1 10.1 11.4 10.6 13 ...
# Convert the state labels to integer codes so that cor() receives an
# all-numeric data frame. Going through factor() yields the sorted-level
# indices whether `state` is a factor or a character vector; as.integer()
# applied directly to character labels would return NA with a warning.
cdata$state <- as.integer(factor(cdata$state))
# Correlation matrix of the data set
cor(cdata)
## sid state crime murder pctmetro
## sid 1.00000000 0.91438914 -0.02415886 0.1472809 -0.059470078
## state 0.91438914 1.00000000 -0.33605860 -0.2385265 -0.149900624
## crime -0.02415886 -0.33605860 1.00000000 0.8861963 0.544038822
## murder 0.14728088 -0.23852648 0.88619634 1.0000000 0.316114166
## pctmetro -0.05947008 -0.14990062 0.54403882 0.3161142 1.000000000
## pctwhite 0.09077618 0.32514433 -0.67717567 -0.7061927 -0.337220734
## pcths -0.02352875 0.00839625 -0.25605205 -0.2860708 -0.003977358
## poverty 0.12108646 -0.03530466 0.50950799 0.5658711 -0.060538499
## single 0.05402958 -0.24915755 0.83887477 0.8589106 0.259810085
## pctwhite pcths poverty single
## sid 0.09077618 -0.023528747 0.12108646 0.05402958
## state 0.32514433 0.008396250 -0.03530466 -0.24915755
## crime -0.67717567 -0.256052045 0.50950799 0.83887477
## murder -0.70619268 -0.286070828 0.56587107 0.85891063
## pctmetro -0.33722073 -0.003977358 -0.06053850 0.25981008
## pctwhite 1.00000000 0.338547615 -0.38929368 -0.65643738
## pcths 0.33854762 1.000000000 -0.74393825 -0.21978289
## poverty -0.38929368 -0.743938249 1.00000000 0.54858904
## single -0.65643738 -0.219782892 0.54858904 1.00000000
# Visualise the pairwise correlations as a circle plot
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.2.5
# Keep the correlation matrix in `me` so it can be handed to corrplot()
me <- cor(cdata)
corrplot(
  me,
  method = "circle"
)
# Fit a linear regression of crime on poverty and single
nfit <- lm(crime ~ poverty + single, data = cdata)
# Print the coefficient table and fit statistics of the model
summary(nfit)
##
## Call:
## lm(formula = crime ~ poverty + single, data = cdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -811.14 -114.27 -22.44 121.86 689.82
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1368.189 187.205 -7.308 2.48e-09 ***
## poverty 6.787 8.989 0.755 0.454
## single 166.373 19.423 8.566 3.12e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 243.6 on 48 degrees of freedom
## Multiple R-squared: 0.7072, Adjusted R-squared: 0.695
## F-statistic: 57.96 on 2 and 48 DF, p-value: 1.578e-13
# Regression model considering more influencing factors (variables):
# murder and pctmetro are added to poverty and single
newfit <- lm(
  crime ~ murder + pctmetro + poverty + single,
  data = cdata
)
# Summary of the new model
summary(newfit)
##
## Call:
## lm(formula = crime ~ murder + pctmetro + poverty + single, data = cdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -336.19 -97.58 -14.32 82.84 432.86
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -809.917 215.010 -3.767 0.000469 ***
## murder 19.807 4.099 4.832 1.54e-05 ***
## pctmetro 6.496 1.069 6.076 2.23e-07 ***
## poverty 9.553 5.956 1.604 0.115585
## single 59.683 19.732 3.025 0.004064 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 149.9 on 46 degrees of freedom
## Multiple R-squared: 0.8938, Adjusted R-squared: 0.8845
## F-statistic: 96.76 on 4 and 46 DF, p-value: < 2.2e-16
# You can also embed plots, for example:
# Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.