This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

## Data analysis on the murders data set

# Q1. Store the `state` column in a vector
# Attach the dslabs package, which ships the `murders` data set.
library(dslabs)
# Load the data frame into the workspace and preview its first rows.
data("murders")
head(murders)
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
## 4 Arkansas AR South 2915918 93
## 5 California CA West 37253956 1257
## 6 Colorado CO West 5029196 65
# List the column names of the data frame.
names(murders)
## [1] "state" "abb" "region" "population" "total"
# Pull the `state` column out as a plain character vector and print it.
state_names <- murders[["state"]]
state_names
## [1] "Alabama" "Alaska" "Arizona"
## [4] "Arkansas" "California" "Colorado"
## [7] "Connecticut" "Delaware" "District of Columbia"
## [10] "Florida" "Georgia" "Hawaii"
## [13] "Idaho" "Illinois" "Indiana"
## [16] "Iowa" "Kansas" "Kentucky"
## [19] "Louisiana" "Maine" "Maryland"
## [22] "Massachusetts" "Michigan" "Minnesota"
## [25] "Mississippi" "Missouri" "Montana"
## [28] "Nebraska" "Nevada" "New Hampshire"
## [31] "New Jersey" "New Mexico" "New York"
## [34] "North Carolina" "North Dakota" "Ohio"
## [37] "Oklahoma" "Oregon" "Pennsylvania"
## [40] "Rhode Island" "South Carolina" "South Dakota"
## [43] "Tennessee" "Texas" "Utah"
## [46] "Vermont" "Virginia" "Washington"
## [49] "West Virginia" "Wisconsin" "Wyoming"
# Pull the `abb` column (two-letter state codes) out as a vector and print it.
abrevation <- murders[["abb"]]
abrevation
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [16] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH"
## [31] "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT"
## [46] "VT" "VA" "WA" "WV" "WI" "WY"
## Q3. What is the total population of the US?
# Total population: sum of the `population` column over all rows.
total_population_us <- sum(murders[["population"]])
total_population_us
## [1] 309864228
# Sum of the `total` column over all rows.
sum(murders[["total"]])
## [1] 9403
# State whose `population` value is the largest.
highest_population <- with(murders, state[which.max(population)])
highest_population
## [1] "California"
# State whose `total` value is the largest.
max_murders <- with(murders, state[which.max(total)])
max_murders
## [1] "California"
# State whose `total` value is the smallest.
min_murders <- with(murders, state[which.min(total)])
min_murders
## [1] "Vermont"
# Pearson correlation between the `population` and `total` columns.
correlatoin <- with(murders, cor(population, total))
correlatoin
## [1] 0.9635956
# Linear model with `population` as the response and `total` as the predictor.
model <- lm(population ~ total, data = murders)
summary(model)
##
## Call:
## lm(formula = population ~ total, data = murders)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6207527 -714805 43591 1190781 3989640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 913823 330431 2.766 0.00799 **
## total 27997 1110 25.228 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1853000 on 49 degrees of freedom
## Multiple R-squared: 0.9285, Adjusted R-squared: 0.9271
## F-statistic: 636.5 on 1 and 49 DF, p-value: < 2.2e-16
# x: the `population` column as a plain numeric vector.
x <- murders[["population"]]
x
## [1] 4779736 710231 6392017 2915918 37253956 5029196 3574097 897934
## [9] 601723 19687653 9920000 1360301 1567582 12830632 6483802 3046355
## [17] 2853118 4339367 4533372 1328361 5773552 6547629 9883640 5303925
## [25] 2967297 5988927 989415 1826341 2700551 1316470 8791894 2059179
## [33] 19378102 9535483 672591 11536504 3751351 3831074 12702379 1052567
## [41] 4625364 814180 6346105 25145561 2763885 625741 8001024 6724540
## [49] 1852994 5686986 563626
# y: the `total` column as a plain numeric vector.
y <- murders[["total"]]
y
## [1] 135 19 232 93 1257 65 97 38 99 669 376 7 12 364 142
## [16] 21 63 116 351 11 293 118 413 53 120 321 12 32 84 5
## [31] 246 67 517 286 4 310 111 36 457 16 207 8 219 805 22
## [46] 2 250 93 27 97 5
# Number of observations in x.
n <- length(x)
n
## [1] 51
# Sums of x and y.
sum(x)
## [1] 309864228
sum(y)
## [1] 9403
# Means of x and y.
mean(x)
## [1] 6075769
mean(y)
## [1] 184.3725
# Element-wise products of x and y.
x * y
## [1] 645264360 13494389 1482947944 271180374 46828222692 326897740
## [7] 346687409 34121492 59570577 13171039857 3729920000 9522107
## [13] 18810984 4670350048 920699884 63973455 179746434 503366572
## [19] 1591213572 14611971 1691650736 772620222 4081943320 281108025
## [25] 356075640 1922445567 11872980 58442912 226846284 6582350
## [31] 2162805924 137964993 10018478734 2727148138 2690364 3576316240
## [37] 416399961 137918664 5804987203 16841072 957450348 6513440
## [43] 1389796995 20242176605 60805470 1251482 2000256000 625382220
## [49] 50030838 551637642 2818130
# Sum of the element-wise products of x and y.
sum(x * y)
## [1] 135180900360
# Sum of squares of x, then the square of the sum of x (these differ).
sum(x^2)
## [1] 4.236103e+15
sum(x)^2
## [1] 9.601584e+16
# Hand-computed least-squares line y = a + b * x and Pearson correlation r,
# built from the sums of x, y, x * y, x^2 and y^2.
#
# Slope: b = (n * Sxy - Sx * Sy) / (n * Sxx - Sx^2).
# The whole numerator and the whole denominator must each be parenthesised;
# without that, only sum(x) * sum(y) is divided and sum(x)^2 is subtracted
# from the result, which gave -9.6e16 instead of a slope.
b <- (n * sum(x * y) - sum(x) * sum(y)) / (n * sum(x^2) - sum(x)^2)
b
## approx. 3.316e-05 (derived from the sums printed in this document; re-knit to refresh)
# Intercept: a = mean(y) - b * mean(x), because the fitted line passes through
# the point (mean(x), mean(y)).
a <- mean(y) - b * mean(x)
a
## approx. -17.13 (derived from the sums printed in this document; re-knit to refresh)
# Pearson r = (n * Sxy - Sx * Sy) /
#             sqrt((n * Sxx - Sx^2) * (n * Syy - Sy^2)).
numerator <- n * sum(x * y) - sum(x) * sum(y)
numerator
## [1] 3.980573e+12
# The square root must cover the product of BOTH variance terms, not only
# the x term.
denomirator <- sqrt((n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2))
denomirator
## approx. 4.131e+12 (derived from the sums printed in this document; re-knit to refresh)
r <- numerator / denomirator
r
## [1] 0.9635956
# Population of Washington: keep the `population` entries whose `state`
# equals "Washington".
pop <- murders$population[murders$state == "Washington"]
pop
## [1] 6724540
# The former help(pop) call was dropped: `pop` is a data vector created
# above, not a documented topic, so the call only reported
# "No documentation for 'pop'".
# `total` value for Alaska, selected the same way.
murders_alaska <- murders$total[murders$state == "Alaska"]
murders_alaska
## [1] 19
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.