R Markdown

Data analysis on murders data set

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

## Data analysis on murders data set

Q1. Store the state names (the `state` column) in a vector

# Attach dslabs, load its `murders` data set, and preview the first six rows.
library("dslabs")
data("murders")
head(murders, n = 6L)
##        state abb region population total
## 1    Alabama  AL  South    4779736   135
## 2     Alaska  AK   West     710231    19
## 3    Arizona  AZ   West    6392017   232
## 4   Arkansas  AR  South    2915918    93
## 5 California  CA   West   37253956  1257
## 6   Colorado  CO   West    5029196    65
# List the columns available in the data frame.
names(murders)
## [1] "state"      "abb"        "region"     "population" "total"
# Keep the `state` column as a standalone vector and print it.
state_names <- murders[["state"]]
state_names
##  [1] "Alabama"              "Alaska"               "Arizona"             
##  [4] "Arkansas"             "California"           "Colorado"            
##  [7] "Connecticut"          "Delaware"             "District of Columbia"
## [10] "Florida"              "Georgia"              "Hawaii"              
## [13] "Idaho"                "Illinois"             "Indiana"             
## [16] "Iowa"                 "Kansas"               "Kentucky"            
## [19] "Louisiana"            "Maine"                "Maryland"            
## [22] "Massachusetts"        "Michigan"             "Minnesota"           
## [25] "Mississippi"          "Missouri"             "Montana"             
## [28] "Nebraska"             "Nevada"               "New Hampshire"       
## [31] "New Jersey"           "New Mexico"           "New York"            
## [34] "North Carolina"       "North Dakota"         "Ohio"                
## [37] "Oklahoma"             "Oregon"               "Pennsylvania"        
## [40] "Rhode Island"         "South Carolina"       "South Dakota"        
## [43] "Tennessee"            "Texas"                "Utah"                
## [46] "Vermont"              "Virginia"             "Washington"          
## [49] "West Virginia"        "Wisconsin"            "Wyoming"

Q2. Store the state abbreviations in a separate vector

# Keep the `abb` column as a standalone vector and print it.
abrevation <- murders[["abb"]]
abrevation
##  [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [16] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH"
## [31] "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT"
## [46] "VT" "VA" "WA" "WV" "WI" "WY"

## Q3. What is the total population of the US?

# Sum the `population` column over every row of the data set.
total_population_us <- sum(murders[["population"]])
total_population_us
## [1] 309864228

Q4. How many murders were committed according to this data set?

# Sum the `total` column (murder counts) over every row of the data set.
sum(murders[["total"]])
## [1] 9403

Q5. Which state has the highest population?

# which.max() gives the row index of the largest population; use it to pick
# the matching state name.
highest_population <- murders[["state"]][which.max(murders[["population"]])]
highest_population
## [1] "California"

Q6. Which state has the highest number of murders?

# Index of the largest murder count, mapped back to its state name.
max_murders <- murders[["state"]][which.max(murders[["total"]])]
max_murders
## [1] "California"

Q7. Which state has the lowest number of murders?

# Index of the smallest murder count, mapped back to its state name.
min_murders <- murders[["state"]][which.min(murders[["total"]])]
min_murders
## [1] "Vermont"

Q8. Compute the correlation between population and murders

# Correlation (cor() default method) between state population and murder count.
correlatoin <- cor(murders[["population"]], murders[["total"]])
correlatoin
## [1] 0.9635956

Q9. 1st method: fit a regression line between population and total number of murders

# Linear model with `population` as the response and `total` as the predictor;
# summary() reports the coefficients and fit statistics.
model <- lm(formula = population ~ total, data = murders)
summary(model)
## 
## Call:
## lm(formula = population ~ total, data = murders)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6207527  -714805    43591  1190781  3989640 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   913823     330431   2.766  0.00799 ** 
## total          27997       1110  25.228  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1853000 on 49 degrees of freedom
## Multiple R-squared:  0.9285, Adjusted R-squared:  0.9271 
## F-statistic: 636.5 on 1 and 49 DF,  p-value: < 2.2e-16

2nd method: fit a regression line between population and total number of murders using the least-squares formulas

# x holds the population of each state.
x <- murders[["population"]]
x
##  [1]  4779736   710231  6392017  2915918 37253956  5029196  3574097   897934
##  [9]   601723 19687653  9920000  1360301  1567582 12830632  6483802  3046355
## [17]  2853118  4339367  4533372  1328361  5773552  6547629  9883640  5303925
## [25]  2967297  5988927   989415  1826341  2700551  1316470  8791894  2059179
## [33] 19378102  9535483   672591 11536504  3751351  3831074 12702379  1052567
## [41]  4625364   814180  6346105 25145561  2763885   625741  8001024  6724540
## [49]  1852994  5686986   563626
# y holds the murder count of each state.
y <- murders[["total"]]
y
##  [1]  135   19  232   93 1257   65   97   38   99  669  376    7   12  364  142
## [16]   21   63  116  351   11  293  118  413   53  120  321   12   32   84    5
## [31]  246   67  517  286    4  310  111   36  457   16  207    8  219  805   22
## [46]    2  250   93   27   97    5
# Number of observations.
n <- length(x)
n
## [1] 51
# Column totals used by the least-squares formulas.
sum(x)
## [1] 309864228
sum(y)
## [1] 9403
# Column means.
mean(x)
## [1] 6075769
mean(y)
## [1] 184.3725
# Element-wise products of population and murder count.
x * y
##  [1]   645264360    13494389  1482947944   271180374 46828222692   326897740
##  [7]   346687409    34121492    59570577 13171039857  3729920000     9522107
## [13]    18810984  4670350048   920699884    63973455   179746434   503366572
## [19]  1591213572    14611971  1691650736   772620222  4081943320   281108025
## [25]   356075640  1922445567    11872980    58442912   226846284     6582350
## [31]  2162805924   137964993 10018478734  2727148138     2690364  3576316240
## [37]   416399961   137918664  5804987203    16841072   957450348     6513440
## [43]  1389796995 20242176605    60805470     1251482  2000256000   625382220
## [49]    50030838   551637642     2818130
# Sum of cross products.
sum(x * y)
## [1] 135180900360
# Sum of squared x values.
sum(x^2)
## [1] 4.236103e+15
# Square of the x total (note: not the same as the sum of squares above).
sum(x)^2
## [1] 9.601584e+16
# Least-squares line computed by hand, in the same orientation as
# lm(population ~ total): x (population) is the response and y (total murders)
# is the predictor.
#   slope     b = (n*Sxy - Sx*Sy) / (n*Syy - Sy^2)
#   intercept a = mean(x) - b * mean(y)
# The numerator and the denominator each need their own parentheses: `/` binds
# tighter than `-`, so without them only the middle term would be divided.
b <- (n * sum(x * y) - sum(x) * sum(y)) / (n * sum(y^2) - sum(y)^2)
b
## (approximately 27997, the `total` slope reported by summary(model); re-knit for the exact printed value)
a <- mean(x) - b * mean(y)
a
## (approximately 913823, the intercept reported by summary(model); re-knit for the exact printed value)

# Pearson correlation from the same sums:
#   r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
numerator <- n * sum(x * y) - sum(x) * sum(y)
numerator
## [1] 3.980573e+12
# The square root has to cover the product of BOTH sums-of-squares terms.
denomirator <- sqrt((n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2))
denomirator
## (approximately 4.131e+12; re-knit for the exact printed value)
r <- numerator / denomirator
r
## [1] 0.9635956

Q10. Display the population of Washington

# Select the population entry whose state name is exactly "Washington".
# (No help() lookup here: `pop` is a data value, not a documented topic, so
# help(pop) could only report "No documentation".)
pop <- murders$population[murders$state == "Washington"]
pop
## [1] 6724540

Q11. Display the number of murders in Alaska

# Select the murder count whose state name is exactly "Alaska".
murders_alaska <- murders[["total"]][murders[["state"]] == "Alaska"]
murders_alaska
## [1] 19

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.