USA_Murder

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

# loading the dslabs package and the murders dataset
library(dslabs)
data(murders)

# confirm that the murders dataset is stored as a "data frame"
class(murders)

## [1] "data.frame"

# compact overview of every column and its type
str(murders)

## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...

# preview the first 6 rows of the dataset
head(murders, n = 6L)

# pull the population column out of the data frame as a plain vector
murders[["population"]]

##  [1]  4779736   710231  6392017  2915918 37253956  5029196  3574097   897934
##  [9]   601723 19687653  9920000  1360301  1567582 12830632  6483802  3046355
## [17]  2853118  4339367  4533372  1328361  5773552  6547629  9883640  5303925
## [25]  2967297  5988927   989415  1826341  2700551  1316470  8791894  2059179
## [33] 19378102  9535483   672591 11536504  3751351  3831074 12702379  1052567
## [41]  4625364   814180  6346105 25145561  2763885   625741  8001024  6724540
## [49]  1852994  5686986   563626

# list the column (variable) names of the murders dataset
names(murders)

## [1] "state"      "abb"        "region"     "population" "total"

# count the entries in a vector
pop <- murders[["population"]]
length(pop)

## [1] 51

# a vector's class can be numeric or character
class(pop)

## [1] "numeric"

class(murders[["state"]])

## [1] "character"

# a comparison yields a logical value, which is either TRUE or FALSE
z <- (3 == 2)
z

## [1] FALSE

# the result of the comparison is stored with class "logical"
class(z)

## [1] "logical"

# the region column is stored as a factor
class(murders[["region"]])

## [1] "factor"

# list the categories (levels) the factor can take
levels(murders[["region"]])

## [1] "Northeast"     "South"         "North Central" "West"

# murder totals arranged from smallest to largest
sort(murders[["total"]])

##  [1]    2    4    5    5    7    8   11   12   12   16   19   21   22   27   32
## [16]   36   38   53   63   65   67   84   93   93   97   97   99  111  116  118
## [31]  120  135  142  207  219  232  246  250  286  293  310  321  351  364  376
## [46]  413  457  517  669  805 1257

# small example vector used to contrast sort() with order()
x <- c(31, 4, 15, 92, 65)
x

## [1] 31  4 15 92 65

# sort() returns the values themselves in increasing order
sort(x)

## [1]  4 15 31 65 92

# order() returns the positions that would put x in increasing order;
# subsetting x by those positions therefore reproduces sort(x)
index <- order(x)
x[index]

## [1]  4 15 31 65 92

# the positions themselves
index

## [1] 2 3 1 5 4

# first ten state names
murders[["state"]][seq_len(10)]

##  [1] "Alabama"              "Alaska"               "Arizona"             
##  [4] "Arkansas"             "California"           "Colorado"            
##  [7] "Connecticut"          "Delaware"             "District of Columbia"
## [10] "Florida"

# matching first ten state abbreviations
murders[["abb"]][seq_len(10)]

##  [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL"

# state abbreviations arranged by increasing total murders
index <- order(murders[["total"]])
murders[["abb"]][index]

##  [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"

# highest number of total murders
max(murders[["total"]])

## [1] 1257

# position of that maximum, then the name of the state at that position
i_max <- which.max(murders[["total"]])
murders[["state"]][i_max]

## [1] "California"

# example vector for demonstrating rank()
x <- c(31, 4, 15, 92, 65)
x

## [1] 31  4 15 92 65

# rank() gives, for each element, its position in the sorted vector
# (1 = smallest)
rank(x)

## [1] 3 1 2 5 4

# state abbreviations arranged by increasing total murders
ind <- order(murders[["total"]])
murders[["abb"]][ind]

##  [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"

# name of the state with the largest population
with(murders, state[which.max(population)])

## [1] "California"

# murders per 100,000 residents for every state
murder_rate <- with(murders, total / population * 100000)

# state abbreviations arranged by increasing murder rate
murders[["abb"]][order(murder_rate)]

##  [1] "VT" "NH" "HI" "ND" "IA" "ID" "UT" "ME" "WY" "OR" "SD" "MN" "MT" "CO" "WA"
## [16] "WV" "RI" "WI" "NE" "MA" "IN" "KS" "NY" "KY" "AK" "OH" "CT" "NJ" "AL" "IL"
## [31] "OK" "NC" "NV" "VA" "AR" "TX" "NM" "CA" "FL" "TN" "PA" "AZ" "GA" "MS" "MI"
## [46] "DE" "SC" "MD" "MO" "LA" "DC"

# logical mask of states whose rate is at most 0.71 per 100,000,
# then the names of the states it selects
ind <- (murder_rate <= 0.71)
murders[["state"]][ind]

## [1] "Hawaii"        "Iowa"          "New Hampshire" "North Dakota" 
## [5] "Vermont"

# two logical masks: states in the West region, and states whose murder
# rate is at most 1 per 100,000
west <- murders[["region"]] == "West"
safe <- (murder_rate <= 1)
safe

##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE

# names of the states selected by the "safe" mask
murders[["state"]][safe]

##  [1] "Hawaii"        "Idaho"         "Iowa"          "Maine"        
##  [5] "Minnesota"     "New Hampshire" "North Dakota"  "Oregon"       
##  [9] "South Dakota"  "Utah"          "Vermont"       "Wyoming"

# element-wise AND keeps only the states satisfying both conditions
ind <- west & safe
murders[["state"]][ind]

## [1] "Hawaii"  "Idaho"   "Oregon"  "Utah"    "Wyoming"

# which() turns a logical mask into the positions that are TRUE;
# use it to look up California's murder rate
ind <- which(murders[["state"]] == "California")
murder_rate[ind]

## [1] 3.374138

# match() returns the position of each requested name, in the order
# the names were requested
ind <- match(c("New York", "Florida", "Texas"), murders[["state"]])
ind

## [1] 33 10 44

murder_rate[ind]

## [1] 2.667960 3.398069 3.201360

# %in% tests, element by element, whether each name is a state name
c("Boston", "Dakota", "Washington") %in% murders[["state"]]

## [1] FALSE FALSE  TRUE

# positions of the three states, this time in data-frame order
which(murders[["state"]] %in% c("New York", "Florida", "Texas"))

## [1] 10 33 44

# Scatter plot of total murders against population, reading both columns
# straight from the data frame via with(). The plot never used separate
# x / y copies of these columns, so no intermediate vectors are created here.
with(murders, plot(population, total))

# murders per 100,000 residents, stored as a column so the formula
# interface of boxplot() can refer to it by name
murders$rate <- murders$total / murders$population * 100000

# histogram of the rate across all states
x <- murders$rate
hist(x)

# state with the highest murder rate
murders$state[which.max(x)]

## [1] "District of Columbia"

# distribution of the rate within each region
boxplot(rate ~ region, data = murders)

# scatter plot of total murders against population expressed in millions;
# the descriptive variable names become the axis labels
population_in_millions <- murders[["population"]] / 10^6
total_gun_murders <- murders[["total"]]
plot(x = population_in_millions, y = total_gun_murders)

USA_Murder_rates

Raul_Lopez