# This is an R Markdown Notebook. When you execute code within the notebook,
# the results appear beneath the code (here, on the lines starting with `##`).

# attach dslabs and load its murders dataset into the workspace
library(dslabs)
data("murders")
# checking the class of the object: it is a data frame
class(murders)
## [1] "data.frame"
# compact overview of the columns and their types
str(murders)
## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...
# previewing the first 6 rows (6 is head()'s default, spelled out here)
head(murders, n = 6)
# pulling out a single column, population, with the $ accessor
murders$population
##  [1]  4779736   710231  6392017  2915918 37253956  5029196  3574097   897934
##  [9]   601723 19687653  9920000  1360301  1567582 12830632  6483802  3046355
## [17]  2853118  4339367  4533372  1328361  5773552  6547629  9883640  5303925
## [25]  2967297  5988927   989415  1826341  2700551  1316470  8791894  2059179
## [33] 19378102  9535483   672591 11536504  3751351  3831074 12702379  1052567
## [41]  4625364   814180  6346105 25145561  2763885   625741  8001024  6724540
## [49]  1852994  5686986   563626
# listing the column (variable) names of the data frame
names(murders)
## [1] "state"      "abb"        "region"     "population" "total"
# keeping the population column as its own vector and counting its entries
pop <- murders[["population"]]
length(pop)
## [1] 51
# the class of a vector describes its contents: numeric for the populations
class(pop)
## [1] "numeric"
# ... and character for the state names
class(murders[["state"]])
## [1] "character"
# a comparison produces a logical value, which is either TRUE or FALSE
z <- (3 == 2)
z
## [1] FALSE
# the class of such a value is "logical"
class(z)
## [1] "logical"
# factors are another type of class
class(murders$region)
## [1] "factor"
# obtaining the levels of a factor
levels(murders$region)
## [1] "Northeast"     "South"         "North Central" "West"
# sorting the murder totals from smallest to largest; only the values are
# returned, so the result no longer shows which state each total belongs to
sort(murders$total)
##  [1]    2    4    5    5    7    8   11   12   12   16   19   21   22   27   32
## [16]   36   38   53   63   65   67   84   93   93   97   97   99  111  116  118
## [31]  120  135  142  207  219  232  246  250  286  293  310  321  351  364  376
## [46]  413  457  517  669  805 1257
# a small example vector for comparing sort() and order()
x <- c(31, 4, 15, 92, 65)
x
## [1] 31  4 15 92 65
# sort() returns the values themselves, in increasing order
sort(x)
## [1]  4 15 31 65 92
# order() returns the positions that would put x in order
index <- order(x)
# indexing x by those positions gives the same result as sort(x)
x[index]
## [1]  4 15 31 65 92
# the positions themselves: element 2 is the smallest, element 4 the largest
index
## [1] 2 3 1 5 4
# first ten state names, in the row order of the data frame
murders$state[1:10]
##  [1] "Alabama"              "Alaska"               "Arizona"             
##  [4] "Arkansas"             "California"           "Colorado"            
##  [7] "Connecticut"          "Delaware"             "District of Columbia"
## [10] "Florida"
# the abbreviations in the same rows line up with those state names
murders$abb[1:10]
##  [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL"
# order() on the totals gives the row positions from fewest to most murders
index <- order(murders$total)
murders$abb[index]    # order abbreviations by total murders
##  [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"
# max() gives the largest value; which.max() gives the position where it occurs
max(murders$total)    # highest number of total murders
## [1] 1257
i_max <- which.max(murders$total)    # index with highest number of murders
murders$state[i_max]    # state name with highest number of total murders
## [1] "California"
# example vector for rank()
x <- c(31, 4, 15, 92, 65)
x
## [1] 31  4 15 92 65
# rank() reports, for each element in its original position, where it falls
# when the values are ordered from smallest (rank 1) to largest
rank(x)
## [1] 3 1 2 5 4
# row positions that arrange the states from fewest to most total murders
ind <- order(murders$total) 
# abbreviations rearranged by that index
murders$abb[ind] 
##  [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"
# name of the state with the largest population
murders$state[which.max(murders$population)]
## [1] "California"
# murders per 100,000 residents, computed from the columns of the data frame
murder_rate <- with(murders, total / population * 100000)
# abbreviations arranged from the lowest to the highest murder rate
murders$abb[order(murder_rate)]
##  [1] "VT" "NH" "HI" "ND" "IA" "ID" "UT" "ME" "WY" "OR" "SD" "MN" "MT" "CO" "WA"
## [16] "WV" "RI" "WI" "NE" "MA" "IN" "KS" "NY" "KY" "AK" "OH" "CT" "NJ" "AL" "IL"
## [31] "OK" "NC" "NV" "VA" "AR" "TX" "NM" "CA" "FL" "TN" "PA" "AZ" "GA" "MS" "MI"
## [46] "DE" "SC" "MD" "MO" "LA" "DC"
# logical index: TRUE where the rate is at most 0.71 per 100,000
ind <- murder_rate <= 0.71
# subsetting with a logical vector keeps the entries where it is TRUE
murders$state[ind]
## [1] "Hawaii"        "Iowa"          "New Hampshire" "North Dakota" 
## [5] "Vermont"
# logical vector: TRUE for the states whose region is "West"
west <- murders$region == "West"
# logical vector: TRUE for the states with a murder rate of at most 1 per 100,000
safe <- murder_rate <= 1
safe
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE
# names of the states that meet the rate condition
murders$state[safe]
##  [1] "Hawaii"        "Idaho"         "Iowa"          "Maine"        
##  [5] "Minnesota"     "New Hampshire" "North Dakota"  "Oregon"       
##  [9] "South Dakota"  "Utah"          "Vermont"       "Wyoming"
# & works element by element: TRUE only where both conditions hold
ind <- safe & west
murders$state[ind]
## [1] "Hawaii"  "Idaho"   "Oregon"  "Utah"    "Wyoming"
# which() turns a logical vector into the positions that are TRUE
ind <- which(murders$state == "California")
murder_rate[ind]
## [1] 3.374138
# match() returns the position of each requested name, in the order requested
ind <- match(c("New York", "Florida", "Texas"), murders$state)
ind
## [1] 33 10 44
murder_rate[ind]
## [1] 2.667960 3.398069 3.201360
# %in% tests whether each element on the left occurs in the vector on the right
c("Boston", "Dakota", "Washington") %in% murders$state
## [1] FALSE FALSE  TRUE
# combining which() and %in% gives the matching positions in row order
which(murders$state %in% c("New York", "Florida", "Texas"))
## [1] 10 33 44
# scatterplot of total murders versus population in millions
x <- murders$population / 10^6
y <- murders$total
# plot the rescaled vectors defined above so the horizontal axis reads in
# millions; explicit labels replace the bare variable names "x" and "y"
plot(x, y, xlab = "population (millions)", ylab = "total murders")

# murder rate per 100,000 residents, stored once as a column of the data frame
murders$rate <- with(murders, total / population * 100000)

# histogram of the rates
x <- murders$rate
hist(x)

# state with the highest rate
murders$state[which.max(x)]
## [1] "District of Columbia"
# boxplots of the rate, one box per region
boxplot(rate ~ region, data = murders)

# population rescaled to millions, and the total column under a descriptive name
population_in_millions <- murders$population / 1e6
total_gun_murders <- murders$total
# scatterplot of the totals against population; the variable names become the
# default axis labels
plot(population_in_millions, total_gun_murders)