This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
# Setup ----
# Attach dslabs and bring its murders dataset into the workspace.
library(dslabs)
data("murders")
# Which class does the murders object belong to?
class(murders)
## [1] "data.frame"
# Compact overview of the object: dimensions, column names and column types.
str(object = murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
# Peek at the top of the table (6 rows, which is also head()'s default).
head(murders, n = 6)
# Columns and vectors ----
# The `$` accessor pulls a single column out of the data frame as a vector.
murders$population
## [1] 4779736 710231 6392017 2915918 37253956 5029196 3574097 897934
## [9] 601723 19687653 9920000 1360301 1567582 12830632 6483802 3046355
## [17] 2853118 4339367 4533372 1328361 5773552 6547629 9883640 5303925
## [25] 2967297 5988927 989415 1826341 2700551 1316470 8791894 2059179
## [33] 19378102 9535483 672591 11536504 3751351 3831074 12702379 1052567
## [41] 4625364 814180 6346105 25145561 2763885 625741 8001024 6724540
## [49] 1852994 5686986 563626
# List the column (variable) names of the data frame.
names(murders)
## [1] "state" "abb" "region" "population" "total"
# Keep the population column in its own vector (`[[` extracts the same
# vector as `$`) and count its entries: one per row of the data frame.
pop <- murders[["population"]]
length(pop)
## [1] 51
# A vector has a single class; here one column is numeric ...
class(pop)
## [1] "numeric"
# ... and another is character.
class(murders$state)
## [1] "character"
# Logical vectors ----
# A comparison evaluates to a logical value, which is either TRUE or FALSE.
z <- (3 == 2)
print(z)
## [1] FALSE
# The class of such a value is "logical".
class(z)
## [1] "logical"
# Factors ----
# The region column is stored as a factor rather than as plain character data.
class(murders[["region"]])
## [1] "factor"
# A factor carries a fixed set of categories; levels() lists them.
levels(x = murders$region)
## [1] "Northeast" "South" "North Central" "West"
# Sorting ----
# Murder totals arranged from smallest to largest (increasing is sort()'s default).
sort(murders$total, decreasing = FALSE)
## [1] 2 4 5 5 7 8 11 12 12 16 19 21 22 27 32
## [16] 36 38 53 63 65 67 84 93 93 97 97 99 111 116 118
## [31] 120 135 142 207 219 232 246 250 286 293 310 321 351 364 376
## [46] 413 457 517 669 805 1257
# sort() versus order() on a small example vector ----
x <- c(31, 4, 15, 92, 65)
print(x)
## [1] 31 4 15 92 65
# sort() hands back the values themselves, smallest first.
sort(x)
## [1] 4 15 31 65 92
# order() hands back the positions that would put x in order ...
index <- order(x)
# ... so subsetting x by those positions reproduces the sorted vector.
x[index]
## [1] 4 15 31 65 92
# The positions on their own.
order(x)
## [1] 2 3 1 5 4
# Ordering one column by another ----
# State names and abbreviations are stored in matching row order;
# compare the first ten entries of each.
murders$state[seq_len(10)]
## [1] "Alabama" "Alaska" "Arizona"
## [4] "Arkansas" "California" "Colorado"
## [7] "Connecticut" "Delaware" "District of Columbia"
## [10] "Florida"
murders$abb[seq_len(10)]
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL"
# Row positions that sort the murder totals in increasing order ...
index <- order(murders$total)
# ... applied to the abbreviations: fewest total murders first, most last.
murders$abb[index]
## [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"
# Largest murder total in the data.
max(murders$total)
## [1] 1257
# Row position at which that largest total occurs.
i_max <- which.max(murders$total)
# Name of the state sitting at that position.
murders$state[i_max]
## [1] "California"
# rank() on the same small example vector ----
x <- c(31, 4, 15, 92, 65)
print(x)
## [1] 31 4 15 92 65
# For each element, rank() gives its place when counting from the
# smallest value (1) to the largest.
rank(x)
## [1] 3 1 2 5 4
# Abbreviations ordered by total murders, fewest first.
ind <- order(murders$total)
murders$abb[ind]
## [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"
# Name of the state with the largest population.
with(murders, state[which.max(population)])
## [1] "California"
# Murder rate ----
# Murders per 100,000 inhabitants, computed element-wise for every row.
murder_rate <- with(murders, total / population * 100000)
# Abbreviations ordered from the lowest rate to the highest.
murders$abb[order(murder_rate)]
## [1] "VT" "NH" "HI" "ND" "IA" "ID" "UT" "ME" "WY" "OR" "SD" "MN" "MT" "CO" "WA"
## [16] "WV" "RI" "WI" "NE" "MA" "IN" "KS" "NY" "KY" "AK" "OH" "CT" "NJ" "AL" "IL"
## [31] "OK" "NC" "NV" "VA" "AR" "TX" "NM" "CA" "FL" "TN" "PA" "AZ" "GA" "MS" "MI"
## [46] "DE" "SC" "MD" "MO" "LA" "DC"
# Logical mask: TRUE where the rate is at most 0.71 per 100,000.
ind <- murder_rate <= 0.71
# Indexing with the mask keeps only the states where it is TRUE.
murders$state[ind]
## [1] "Hawaii" "Iowa" "New Hampshire" "North Dakota"
## [5] "Vermont"
# Combining logical conditions ----
# TRUE where the murder rate is at most 1 per 100,000.
safe <- murder_rate <= 1
print(safe)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [13] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## [25] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [37] FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE TRUE
# States that satisfy the rate condition.
murders$state[safe]
## [1] "Hawaii" "Idaho" "Iowa" "Maine"
## [5] "Minnesota" "New Hampshire" "North Dakota" "Oregon"
## [9] "South Dakota" "Utah" "Vermont" "Wyoming"
# TRUE where the state belongs to the "West" region.
west <- murders$region == "West"
# Element-wise AND keeps only the rows where both conditions hold.
ind <- west & safe
murders$state[ind]
## [1] "Hawaii" "Idaho" "Oregon" "Utah" "Wyoming"
# Looking up rows: which(), match() and %in% ----
# which() converts a logical vector into the positions of its TRUE entries;
# here, the row holding California.
ind <- which(murders$state == "California")
murder_rate[ind]
## [1] 3.374138
# match() returns, for each requested name, its position in the state column,
# in the order the names were requested.
ind <- match(x = c("New York", "Florida", "Texas"), table = murders$state)
print(ind)
## [1] 33 10 44
murder_rate[ind]
## [1] 2.667960 3.398069 3.201360
# %in% tests, element by element, whether each left-hand value occurs
# anywhere in the right-hand vector.
c("Boston", "Dakota", "Washington") %in% murders$state
## [1] FALSE FALSE TRUE
# Reversing the roles and wrapping in which() gives the row positions in
# data order rather than in the order the names were requested.
which(murders$state %in% c("New York", "Florida", "Texas"))
## [1] 10 33 44
# Basic plots ----
# Scatterplot of total murders against population in millions.
# x and y are built for this plot, so they are drawn here before x is
# reassigned further down.
x <- murders$population / 10^6
y <- murders$total
plot(x, y)
# The same relationship on the raw population scale; with() lets the column
# names be used directly inside the plot() call.
with(murders, plot(population, total))
# Histogram of the murder rate per 100,000 inhabitants.
# Note: x is reused here and now holds rates, not populations.
x <- with(murders, total / population * 100000)
hist(x)
# State with the highest rate.
murders$state[which.max(x)]
## [1] "District of Columbia"
# Store the rate as a column so the formula interface can refer to it,
# then draw one box of rates per region.
murders$rate <- with(murders, total / population * 100000)
boxplot(rate ~ region, data = murders)
# Scatterplot again, with descriptive variable names that plot() picks up
# as its default axis labels.
population_in_millions <- murders$population / 10^6
total_gun_murders <- murders$total
plot(population_in_millions, total_gun_murders)