This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(dslabs)
You can also embed plots, for example:
# Load the murders dataset and confirm that it is a data frame.
data("murders")
class(murders)
## [1] "data.frame"
# Compact summary of the columns and their types:
str(object = murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
# View the first 6 rows of the murders dataset (6 is head()'s default `n`):
head(murders, n = 6)
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
## 4 Arkansas AR South 2915918 93
## 5 California CA West 37253956 1257
## 6 Colorado CO West 5029196 65
# View all 51 rows of the murders dataset:
head(murders, n = 51)
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
## 4 Arkansas AR South 2915918 93
## 5 California CA West 37253956 1257
## 6 Colorado CO West 5029196 65
## 7 Connecticut CT Northeast 3574097 97
## 8 Delaware DE South 897934 38
## 9 District of Columbia DC South 601723 99
## 10 Florida FL South 19687653 669
## 11 Georgia GA South 9920000 376
## 12 Hawaii HI West 1360301 7
## 13 Idaho ID West 1567582 12
## 14 Illinois IL North Central 12830632 364
## 15 Indiana IN North Central 6483802 142
## 16 Iowa IA North Central 3046355 21
## 17 Kansas KS North Central 2853118 63
## 18 Kentucky KY South 4339367 116
## 19 Louisiana LA South 4533372 351
## 20 Maine ME Northeast 1328361 11
## 21 Maryland MD South 5773552 293
## 22 Massachusetts MA Northeast 6547629 118
## 23 Michigan MI North Central 9883640 413
## 24 Minnesota MN North Central 5303925 53
## 25 Mississippi MS South 2967297 120
## 26 Missouri MO North Central 5988927 321
## 27 Montana MT West 989415 12
## 28 Nebraska NE North Central 1826341 32
## 29 Nevada NV West 2700551 84
## 30 New Hampshire NH Northeast 1316470 5
## 31 New Jersey NJ Northeast 8791894 246
## 32 New Mexico NM West 2059179 67
## 33 New York NY Northeast 19378102 517
## 34 North Carolina NC South 9535483 286
## 35 North Dakota ND North Central 672591 4
## 36 Ohio OH North Central 11536504 310
## 37 Oklahoma OK South 3751351 111
## 38 Oregon OR West 3831074 36
## 39 Pennsylvania PA Northeast 12702379 457
## 40 Rhode Island RI Northeast 1052567 16
## 41 South Carolina SC South 4625364 207
## 42 South Dakota SD North Central 814180 8
## 43 Tennessee TN South 6346105 219
## 44 Texas TX South 25145561 805
## 45 Utah UT West 2763885 22
## 46 Vermont VT Northeast 625741 2
## 47 Virginia VA South 8001024 250
## 48 Washington WA West 6724540 93
## 49 West Virginia WV South 1852994 27
## 50 Wisconsin WI North Central 5686986 97
## 51 Wyoming WY West 563626 5
# Show the final 6 rows of the murders dataset (6 is tail()'s default `n`):
tail(murders, n = 6L)
## state abb region population total
## 46 Vermont VT Northeast 625741 2
## 47 Virginia VA South 8001024 250
## 48 Washington WA West 6724540 93
## 49 West Virginia WV South 1852994 27
## 50 Wisconsin WI North Central 5686986 97
## 51 Wyoming WY West 563626 5
# q1 (a): the names of the 51 states
states <- c(murders[["state"]])
print(states)
## [1] "Alabama" "Alaska" "Arizona"
## [4] "Arkansas" "California" "Colorado"
## [7] "Connecticut" "Delaware" "District of Columbia"
## [10] "Florida" "Georgia" "Hawaii"
## [13] "Idaho" "Illinois" "Indiana"
## [16] "Iowa" "Kansas" "Kentucky"
## [19] "Louisiana" "Maine" "Maryland"
## [22] "Massachusetts" "Michigan" "Minnesota"
## [25] "Mississippi" "Missouri" "Montana"
## [28] "Nebraska" "Nevada" "New Hampshire"
## [31] "New Jersey" "New Mexico" "New York"
## [34] "North Carolina" "North Dakota" "Ohio"
## [37] "Oklahoma" "Oregon" "Pennsylvania"
## [40] "Rhode Island" "South Carolina" "South Dakota"
## [43] "Tennessee" "Texas" "Utah"
## [46] "Vermont" "Virginia" "Washington"
## [49] "West Virginia" "Wisconsin" "Wyoming"
# Values of the `total` column, one entry per state
total_murder_state <- c(murders[["total"]])
print(total_murder_state)
## [1] 135 19 232 93 1257 65 97 38 99 669 376 7 12 364 142
## [16] 21 63 116 351 11 293 118 413 53 120 321 12 32 84 5
## [31] 246 67 517 286 4 310 111 36 457 16 207 8 219 805 22
## [46] 2 250 93 27 97 5
# Murder rate per 100,000 residents for each state. The data frame has no
# `rates` column (`murders$rates` evaluates to NULL), so the rate is derived
# from the `total` and `population` columns instead.
murders_rates <- murders$total / murders$population * 100000
murders_rates
## (51 per-state rates; re-knit the document to regenerate the printed values)
# State names, taken from the `state` column
states_name <- c(murders[["state"]])
print(states_name)
## [1] "Alabama" "Alaska" "Arizona"
## [4] "Arkansas" "California" "Colorado"
## [7] "Connecticut" "Delaware" "District of Columbia"
## [10] "Florida" "Georgia" "Hawaii"
## [13] "Idaho" "Illinois" "Indiana"
## [16] "Iowa" "Kansas" "Kentucky"
## [19] "Louisiana" "Maine" "Maryland"
## [22] "Massachusetts" "Michigan" "Minnesota"
## [25] "Mississippi" "Missouri" "Montana"
## [28] "Nebraska" "Nevada" "New Hampshire"
## [31] "New Jersey" "New Mexico" "New York"
## [34] "North Carolina" "North Dakota" "Ohio"
## [37] "Oklahoma" "Oregon" "Pennsylvania"
## [40] "Rhode Island" "South Carolina" "South Dakota"
## [43] "Tennessee" "Texas" "Utah"
## [46] "Vermont" "Virginia" "Washington"
## [49] "West Virginia" "Wisconsin" "Wyoming"
# Two-letter state abbreviations, taken from the `abb` column
states_abb <- c(murders[["abb"]])
print(states_abb)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [16] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH"
## [31] "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT"
## [46] "VT" "VA" "WA" "WV" "WI" "WY"
# Region of each state, taken from the `region` factor column
murders_region <- c(murders[["region"]])
print(murders_region)
## [1] South West West South West
## [6] West Northeast South South South
## [11] South West West North Central North Central
## [16] North Central North Central South South Northeast
## [21] South Northeast North Central North Central South
## [26] North Central West North Central West Northeast
## [31] Northeast West Northeast South North Central
## [36] North Central South West Northeast Northeast
## [41] South North Central South South West
## [46] Northeast South West South North Central
## [51] West
## Levels: Northeast South North Central West
# Population of each state, taken from the `population` column
states_population <- c(murders[["population"]])
print(states_population)
## [1] 4779736 710231 6392017 2915918 37253956 5029196 3574097 897934
## [9] 601723 19687653 9920000 1360301 1567582 12830632 6483802 3046355
## [17] 2853118 4339367 4533372 1328361 5773552 6547629 9883640 5303925
## [25] 2967297 5988927 989415 1826341 2700551 1316470 8791894 2059179
## [33] 19378102 9535483 672591 11536504 3751351 3831074 12702379 1052567
## [41] 4625364 814180 6346105 25145561 2763885 625741 8001024 6724540
## [49] 1852994 5686986 563626
# Total number of murders across all states. The data frame has no
# `murder_2010` column: `$` on a missing column gives NULL and sum(NULL) is 0,
# which is why this used to print 0. The per-state counts live in `total`.
total_murders_2010 <- sum(murders$total)
total_murders_2010
## [1] 9403
str(murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
# q2: What are the column names used by the data frame for these five
# variables?
column_name <- names(murders)
print(column_name)
## [1] "state" "abb" "region" "population" "total"
# Extract the state abbreviations with the `$` accessor, store them in `a`,
# then check the class of the result.
a <- murders$abb
print(a)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [16] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH"
## [31] "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT"
## [46] "VT" "VA" "WA" "WV" "WI" "WY"
class(a)
## [1] "character"
# q4: Now use the square brackets to extract the state abbreviations and assign
# them to the object b. Use the identical function to determine if a and b are
# the same.
# The abbreviation column is named "abb"; `[[` with a name that does not exist
# returns NULL, which made the old comparison produce logical(0).
b <- murders[["abb"]]
b
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "DC" "FL" "GA" "HI" "ID" "IL" "IN"
## [16] "IA" "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH"
## [31] "NJ" "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT"
## [46] "VT" "VA" "WA" "WV" "WI" "WY"
# identical() gives a single TRUE/FALSE for the whole object, unlike `==`,
# which compares element by element.
identical(a, b)
## [1] TRUE
# q5: We saw that the region column stores a factor. Corroborate this, then
# look at its levels, the number of entries and the number of levels.
class(murders[["region"]])
## [1] "factor"
levels(murders[["region"]])
## [1] "Northeast" "South" "North Central" "West"
length(murders[["region"]])
## [1] 51
nlevels(murders[["region"]])
## [1] 4
# q6: The function table takes a vector and returns the frequency of each
# element. You can quickly see how many states are in each region by applying
# this function. Use this function in one line of code to create a table of
# states per region.
# (A second call on `murders$states_data` was dropped: that column does not
# exist, so it only produced an empty table.)
table(murders$region)
##
## Northeast South North Central West
## 9 17 12 13
# q7: Use the function c to create a vector with the average high temperatures
# in January for Beijing, Lagos, Paris, Rio de Janeiro, San Juan and Toronto,
# which are 35, 88, 42, 84, 81, and 30 degrees Fahrenheit.
temp <- c(35, 88, 42, 84, 81, 30)
print(temp)
## [1] 35 88 42 84 81 30
# q8: Now create a vector with the city names and call the object city.
city <- c(
  "Beijing", "Lagos", "Paris",
  "Rio de Janeiro", "San Juan", "Toronto"
)
print(city)
## [1] "Beijing" "Lagos" "Paris" "Rio de Janeiro"
## [5] "San Juan" "Toronto"
# Attach the city names to the temperatures with names(), so that entries can
# be looked up by city.
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
temp <- c(35, 88, 42, 84, 81, 30)
names(temp) <- city
temp
## Beijing Lagos Paris Rio de Janeiro San Juan
## 35 88 42 84 81
## Toronto
## 30
# q10: Use the [ and : operators to access the temperature of the first three
# cities on the list.
first_three_tempreture <- temp[1:3]
first_three_tempreture
## Beijing Lagos Paris
## 35 88 42
# q11: Use the [ operator to access the temperature of Paris and San Juan.
# Paris (3rd) and San Juan (5th) are not adjacent, so they are selected
# individually by name; the range 3:5 would also pull in Rio de Janeiro.
Paris_SanJuan_tempreture <- temp[c("Paris", "San Juan")]
Paris_SanJuan_tempreture
## Paris San Juan
## 42 81
# q12: Use the : operator to create a sequence of numbers 12, 13, 14, ..., 73.
sequence <- 12:73
print(sequence)
## [1] 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [26] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
## [51] 62 63 64 65 66 67 68 69 70 71 72 73
# q13: Create a vector containing all the positive odd numbers smaller than 100.
odd_number <- seq(from = 1, to = 99, by = 2)
print(odd_number)
## [1] 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49
## [26] 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79 81 83 85 87 89 91 93 95 97 99
# Sequence that starts at 6 and grows in increments of 4/7 without going
# past 55.
numbers <- seq(from = 6, to = 55, by = 4 / 7)
print(numbers)
## [1] 6.000000 6.571429 7.142857 7.714286 8.285714 8.857143 9.428571
## [8] 10.000000 10.571429 11.142857 11.714286 12.285714 12.857143 13.428571
## [15] 14.000000 14.571429 15.142857 15.714286 16.285714 16.857143 17.428571
## [22] 18.000000 18.571429 19.142857 19.714286 20.285714 20.857143 21.428571
## [29] 22.000000 22.571429 23.142857 23.714286 24.285714 24.857143 25.428571
## [36] 26.000000 26.571429 27.142857 27.714286 28.285714 28.857143 29.428571
## [43] 30.000000 30.571429 31.142857 31.714286 32.285714 32.857143 33.428571
## [50] 34.000000 34.571429 35.142857 35.714286 36.285714 36.857143 37.428571
## [57] 38.000000 38.571429 39.142857 39.714286 40.285714 40.857143 41.428571
## [64] 42.000000 42.571429 43.142857 43.714286 44.285714 44.857143 45.428571
## [71] 46.000000 46.571429 47.142857 47.714286 48.285714 48.857143 49.428571
## [78] 50.000000 50.571429 51.142857 51.714286 52.285714 52.857143 53.428571
## [85] 54.000000 54.571429
# How many elements the `numbers` vector holds
length_number <- length(x = numbers)
print(length_number)
## [1] 86
# q15: What is the class of the following object: a <- seq(1, 10, 0.5)?
a <- seq(from = 1, to = 10, by = 0.5)
print(a)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0
## [16] 8.5 9.0 9.5 10.0
class(a)
## [1] "numeric"
# q16: What is the class of the following object: a <- seq(1, 10)?
a <- seq(from = 1, to = 10)
print(a)
## [1] 1 2 3 4 5 6 7 8 9 10
class(a)
## [1] "integer"
# q17: The class of 1 is numeric, not integer. R defaults to numeric; to force
# an integer you need to append the letter L. Confirm that the class of 1L is
# integer.
x <- 1L
print(x)
## [1] 1
class(x)
## [1] "integer"
# q18: Define the following vector: x <- c("1", "3", "5"), and coerce it to get
# integers.
x <- as.integer(c("1", "3", "5"))
print(x)
## [1] 1 3 5
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.