This code is from chapter 3 of your textbook.
##########################################
# section 3.1 Introduction to R
##########################################
# examine the imported dataset
# head() prints the first rows; summary() prints per-column summary statistics
# NOTE(review): yearly_sales is not created in this section — presumably it was
# loaded by an earlier import step; confirm before running
head(yearly_sales)
summary(yearly_sales)
cust_id sales_total num_of_orders gender
Min. :100001 Min. : 30.02 Min. : 1.000 Length:10000
1st Qu.:102501 1st Qu.: 80.29 1st Qu.: 2.000 Class :character
Median :105001 Median : 151.65 Median : 2.000 Mode :character
Mean :105001 Mean : 249.46 Mean : 2.428
3rd Qu.:107500 3rd Qu.: 295.50 3rd Qu.: 3.000
Max. :110000 Max. :7606.09 Max. :22.000
# scatter plot: number of orders on the x-axis against total sales on the y-axis
plot(
  x = yearly_sales$num_of_orders,
  y = yearly_sales$sales_total,
  main = "Number of Orders vs. Sales"
)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
##########################################
# section 3.1.2 Data Import and Export
##########################################
# derive the average sales amount per order and store it as a new column
# NOTE(review): the code above refers to yearly_sales; sales is presumably the
# same imported data under another name — confirm it exists at this point
sales[["per_order"]] <- sales[["sales_total"]] / sales[["num_of_orders"]]
# write the data frame to a tab-delimited text file, leaving out the row names
write.table(
  sales,
  file = "sales_modified.txt",
  sep = "\t",
  row.names = FALSE
)
##########################################
# section 3.1.3 Attribute and Data Types
##########################################
# Numeric, Character, and Logical Data Types
i <- 1 # create a numeric variable
sport <- "football" # create a character variable
flag <- TRUE # create a logical variable
class(i) # returns "numeric"
[1] "numeric"
typeof(i) # returns "double"
[1] "double"
class(sport) # returns "character"
[1] "character"
typeof(sport) # returns "character"
[1] "character"
class(flag) # returns "logical"
[1] "logical"
typeof(flag) # returns "logical"
[1] "logical"
is.integer(i) # returns FALSE
[1] FALSE
j <- as.integer(i) # coerces contents of i into an integer
is.integer(j) # returns TRUE
[1] TRUE
length(i) # returns 1
[1] 1
length(flag) # returns 1
[1] 1
length(sport) # returns 1 (not 8 for "football")
[1] 1
# Vectors
is.vector(i) # returns TRUE
[1] TRUE
is.vector(flag) # returns TRUE
[1] TRUE
is.vector(sport) # returns TRUE
[1] TRUE
u <- c("red", "yellow", "blue") # create a vector "red" "yellow" "blue"
u # returns "red" "yellow" "blue"
[1] "red" "yellow" "blue"
u[1] # returns "red" (1st element in u)
[1] "red"
v <- 1:5 # create a vector 1 2 3 4 5
v # returns 1 2 3 4 5
[1] 1 2 3 4 5
sum(v) # returns 15
[1] 15
w <- v * 2 # create a vector 2 4 6 8 10
w # returns 2 4 6 8 10
[1] 2 4 6 8 10
w[3] # returns 6 (the 3rd element of w)
[1] 6
z <- v + w # sums two vectors element by element
z # returns 3 6 9 12 15
[1] 3 6 9 12 15
z > 8 # returns FALSE FALSE TRUE TRUE TRUE
[1] FALSE FALSE TRUE TRUE TRUE
z[z > 8] # returns 9 12 15
[1] 9 12 15
z[z > 8 | z < 5] # returns 3 9 12 15 ("|" denotes "or")
[1] 3 9 12 15
a <- vector(length=3) # create a logical vector of length 3
a # returns FALSE FALSE FALSE
[1] FALSE FALSE FALSE
b <- vector(mode="numeric", 3) # create a numeric vector of length 3
typeof(b) # returns "double"
[1] "double"
b[2] <- 3.1 # assign 3.1 to the 2nd element
b # returns 0.0 3.1 0.0
[1] 0.0 3.1 0.0
# NOTE: the variable name c shadows base::c(); calls such as c(1, 2) still work
# because R skips non-function objects when it looks up a function name
c <- vector(mode="integer", 0) # create an integer vector of length 0
c # returns integer(0)
integer(0)
length(c) # returns 0
[1] 0
length(b) # returns 3
[1] 3
length(sales$num_of_orders) # returns 10000 (number of customers)
[1] 10000
is.vector(sales$cust_id) # returns TRUE
[1] TRUE
is.vector(sales$sales_total) # returns TRUE
[1] TRUE
is.vector(sales$num_of_orders) # returns TRUE
[1] TRUE
is.vector(sales$gender) # returns TRUE (gender is a character vector here, not a factor)
[1] TRUE
is.factor(sales$gender) # returns FALSE (gender is stored as character, not as a factor)
[1] FALSE
str(sales) # display structure of the data frame object
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 10000 obs. of 5 variables:
$ cust_id : int 100001 100002 100003 100004 100005 100006 100007 100008 100009 100010 ...
$ sales_total : num 800.6 217.5 74.6 498.6 723.1 ...
$ num_of_orders: int 3 3 2 3 4 2 2 2 2 2 ...
$ gender : chr "F" "F" "M" "M" ...
$ per_order : num 266.9 72.5 37.3 166.2 180.8 ...
- attr(*, "spec")=List of 2
..$ cols :List of 4
.. ..$ cust_id : list()
.. .. ..- attr(*, "class")= chr "collector_integer" "collector"
.. ..$ sales_total : list()
.. .. ..- attr(*, "class")= chr "collector_double" "collector"
.. ..$ num_of_orders: list()
.. .. ..- attr(*, "class")= chr "collector_integer" "collector"
.. ..$ gender : list()
.. .. ..- attr(*, "class")= chr "collector_character" "collector"
..$ default: list()
.. ..- attr(*, "class")= chr "collector_guess" "collector"
..- attr(*, "class")= chr "col_spec"
# Note: we used the tidyverse for data manipulation, so sales is a tibble.
# However, the base R $ notation is still useful for accessing columns.
# extract the fourth column of the sales data frame
# NOTE(review): sales is a tibble (class "tbl_df"), so [,4] presumably returns a
# one-column tibble rather than a bare vector — confirm
sales[,4]
# extract the gender column of the sales data frame as a plain vector
sales$gender
[1] "F" "F" "M" "M" "F" "F" "M" "M" "F" "M" "F" "F" "M" "M" "M" "M" "M" "M" "M" "F" "M" "M" "F" "F" "F" "F"
[27] "F" "M" "M" "F" "F" "F" "M" "M" "M" "M" "M" "F" "F" "M" "F" "M" "M" "M" "M" "F" "F" "M" "F" "M" "M" "F"
[53] "F" "F" "M" "M" "M" "M" "M" "F" "M" "M" "M" "M" "M" "M" "M" "F" "F" "M" "F" "F" "M" "F" "M" "M" "F" "F"
[79] "F" "F" "F" "F" "M" "F" "F" "M" "M" "F" "M" "M" "M" "F" "F" "F" "M" "F" "F" "M" "F" "F" "M" "F" "M" "M"
[105] "M" "F" "F" "M" "M" "F" "M" "F" "M" "M" "M" "F" "F" "M" "F" "M" "M" "F" "M" "F" "F" "M" "M" "M" "F" "M"
[131] "M" "F" "M" "F" "M" "M" "M" "F" "M" "M" "F" "M" "M" "F" "F" "M" "F" "M" "M" "F" "F" "F" "M" "M" "M" "M"
[157] "F" "M" "M" "M" "M" "M" "F" "F" "F" "M" "M" "M" "M" "M" "F" "F" "F" "F" "F" "F" "M" "M" "F" "F" "M" "M"
[183] "F" "M" "M" "M" "M" "F" "F" "F" "F" "F" "M" "F" "M" "F" "M" "M" "F" "F" "F" "F" "M" "M" "F" "M" "M" "M"
[209] "M" "F" "F" "M" "F" "F" "M" "F" "F" "M" "M" "M" "M" "F" "M" "F" "M" "M" "M" "M" "M" "M" "M" "M" "F" "M"
[235] "F" "M" "F" "M" "M" "F" "M" "F" "M" "M" "M" "M" "M" "F" "F" "F" "F" "M" "F" "M" "M" "F" "M" "F" "F" "F"
[261] "F" "M" "M" "F" "F" "M" "M" "M" "F" "M" "M" "F" "M" "F" "F" "F" "F" "M" "M" "M" "M" "M" "M" "F" "M" "F"
[287] "M" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "F" "M" "M" "M" "M" "M" "M" "M" "M" "F" "F" "M" "M" "F" "M"
[313] "F" "F" "M" "F" "M" "M" "M" "M" "F" "F" "M" "M" "M" "F" "M" "M" "M" "F" "M" "F" "M" "M" "F" "F" "F" "M"
[339] "F" "F" "F" "M" "F" "F" "F" "M" "F" "F" "M" "F" "F" "M" "M" "M" "F" "F" "F" "F" "M" "F" "M" "F" "M" "F"
[365] "M" "F" "M" "M" "F" "M" "M" "F" "M" "F" "M" "F" "M" "F" "F" "M" "M" "M" "F" "F" "M" "F" "M" "M" "F" "F"
[391] "M" "F" "F" "F" "F" "F" "M" "F" "M" "M" "F" "F" "F" "M" "F" "M" "F" "F" "M" "M" "F" "F" "F" "M" "F" "M"
[417] "F" "M" "M" "M" "M" "M" "M" "F" "M" "F" "F" "M" "M" "M" "M" "F" "M" "F" "F" "M" "F" "M" "F" "F" "F" "F"
[443] "M" "F" "F" "F" "M" "M" "M" "M" "F" "M" "F" "F" "F" "F" "F" "M" "F" "M" "M" "F" "F" "F" "F" "M" "M" "M"
[469] "M" "F" "F" "M" "M" "M" "F" "F" "M" "M" "M" "F" "M" "F" "F" "M" "M" "M" "M" "F" "F" "M" "M" "F" "M" "M"
[495] "M" "M" "M" "F" "F" "M" "F" "F" "F" "F" "F" "M" "M" "F" "F" "F" "F" "F" "M" "F" "M" "F" "M" "M" "M" "M"
[521] "F" "M" "M" "M" "M" "F" "M" "M" "M" "M" "M" "M" "F" "F" "M" "F" "M" "M" "F" "F" "M" "F" "F" "F" "F" "F"
[547] "M" "F" "M" "F" "M" "F" "F" "F" "F" "F" "M" "F" "F" "M" "M" "M" "M" "F" "M" "F" "M" "M" "F" "M" "F" "F"
[573] "M" "F" "F" "F" "M" "F" "F" "F" "M" "M" "F" "F" "M" "M" "F" "M" "M" "F" "M" "F" "M" "F" "M" "F" "F" "F"
[599] "M" "F" "F" "M" "F" "M" "M" "M" "M" "F" "F" "M" "F" "F" "M" "M" "M" "M" "M" "M" "F" "M" "M" "M" "M" "M"
[625] "M" "F" "F" "M" "F" "F" "M" "M" "F" "M" "F" "M" "M" "M" "M" "M" "F" "M" "F" "F" "F" "F" "F" "F" "M" "M"
[651] "M" "F" "F" "F" "F" "F" "F" "M" "M" "F" "M" "M" "F" "F" "F" "M" "M" "F" "M" "M" "F" "M" "M" "M" "F" "F"
[677] "F" "F" "F" "M" "M" "M" "M" "F" "F" "M" "M" "M" "M" "F" "M" "M" "F" "M" "F" "M" "M" "F" "M" "M" "M" "F"
[703] "F" "M" "M" "F" "M" "F" "M" "F" "M" "M" "F" "F" "M" "M" "M" "F" "M" "M" "M" "F" "M" "M" "M" "M" "M" "M"
[729] "M" "F" "F" "M" "F" "M" "M" "F" "F" "F" "M" "M" "M" "M" "M" "F" "M" "F" "F" "F" "F" "F" "M" "M" "M" "M"
[755] "F" "F" "M" "F" "F" "F" "F" "M" "F" "F" "M" "M" "F" "F" "M" "F" "M" "F" "M" "M" "M" "F" "M" "M" "M" "F"
[781] "M" "F" "F" "F" "F" "M" "M" "M" "M" "M" "M" "M" "F" "F" "F" "F" "F" "M" "M" "F" "M" "F" "F" "M" "M" "M"
[807] "M" "F" "M" "F" "M" "F" "F" "F" "M" "M" "F" "F" "M" "F" "F" "F" "M" "M" "M" "M" "F" "F" "M" "F" "M" "F"
[833] "F" "F" "F" "M" "M" "M" "M" "M" "F" "F" "F" "F" "M" "M" "M" "F" "M" "F" "F" "F" "M" "F" "M" "M" "F" "M"
[859] "F" "F" "M" "F" "M" "M" "F" "M" "F" "M" "F" "M" "M" "F" "M" "F" "F" "M" "F" "F" "F" "F" "M" "M" "F" "M"
[885] "F" "F" "M" "F" "M" "M" "F" "F" "M" "F" "F" "F" "M" "M" "F" "F" "M" "M" "F" "M" "M" "F" "F" "M" "M" "F"
[911] "F" "M" "F" "F" "M" "F" "M" "M" "F" "M" "F" "M" "F" "F" "M" "F" "F" "M" "M" "M" "M" "M" "F" "F" "F" "M"
[937] "F" "F" "M" "F" "M" "F" "M" "F" "M" "F" "M" "M" "F" "M" "M" "F" "F" "M" "F" "F" "F" "F" "M" "F" "M" "M"
[963] "M" "F" "F" "M" "F" "F" "F" "M" "F" "F" "M" "M" "F" "F" "F" "F" "M" "F" "F" "F" "M" "M" "F" "M" "M" "M"
[989] "F" "M" "F" "M" "M" "M" "F" "M" "M" "M" "F" "M"
[ reached getOption("max.print") -- omitted 9000 entries ]
# rows 1 and 2, all columns
sales[c(1, 2), ]
# columns 1, 3 and 4 selected by position, all rows
sales[, c(1, 3, 4)]
# the cust_id and sales_total columns selected by name, all rows
sales[, c("cust_id", "sales_total")]
# every record whose gender value is "F" (female)
sales[sales[["gender"]] == "F", ]
class(sales)
[1] "tbl_df" "tbl" "data.frame"
typeof(sales)
[1] "list"
# Factors
class(sales$gender) # returns "character" here (gender is not stored as a factor)
[1] "character"
is.ordered(sales$gender) # returns FALSE
[1] FALSE
head(sales$gender) # display first six values (no levels are shown: gender is character)
[1] "F" "F" "M" "M" "F" "F"
# build an empty character vector with one entry per row of sales
sales_group <- character(length(sales$sales_total))
# group the customers according to the sales amount:
#   below 100 -> "small", 100 up to (not including) 500 -> "medium",
#   500 and above -> "big"
sales_group[sales$sales_total < 100] <- "small"
sales_group[sales$sales_total >= 100 & sales$sales_total < 500] <- "medium"
sales_group[sales$sales_total >= 500] <- "big"
# create the ordered factor (small < medium < big)
spender <- factor(sales_group, levels = c("small", "medium", "big"),
                  ordered = TRUE)
# add the factor as a column by assignment rather than cbind(): cbind() would
# return a plain data.frame (dropping the tibble classes of sales) and would
# append a duplicate spender column every time this chunk is re-run
sales$spender <- spender
str(sales$spender)
Ord.factor w/ 3 levels "small"<"medium"<..: 3 2 1 2 3 1 1 1 2 1 ...
head(sales$spender)
[1] big medium small medium big small
Levels: small < medium < big
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.