##################################################################################
## Topic: R Practice Session #####################################################
## Author: Eunhee (Emily) Ko #####################################################
## Term: Fall, 2020 ##############################################################
##################################################################################
# (1) Print your name
print(x = "Eunhee (Emily) Ko")
## [1] "Eunhee (Emily) Ko"
# (2) Assign "IMC401: Marketing Research" to imc, then check that imc is a character value.
imc <- "IMC401: Marketing Research"
# Two views of the same fact: a logical test and the storage type itself.
is.character(x = imc)
## [1] TRUE
typeof(x = imc)
## [1] "character"
# (3) Create a vector holding the names of your team members and assign it to group.
group <- c(
  "Jiwoo",
  "Sarah",
  "Edward",
  "Lauren"
)
is.vector(x = group)
## [1] TRUE
# (4) Create three vectors harryPotter, hobbits, loadRing where each vector consists of
# three character names in the movies (i.e., Harry Potter 1-7, The Hobbit 1-3, The Lord of the Rings 1-3).
# (If you are not familiar, google or bing it!) Combine the three vectors to become a 3x3 matrix.
# The object name loadRing is kept as given in the exercise, because cbind() uses it as the column name.
harryPotter <- c("Hermione", "Harry", "Ron")
hobbits <- c("Bilbo", "Gandalf", "Gollum")
loadRing <- c("Frodo", "Aragorn", "Sauron")
# cbind() places each vector in its own column, giving a 3x3 character matrix
# whose column names are taken from the vector names.
cbind(harryPotter, hobbits, loadRing)
## harryPotter hobbits loadRing
## [1,] "Hermione" "Bilbo" "Frodo"
## [2,] "Harry" "Gandalf" "Aragorn"
## [3,] "Ron" "Gollum" "Sauron"
# (5) Store the matrix as movie, convert it to a data frame named movie.df,
# and confirm that the conversion really produced a data frame.
movie <- cbind(
  harryPotter = harryPotter,
  hobbits = hobbits,
  loadRing = loadRing
)
movie.df <- as.data.frame(x = movie)
# Same check is.data.frame() performs: does the object carry the "data.frame" class?
inherits(movie.df, "data.frame")
## [1] TRUE
## (6) Download the data named midwest in the desired folder of your local laptop and import it as csv file into R
# 1) Find factor variables by checking the structure of the data
# NOTE: the path below is specific to the author's machine; point it at your own copy of midwest.csv.
# stringsAsFactors = TRUE is passed explicitly: read.csv() only defaults to it before R 4.0.0.
# On newer versions the text columns would load as character and str() would show no factors.
midwest <- read.csv(
  file = "C:/Users/ehk994/Desktop/Teaching/Marketing Research_Fall2020/R Session/midwest.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
str(midwest) #county, state, category
## 'data.frame': 437 obs. of 29 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ PID : int 561 562 563 564 565 566 567 568 569 570 ...
## $ county : Factor w/ 320 levels "ADAMS","ALCONA",..: 1 3 25 26 28 30 33 35 36 37 ...
## $ state : Factor w/ 5 levels "IL","IN","MI",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ area : num 0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
## $ poptotal : int 66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
## $ popdensity : num 1271 759 681 1812 324 ...
## $ popwhite : int 63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
## $ popblack : int 1702 3496 429 127 547 50 1 111 16 16559 ...
## $ popamerindian : int 98 19 35 46 14 65 8 30 8 331 ...
## $ popasian : int 249 48 16 150 5 195 15 61 23 8033 ...
## $ popother : int 124 9 34 1139 6 221 0 84 6 1596 ...
## $ percwhite : num 96.7 66.4 96.6 95.3 90.2 ...
## $ percblack : num 2.575 32.9 2.862 0.412 9.373 ...
## $ percamerindan : num 0.148 0.179 0.233 0.149 0.24 ...
## $ percasian : num 0.3768 0.4517 0.1067 0.4869 0.0857 ...
## $ percother : num 0.1876 0.0847 0.2268 3.6973 0.1028 ...
## $ popadults : int 43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
## $ perchsd : num 75.1 59.7 69.3 75.5 68.9 ...
## $ percollege : num 19.6 11.2 17 17.3 14.5 ...
## $ percprof : num 4.36 2.87 4.49 4.2 3.37 ...
## $ poppovertyknown : int 63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
## $ percpovertyknown : num 96.3 99.1 95 98.5 82.5 ...
## $ percbelowpoverty : num 13.15 32.24 12.07 7.21 13.52 ...
## $ percchildbelowpovert: num 18 45.8 14 11.2 13 ...
## $ percadultpoverty : num 11.01 27.39 10.85 5.54 11.14 ...
## $ percelderlypoverty : num 12.44 25.23 12.7 6.22 19.2 ...
## $ inmetro : int 0 0 0 1 0 0 0 0 0 1 ...
## $ category : Factor w/ 16 levels "AAR","AAU","AHR",..: 1 15 1 6 1 1 13 1 1 8 ...
# Look up the variable descriptions. A bare ?midwest finds nothing at this point because
# ggplot2 is not attached yet, so the package is named explicitly.
# NOTE(review): assumes this CSV was exported from ggplot2's midwest dataset - confirm.
help("midwest", package = "ggplot2")
# 2) Load ggplot2 and create scatter plot using geom_point where x is percollege and y is percadultpoverty
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked _by_ '.GlobalEnv':
##
## midwest
# Scatter plot: percollege on the x axis, percadultpoverty on the y axis.
ggplot(midwest) +
  geom_point(aes(percollege, percadultpoverty))

# 3) Add a third variable ('inmetro') using color and shape
# Only colour is mapped at this step; the shape version follows once inmetro is a factor.
ggplot(midwest) +
  geom_point(aes(percollege, percadultpoverty, colour = inmetro))

# 4) Change inmetro into a factor variable and redo 3). Discuss the differences between the result
# from 3) and the result from 4). What pattern can you see? Is it a negative or a positive
# relationship? How do you explain the relationship with the third variable?
midwest[["inmetro"]] <- as.factor(midwest[["inmetro"]])
# Same scatter plot, with inmetro (now a factor) mapped to point colour.
ggplot(midwest) +
  geom_point(aes(percollege, percadultpoverty, colour = inmetro))

# Same scatter plot again, with inmetro mapped to point shape instead of colour.
ggplot(midwest) +
  geom_point(aes(percollege, percadultpoverty, shape = inmetro))

## (7) Download the data named automobile_data in the desired folder of your local laptop and import it as csv file into R
# NOTE: the path below is specific to the author's machine; point it at your own copy of the file.
automobile_data <- read.csv(
  file = "C:/Users/ehk994/Desktop/Teaching/Marketing Research_Fall2020/R Session/automobile_data.csv",
  header = TRUE
)
# 1) Load dplyr. Arrange rows based on length, width, and height and save it as automobile_arranged
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Sort rows by length, breaking ties by width and then by height.
automobile_arranged <- automobile_data %>%
  dplyr::arrange(length, width, height)
# 2) Keep the cars whose engine type is ohc and whose length is greater than 160; save them as ohc160
# Both conditions must hold for a row to be kept.
ohc160 <- automobile_data %>%
  dplyr::filter(engine.type == "ohc" & length > 160) #117 obs
# 3) Add a new variable avg.mpg to the data frame (Hint: use mutate),
# where avg.mpg = (city.mpg + highway.mpg)/2, and save the result as automobile_data2
automobile_data2 <- automobile_data %>%
  dplyr::mutate(avg.mpg = (city.mpg + highway.mpg) / 2)
# 4) Select the following variables from automobile_data2 and save the new data as automobile_sub
# make, fuel.type, aspiration, price, and all variables ending with 'mpg'
# The three leading columns are named one by one rather than as the range make:aspiration,
# so the result does not depend on those columns sitting next to each other in the CSV.
# Column order of the result: make, fuel.type, aspiration, the columns ending in "mpg", price.
automobile_sub <- dplyr::select(
  automobile_data2,
  make, fuel.type, aspiration,
  ends_with("mpg"),
  price
)