##################################################################################
## Topic: R Practice Session #####################################################
## Author: Eunhee (Emily) Ko #####################################################
## Term: Fall, 2020 ##############################################################
##################################################################################

# (1) Print your name.
print("Eunhee (Emily) Ko")
## [1] "Eunhee (Emily) Ko"

# (2) Store the course title "IMC401: Marketing Research" in `imc`, then
# confirm that `imc` holds a character value.
imc <- "IMC401: Marketing Research"
is.character(imc) # logical test of the type
## [1] TRUE
typeof(imc)       # name of the underlying storage type
## [1] "character"

# (3) Collect the names of your team members in a vector called `group`.
group <- c(
  "Jiwoo",
  "Sarah",
  "Edward",
  "Lauren"
)
is.vector(group)
## [1] TRUE
# (4) Build three character vectors -- harryPotter, hobbits, loadRing -- each
# holding three character names from the corresponding films (Harry Potter
# 1-7, The Hobbit 1-3, The Lord of the Rings 1-3; look them up if you are not
# familiar with them). Then bind the three vectors column-wise into a 3x3
# matrix.
harryPotter <- c("Hermione", "Harry", "Ron")
hobbits     <- c("Bilbo", "Gandalf", "Golum")
loadRing    <- c("Frodo", "Aragorn", "Sauron")
# Each vector becomes one column; the column names are given explicitly.
cbind(
  harryPotter = harryPotter,
  hobbits = hobbits,
  loadRing = loadRing
)
##      harryPotter hobbits   loadRing 
## [1,] "Hermione"  "Bilbo"   "Frodo"  
## [2,] "Harry"     "Gandalf" "Aragorn"
## [3,] "Ron"       "Golum"   "Sauron"

# (5) Store the matrix as `movie`, convert it to a data frame named
# `movie.df`, and verify that the conversion produced a data frame.
movie <- cbind(
  harryPotter = harryPotter,
  hobbits = hobbits,
  loadRing = loadRing
)
movie.df <- as.data.frame(movie)
is.data.frame(movie.df)
## [1] TRUE
## (6) Download the data named midwest into a folder on your local laptop and
## import it into R as a csv file.
# 1) Find the factor variables by checking the structure of the data.
# NOTE: the path below is specific to the author's machine; point it at your
# own copy of midwest.csv.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, so without it str() would report no
# factor variables at all.
midwest <- read.csv(
  file = "C:/Users/ehk994/Desktop/Teaching/Marketing Research_Fall2020/R Session/midwest.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
str(midwest) # factor variables: county, state, category
## 'data.frame':    437 obs. of  29 variables:
##  $ X                   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ PID                 : int  561 562 563 564 565 566 567 568 569 570 ...
##  $ county              : Factor w/ 320 levels "ADAMS","ALCONA",..: 1 3 25 26 28 30 33 35 36 37 ...
##  $ state               : Factor w/ 5 levels "IL","IN","MI",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area                : num  0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
##  $ poptotal            : int  66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
##  $ popdensity          : num  1271 759 681 1812 324 ...
##  $ popwhite            : int  63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
##  $ popblack            : int  1702 3496 429 127 547 50 1 111 16 16559 ...
##  $ popamerindian       : int  98 19 35 46 14 65 8 30 8 331 ...
##  $ popasian            : int  249 48 16 150 5 195 15 61 23 8033 ...
##  $ popother            : int  124 9 34 1139 6 221 0 84 6 1596 ...
##  $ percwhite           : num  96.7 66.4 96.6 95.3 90.2 ...
##  $ percblack           : num  2.575 32.9 2.862 0.412 9.373 ...
##  $ percamerindan       : num  0.148 0.179 0.233 0.149 0.24 ...
##  $ percasian           : num  0.3768 0.4517 0.1067 0.4869 0.0857 ...
##  $ percother           : num  0.1876 0.0847 0.2268 3.6973 0.1028 ...
##  $ popadults           : int  43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
##  $ perchsd             : num  75.1 59.7 69.3 75.5 68.9 ...
##  $ percollege          : num  19.6 11.2 17 17.3 14.5 ...
##  $ percprof            : num  4.36 2.87 4.49 4.2 3.37 ...
##  $ poppovertyknown     : int  63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
##  $ percpovertyknown    : num  96.3 99.1 95 98.5 82.5 ...
##  $ percbelowpoverty    : num  13.15 32.24 12.07 7.21 13.52 ...
##  $ percchildbelowpovert: num  18 45.8 14 11.2 13 ...
##  $ percadultpoverty    : num  11.01 27.39 10.85 5.54 11.14 ...
##  $ percelderlypoverty  : num  12.44 25.23 12.7 6.22 19.2 ...
##  $ inmetro             : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ category            : Factor w/ 16 levels "AAR","AAU","AHR",..: 1 15 1 6 1 1 13 1 1 8 ...
# Look up the documentation for the midwest data set. A bare `?midwest` finds
# nothing at this point ("No documentation for 'midwest' in specified packages
# and libraries") because ggplot2, which ships an object of this name, is not
# attached yet; naming the package makes the lookup work regardless.
# NOTE(review): this assumes the downloaded csv is an export of ggplot2's
# midwest data set -- confirm against the course material.
help("midwest", package = "ggplot2")
# 2) Load ggplot2 and draw a scatter plot with geom_point(), putting
# percollege on the x axis and percadultpoverty on the y axis.
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked _by_ '.GlobalEnv':
## 
##     midwest
# The x/y mapping is declared once on the plot and inherited by the point
# layer.
ggplot(midwest, aes(x = percollege, y = percadultpoverty)) +
  geom_point()

# 3) Add a third variable ('inmetro') using color and shape.
# At this point inmetro is still an integer column, so only the color mapping
# is drawn here; shape follows in 4), once inmetro is a factor (ggplot2 does
# not map a continuous variable to shape).
ggplot(midwest, aes(x = percollege, y = percadultpoverty)) +
  geom_point(aes(color = inmetro))

# 4) Convert inmetro into a factor variable and redo 3). Discuss how the
# result of 4) differs from the result of 3), and what pattern you can see:
# is the relationship negative or positive? How do you explain the
# relationship with the third variable?
midwest$inmetro <- as.factor(midwest$inmetro)

# Same plot as in 3), now colored by the factor version of inmetro.
ggplot(midwest, aes(x = percollege, y = percadultpoverty)) +
  geom_point(aes(color = inmetro))

# The factor can also be shown through the point shape.
ggplot(midwest, aes(x = percollege, y = percadultpoverty)) +
  geom_point(aes(shape = inmetro))

## (7) Download the data named automobile_data into a folder on your local
## laptop and import it into R as a csv file.
# NOTE: the path below is specific to the author's machine.
automobile_data <- read.csv(
  file = "C:/Users/ehk994/Desktop/Teaching/Marketing Research_Fall2020/R Session/automobile_data.csv",
  header = TRUE
)
# 1) Load dplyr. Sort the rows by length, then width, then height, and save
# the result as automobile_arranged.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
automobile_arranged <- automobile_data %>%
  arrange(length, width, height)
# 2) Keep the cars whose engine type is "ohc" and whose length is greater
# than 160, and save them as ohc160 in your R session.
ohc160 <- automobile_data %>%
  filter(engine.type == "ohc", length > 160) # 117 obs (author's recorded count)
# 3) Add a new variable avg.mpg to the data frame (hint: use mutate), where
# avg.mpg = (city.mpg + highway.mpg)/2, and save the result as
# automobile_data2.
automobile_data2 <- automobile_data %>%
  mutate(avg.mpg = (city.mpg + highway.mpg) / 2)
# 4) Select the following variables from automobile_data2 and save the new
# data as automobile_sub: make, fuel.type, aspiration, price, and all
# variables ending with 'mpg'.
# The three leading columns are named one by one rather than as the range
# make:aspiration, so the selection does not depend on those columns sitting
# next to each other in the csv. The output column order is unchanged
# (make, fuel.type, aspiration, the mpg columns, price).
automobile_sub <- select(
  automobile_data2,
  make, fuel.type, aspiration,
  ends_with("mpg"),
  price
)