Ricson Lee h12400780 HW1

Instruction 1:

I found this dataset, titled “Dummy Data HSS”, on Kaggle. It is intended for predicting Sales from the TV, Radio, and Social Media advertising budgets together with the type of Influencer used.

Instruction 2:

# Load the comma-separated data file; the first row holds the column names
# and numbers are written with a decimal point
mydata <- read.table(
  file = "./Dummy Data HSS.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)

I imported the data into RStudio using the read.table() function. Although the data was sourced from Indonesia, where the decimal comma is the usual convention, the file itself uses the decimal point as the decimal separator and the comma as the field separator, so I set dec = "." and sep = ",".

Instruction 3:

# Preview the first six rows of the imported data
head(mydata, n = 6L)
##   TV     Radio Social.Media Influencer     Sales
## 1 16  6.566231     2.907983       Mega  54.73276
## 2 13  9.237765     2.409567       Mega  46.67790
## 3 41 15.886446     2.913410       Mega 150.17783
## 4 83 30.020028     6.922304       Mega 298.24634
## 5 15  8.437408     1.405998      Micro  56.59418
## 6 29  9.614382     1.027163       Mega 105.88915

Instruction 4:

This dataset has 4572 observations with 5 variables.

Instruction 5:

The data is sourced from Monash University, Indonesia. It was uploaded to Kaggle by Assistant Professor Harriman Samuel Saragih.

Instruction 6:

# Work on a copy so that the imported data frame stays untouched
mydata2 <- mydata
# Add a factor version of "Influencer" to categorise promotional programs;
# the level order is fixed explicitly, and any value outside these four
# levels becomes NA
mydata2[["InfluencerF"]] <- factor(
  mydata2[["Influencer"]],
  levels = c("Mega", "Macro", "Nano", "Micro")
)
# Below I shall rename several variables for the sake of clarity
library(dplyr)
## 
## 载入程序包:'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Rename the three budget columns in a single pass so their meaning is explicit
# (dplyr is already attached above, so it is not loaded again here)
mydata2 <- mydata2 %>%
  rename(
    TV_Budget = TV,
    Radio_Budget = Radio,
    SocialMedia_Budget = Social.Media
  )
# Here I shall remove the observations with missing values
# (attaching rstatix makes drop_na(), a tidyr verb, available for the next step)
library(rstatix)
## 
## 载入程序包:'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
# Drop every row that has a missing value in any column
mydata2 <- drop_na(mydata2)
# Data frame holding only the promotional programs with Mega influencers
mydata2_Mega <- subset(mydata2, InfluencerF == "Mega")
summary(mydata2_Mega)
##    TV_Budget       Radio_Budget      SocialMedia_Budget   Influencer       
##  Min.   : 10.00   Min.   : 0.08417   Min.   : 0.000031   Length:1152       
##  1st Qu.: 32.00   1st Qu.:10.38416   1st Qu.: 1.594543   Class :character  
##  Median : 52.00   Median :17.59556   Median : 3.164299   Mode  :character  
##  Mean   : 53.49   Mean   :18.04841   Mean   : 3.370672                     
##  3rd Qu.: 75.00   3rd Qu.:25.51413   3rd Qu.: 4.826683                     
##  Max.   :100.00   Max.   :48.87116   Max.   :13.083957                     
##      Sales       InfluencerF 
##  Min.   : 31.4   Mega :1152  
##  1st Qu.:113.3   Macro:   0  
##  Median :184.1   Nano :   0  
##  Mean   :190.4   Micro:   0  
##  3rd Qu.:265.8               
##  Max.   :364.1
# Data frame holding only the promotional programs with Micro influencers
mydata2_Micro <- subset(mydata2, InfluencerF == "Micro")
summary(mydata2_Micro)
##    TV_Budget       Radio_Budget      SocialMedia_Budget   Influencer       
##  Min.   : 10.00   Min.   : 0.01449   Min.   : 0.003177   Length:1148       
##  1st Qu.: 31.00   1st Qu.:10.49782   1st Qu.: 1.478449   Class :character  
##  Median : 53.00   Median :17.94893   Median : 2.939348   Mode  :character  
##  Mean   : 53.84   Mean   :18.19743   Mean   : 3.270513                     
##  3rd Qu.: 77.00   3rd Qu.:25.48128   3rd Qu.: 4.759552                     
##  Max.   :100.00   Max.   :47.11629   Max.   :12.108017                     
##      Sales        InfluencerF 
##  Min.   : 33.72   Mega :   0  
##  1st Qu.:110.26   Macro:   0  
##  Median :187.84   Nano :   0  
##  Mean   :191.58   Micro:1148  
##  3rd Qu.:273.77               
##  Max.   :362.04

Instruction 7:

# Summary statistics for every column of mydata2
summary(object = mydata2)
##    TV_Budget       Radio_Budget      SocialMedia_Budget   Influencer       
##  Min.   : 10.00   Min.   : 0.00068   Min.   : 0.000031   Length:4546       
##  1st Qu.: 32.00   1st Qu.:10.55536   1st Qu.: 1.530822   Class :character  
##  Median : 53.00   Median :17.85951   Median : 3.055565   Mode  :character  
##  Mean   : 54.06   Mean   :18.15753   Mean   : 3.323473                     
##  3rd Qu.: 77.00   3rd Qu.:25.64060   3rd Qu.: 4.804919                     
##  Max.   :100.00   Max.   :48.87116   Max.   :13.981662                     
##      Sales       InfluencerF 
##  Min.   : 31.2   Mega :1152  
##  1st Qu.:112.4   Macro:1112  
##  Median :189.0   Nano :1134  
##  Mean   :192.4   Micro:1148  
##  3rd Qu.:272.3               
##  Max.   :364.1
# Descriptive statistics of "Sales", reported separately for each
# "InfluencerF" level
library(psych)
describeBy(x = mydata2[["Sales"]], group = mydata2[["InfluencerF"]])
## 
##  Descriptive statistics by group 
## group: Mega
##    vars    n   mean    sd median trimmed    mad  min    max  range skew
## X1    1 1152 190.41 92.31 184.07  189.14 114.07 31.4 364.08 332.68  0.1
##    kurtosis   se
## X1    -1.14 2.72
## ------------------------------------------------------------ 
## group: Macro
##    vars    n   mean    sd median trimmed    mad   min    max  range skew
## X1    1 1112 196.07 92.39 194.46  195.76 120.25 32.57 358.42 325.85 0.03
##    kurtosis   se
## X1     -1.2 2.77
## ------------------------------------------------------------ 
## group: Nano
##    vars    n   mean    sd median trimmed    mad  min    max  range skew
## X1    1 1134 191.71 93.27 189.66   190.7 118.43 31.2 357.12 325.93 0.07
##    kurtosis   se
## X1    -1.21 2.77
## ------------------------------------------------------------ 
## group: Micro
##    vars    n   mean    sd median trimmed    mad   min    max  range skew
## X1    1 1148 191.58 94.11 187.84  190.61 121.61 33.72 362.04 328.32 0.08
##    kurtosis   se
## X1     -1.2 2.78

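Promotional programs with Mega Influencers: there are 1152 such programs, with mean Sales of 190.41 (median 184.07, standard deviation 92.31), ranging from 31.40 to 364.08.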

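Promotional programs with Micro Influencers: there are 1148 such programs, with mean Sales of 191.58 (median 187.84, standard deviation 94.11), ranging from 33.72 to 362.04.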

Instruction 8:

library(ggplot2)
## 
## 载入程序包:'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Boxplots of Sales (x-axis) for each influencer category (y-axis)
ggplot(data = mydata2, mapping = aes(y = InfluencerF, x = Sales)) +
  geom_boxplot()

# Bar chart of the number of observations in each influencer category
# (ggplot2 is already attached above, so it is not loaded again here)
ggplot(data = mydata2, mapping = aes(x = InfluencerF)) +
  geom_bar(fill = "skyblue", colour = "black")

# Scatter plot of Sales against the TV advertising budget
# (ggplot2 is already attached above, so it is not loaded again here)
ggplot(data = mydata2, mapping = aes(x = TV_Budget, y = Sales)) +
  geom_point(colour = "forestgreen")