---
title: "Business Analytics Lab Worksheet 04 (bsad_lab04)"
author: Max van de Werken
date: 2/21/18
output:
  html_document: default
  html_notebook: default
  pdf_document: default
subtitle: CME Group Foundation Business Analytics Lab
---

About

Qualitative Descriptive Analytics aims to gather an in-depth understanding of the underlying reasons and motivations for an event or observation. It is typically represented with visuals or charts.

Quantitative Descriptive Analytics focuses on investigating a phenomenon via statistical, mathematical, and computational techniques. It aims to quantify an event with metrics and numbers.

In this lab, we will explore both types of analytics using the data set provided.

Setup

Remember to always set your working directory to the source file location. Go to ‘Session’, scroll down to ‘Set Working Directory’, and click ‘To Source File Location’. Carefully read the instructions below and follow them to complete the tasks and answer any questions. Submit your work to RPubs as detailed in previous notes.

===========================================

PART I: QUANTITATIVE ANALYSIS

Begin by reading in the data from the ‘marketing.csv’ file, and viewing it to make sure it is read in correctly.

# Load the marketing data set and preview its first six rows to confirm the import.
mydata <- read.csv("data/marketing.csv")
head(mydata, n = 6)
##   case_number sales radio paper  tv pos
## 1           1 11125    65    89 250 1.3
## 2           2 16121    73    55 260 1.6
## 3           3 16440    74    58 270 1.7
## 4           4 16876    75    82 270 1.3
## 5           5 13965    69    75 255 1.5
## 6           6 14999    70    71 255 2.1

Now calculate the Range, Min, Max, Mean, STDEV, and Variance for each variable. Below is an example of how to compute the items for the variable ‘sales’.

# Descriptive statistics for the sales column. Each value is stored in its own
# variable and then echoed on the following line.
maxSales <- max(mydata[["sales"]])  # largest sales value
maxSales
## [1] 20450
minSales <- min(mydata[["sales"]])  # smallest sales value
minSales
## [1] 11125
rangeSales <- maxSales - minSales  # spread between the two extremes
rangeSales
## [1] 9325
meanSales <- mean(mydata[["sales"]])  # arithmetic mean
meanSales
## [1] 16717.2
sdSales <- sd(mydata[["sales"]])  # sample standard deviation
sdSales
## [1] 2617.052
varSales <- var(mydata[["sales"]])  # sample variance
varSales
## [1] 6848961
## TASK 1A: Compute max, min, range, mean, standard deviation, and variance for
## the radio column, then display each value.
maxradio <- max(mydata[["radio"]])
maxradio
## [1] 89
minradio <- min(mydata[["radio"]])
minradio
## [1] 65
rangeradio <- maxradio - minradio  # difference between the extremes
rangeradio
## [1] 24
meanradio <- mean(mydata[["radio"]])
meanradio
## [1] 76.1
sdradio <- sd(mydata[["radio"]])
sdradio
## [1] 7.354912
varradio <- var(mydata[["radio"]])
varradio
## [1] 54.09474
## TASK 1B: Compute max, min, range, mean, standard deviation, and variance for
## the paper column, then display each value.
maxpaper <- max(mydata[["paper"]])
maxpaper
## [1] 89
minpaper <- min(mydata[["paper"]])
minpaper
## [1] 35
rangepaper <- maxpaper - minpaper  # difference between the extremes
rangepaper
## [1] 54
meanpaper <- mean(mydata[["paper"]])
meanpaper
## [1] 62.3
sdpaper <- sd(mydata[["paper"]])
sdpaper
## [1] 15.35921
varpaper <- var(mydata[["paper"]])
varpaper
## [1] 235.9053
## TASK 2A: Compute max, min, range, mean, standard deviation, and variance for
## the tv column, then display each value.
maxtv <- max(mydata[["tv"]])
maxtv
## [1] 280
mintv <- min(mydata[["tv"]])
mintv
## [1] 250
rangetv <- maxtv - mintv  # difference between the extremes
rangetv
## [1] 30
meantv <- mean(mydata[["tv"]])
meantv
## [1] 266.6
sdtv <- sd(mydata[["tv"]])
sdtv
## [1] 11.3388
vartv <- var(mydata[["tv"]])
vartv
## [1] 128.5684
## TASK 2B: Compute max, min, range, mean, standard deviation, and variance for
## the pos column. Results are stored in maxPOS, minPOS, rangePOS, meanPOS,
## sdPOS, and varPOS, and each is displayed after it is computed.

maxPOS <- max(mydata$pos)
maxPOS
## [1] 3
minPOS <- min(mydata$pos)
minPOS
## [1] 0
# The range must be taken from the pos extremes only; the earlier version
# subtracted minPOS from the tv maximum and reported 280 instead of 3.
rangePOS <- maxPOS - minPOS
rangePOS
## [1] 3
meanPOS <- mean(mydata$pos)
meanPOS
## [1] 1.535
sdPOS <- sd(mydata$pos)
sdPOS
## [1] 0.7499298
varPOS <- var(mydata$pos)
varPOS
## [1] 0.5623947

An easy way to calculate some of the above statistics of all of these variables is with the summary() function. Below is an example.

# Quartiles, mean, min, and max for every column of the data frame at once.
summary(mydata)
##   case_number        sales           radio           paper      
##  Min.   : 1.00   Min.   :11125   Min.   :65.00   Min.   :35.00  
##  1st Qu.: 5.75   1st Qu.:15175   1st Qu.:70.00   1st Qu.:53.75  
##  Median :10.50   Median :16658   Median :74.50   Median :62.50  
##  Mean   :10.50   Mean   :16717   Mean   :76.10   Mean   :62.30  
##  3rd Qu.:15.25   3rd Qu.:18874   3rd Qu.:81.75   3rd Qu.:75.50  
##  Max.   :20.00   Max.   :20450   Max.   :89.00   Max.   :89.00  
##        tv             pos       
##  Min.   :250.0   Min.   :0.000  
##  1st Qu.:255.0   1st Qu.:1.200  
##  Median :270.0   Median :1.500  
##  Mean   :266.6   Mean   :1.535  
##  3rd Qu.:276.2   3rd Qu.:1.800  
##  Max.   :280.0   Max.   :3.000
# The same statistics restricted to a single column: sales.
summary(mydata[["sales"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11125   15175   16658   16717   18874   20450
## TASK 3A: Summary of the radio column.
summary(mydata[["radio"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   65.00   70.00   74.50   76.10   81.75   89.00
## TASK 3B: Summary of the paper column.
summary(mydata[["paper"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35.00   53.75   62.50   62.30   75.50   89.00
## TASK 3C: Summary of the tv column.
summary(mydata[["tv"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   250.0   255.0   270.0   266.6   276.2   280.0
## TASK 3D: Summary of the pos column.
summary(mydata[["pos"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.200   1.500   1.535   1.800   3.000

TASK 3E: There are some statistics not calculated with the summary() function. Specify which statistics you computed in TASKS 1 and 2 that are not calculated with the summary() function.

Answer: The range, standard deviation, and variance are not calculated in the summary.

================================================

PART II: QUALITATIVE ANALYSIS

Now, we will produce a basic plot of the ‘sales’ variable. Here we use the plot() function, passing it the variable we want to plot.

# Basic plot of the sales values in row order; no labels are supplied, so the
# axis labels are left at plot()'s defaults.
plot(mydata$sales)

We can customize the plot by adding labels to the x- and y- axis.

# Same sales plot drawn with points joined by lines (type "b") and explicit
# axis titles: xlab sets the x-axis title, ylab sets the y-axis title.
plot(
  mydata[["sales"]],
  type = "b",
  xlab = "Case Number-sales",
  ylab = "Sales in $1,000"
)

There are further ways to customize plots, such as changing the colors of the lines, adding a heading, or even making them interactive.

Now, let’s plot the other variables’ graphs alongside each other.

# Divide the plotting device into a 2 x 2 grid so the four plots below share
# one screen. Run TASKS 4A, 4B, 5A, and 5B in the same chunk as this call.
layout(matrix(1:4, nrow = 2, ncol = 2))

## TASK 4A: Plot of radio data, y axis labelled "Amount".
plot(mydata[["radio"]], xlab = "radio-data", ylab = "Amount", type = "b")

## TASK 4B: Plot of paper data, y axis labelled "Amount".
plot(mydata[["paper"]], xlab = "paper-data", ylab = "Amount", type = "b")

## TASK 5A: Plot of tv data, y axis labelled "Amount".
plot(mydata[["tv"]], xlab = "tv-data", ylab = "Amount", type = "b")

## TASK 5B: Plot of pos data, y axis labelled "Amount".
plot(mydata[["pos"]], xlab = "pos-data", ylab = "Amount", type = "b")

When looking at these plots it is hard to see a particular trend. One way to observe any possible trend in the sales data would be to re-order the data from low to high. The 20 monthly case studies are in no particular chronological order. The 20 case numbers are independent, sequentially generated numbers. Since each case is independent, we can reorder them.

# Sort the rows by ascending sales and keep the result in newdata. Whole rows
# are moved, so every other column stays aligned with its sales value.
newdata <- mydata[order(mydata[["sales"]]), ]
head(newdata, n = 6)
##    case_number sales radio paper  tv pos
## 1            1 11125    65    89 250 1.3
## 19          19 12369    65    37 250 2.5
## 20          20 13882    68    80 252 1.4
## 5            5 13965    69    75 255 1.5
## 6            6 14999    70    71 255 2.1
## 11          11 15234    70    66 255 1.5
# Sales values taken from the sorted data, drawn as points joined by lines.
plot(
  newdata[["sales"]],
  type = "b",
  xlab = "Case Number-sales",
  ylab = "Sales in $1,000"
)

# Divide the plotting device into a 2 x 2 grid so the four plots below share
# one screen. Run TASKS 6A, 6B, 7A, and 7B in the same chunk as this call.
layout(matrix(1:4, 2, 2))

# All four plots must come from the SAME sales-ordered data so that each panel
# shows how a variable moves as sales increase. Re-sorting newdata by the
# column being plotted (as was done previously) makes every curve trivially
# increasing and hides any relationship with sales. The ordering is
# re-established here so the chunk is correct even when run on its own.
newdata <- mydata[order(mydata$sales), ]

## TASK 6A: Plot of radio data from the sorted newdata, y axis labelled "Amount".
plot(newdata$radio, type = "b", xlab = "Radio-Newdata", ylab = "Amount")

## TASK 6B: Plot of paper data from the sorted newdata, y axis labelled "Amount".
plot(newdata$paper, type = "b", xlab = "Paper-Newdata", ylab = "Amount")

## TASK 7A: Plot of tv data from the sorted newdata, y axis labelled "Amount".
plot(newdata$tv, type = "b", xlab = "TV-Newdata", ylab = "Amount")

## TASK 7B: Plot of pos data from the sorted newdata, y axis labelled "Amount".
plot(newdata$pos, type = "b", xlab = "POS-Newdata", ylab = "Amount")