# I do not own any rights to the data and code in the exercises below, as they were used entirely for practice and learning purposes. Enjoy!
## 1. Data, Exploratory Analysis, Performance Analysis
# Import the exercise CSV file into a data frame.
MyExploratoryData <- read.csv("C:/Data Science for Marketing/Exercise_Files/02_02/exploratory-r.csv")
# Get a snapshot of the data: print the first rows.
head(MyExploratoryData)
# Visualize the distribution of a single column, here cost per acquisition (cpa).
hist(MyExploratoryData$cpa)
# Use the keywords as row names so that performance can be read per keyword
# and the heatmap rows are labelled by keyword instead of by row number.
row.names(MyExploratoryData) <- MyExploratoryData$keyword
# After the assignment above, the rows are named and no longer numbered.
head(MyExploratoryData)
# heatmap() needs a numeric matrix. The keyword column is dropped before the
# conversion: it already provides the row labels, and data.matrix() would
# otherwise recode it to integer codes and draw it as a meaningless extra
# column in the heatmap.
MyDataMatrix <- data.matrix(
  MyExploratoryData[, names(MyExploratoryData) != "keyword", drop = FALSE]
)
# Draw the heatmap to compare the keywords across the remaining columns.
# Rowv = NA and Colv = NA keep the original row/column order (no dendrograms);
# scale = "column" scales each column separately.
heatmap(MyDataMatrix, Rowv = NA, Colv = NA, scale = "column")
## 2. Regression in R
# Read the regression exercise CSV into a data frame.
RegressionData <- read.csv(
  file = "C:/Data Science for Marketing/Exercise_Files/03_02/regression-r.csv"
)
# Scatterplot of net sales (y axis) against broadcast (x axis).
plot(x = RegressionData$BROADCAST, y = RegressionData$NET.SALES)
# Fit a simple linear regression of net sales on broadcast with lm().
# NOTE(review): the formula refers to the columns through `RegressionData$`,
# which is why the coefficient printed below carries that prefix.
Mylm <- lm(formula = RegressionData$NET.SALES ~ RegressionData$BROADCAST)
# Overlay the fitted values as the line of best fit.
# lines() draws onto the plot created above, so it cannot be run on its own
# (e.g. in a separate R notebook chunk).
lines(x = RegressionData$BROADCAST, y = Mylm[["fitted.values"]])
# Print the estimated intercept and slope.
# The slope is positive in the recorded run: sales rise with broadcasting.
Mylm[["coefficients"]]
## (Intercept) RegressionData$BROADCAST
## 133108.78 12141.94
## 3. Prediction with R.
# Connect to our data.
# stringsAsFactors = TRUE is required on R >= 4.0.0, where read.csv() leaves
# text columns as character by default: tree() only fits a classification
# tree when the response (sales.classification) is a factor.
PredictionData <- read.csv(
  "C:/Data Science for Marketing/Exercise_Files/04_02/prediction-r.csv",
  stringsAsFactors = TRUE
)
# Count the observations in each sales classification.
table(PredictionData$sales.classification)
##
## A B C
## 85 138 77
# Output column names for easy reference
names(PredictionData)
## [1] "sales" "sales.classification"
## [3] "capita" "drive.by.traffic"
## [5] "complimentary.establishments" "competition"
## [7] "weather" "unemployment.rate"
## [9] "var1" "var2"
## [11] "var3"
# Load the tree package (install it once beforehand if needed).
#install.packages("tree")
library("tree")
# Configure the algorithm to create the tree:
# response (sales.classification) ~ predictors, evaluated in PredictionData.
DecisionTree <- tree(
  sales.classification ~ capita + drive.by.traffic +
    complimentary.establishments + competition + weather +
    unemployment.rate + var1 + var2 + var3,
  data = PredictionData
)
# Plot the tree. This draws the branches only, without labels.
plot(DecisionTree)
# Add the labels to the tree that was just plotted.
text(DecisionTree)
# The result below best advises which conditions must be met for a new store / business to be fruitful in a given location. This is based on the question we were trying to answer in this exercise.
# Prune the decision tree down to its 3 best terminal nodes so that only the
# biggest predictors remain.
PrunedTree <- prune.tree(tree = DecisionTree, best = 3)
# Draw the pruned tree (branches only).
plot(x = PrunedTree)
# Add the labels to the pruned tree.
text(x = PrunedTree)
## 4. Cluster Analysis in R
# Connect to the data
ClusterData <- read.csv("C:/Data Science for Marketing/Exercise_Files/05_02/cluster-r.csv")
# Review the data
head(ClusterData)
# Standardize the data so that every variable gets equal weight.
# The first column is dropped because the algorithm only works on numerical data.
ClusterDataStdzd <- scale(ClusterData[-1])
# Run k-means with 3 clusters on the standardised data.
# kmeans() starts from randomly chosen centres, so the seed is fixed to make
# the run reproducible, and nstart = 25 keeps the best of 25 random starts
# instead of relying on a single, possibly poor, initialisation.
set.seed(42)
Groups <- kmeans(ClusterDataStdzd, centers = 3, nstart = 25)
# Load the cluster library (provides clusplot).
library("cluster")
# Visualize the clusters
clusplot(ClusterDataStdzd, Groups$cluster)
# Summarise the data: number of observations in each cluster.
# The sizes below were recorded from the original, unseeded run; the cluster
# numbering (and possibly the counts) may differ in a new run.
Groups$size
## [1] 95 115 90
## 5. Conjoint Analysis in R
# Read the three exercise files: the preference data, the profiles matrix and
# the level names.
ConjointData <- read.csv(
  file = "C:/Data Science for Marketing/Exercise_Files/06_02/conjoint-r.csv"
)
ConjointRprofiles <- read.csv(
  file = "C:/Data Science for Marketing/Exercise_Files/06_02/conjoint-r-profiles-matrix.csv"
)
ConjointDataLevels <- read.csv(
  file = "C:/Data Science for Marketing/Exercise_Files/06_02/conjoint-r-level-names.csv"
)
# Load the conjoint package (uncomment the next line to install it first).
#install.packages("conjoint")
library(conjoint)
# Model a single respondent: only the first row of the preference data.
caUtilities(
  y = ConjointData[1, ],
  x = ConjointRprofiles,
  z = ConjointDataLevels
)
##
## Call:
## lm(formula = frml)
##
## Residuals:
## ALL 6 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2,667e+00 NA NA NA
## factor(x$photos)1 6,667e-01 NA NA NA
## factor(x$photos)2 -3,333e-01 NA NA NA
## factor(x$content.source)1 9,065e-17 NA NA NA
## factor(x$content.type)1 1,667e+00 NA NA NA
## factor(x$content.type)2 -3,333e-01 NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 5 and 0 DF, p-value: NA
## [1] 2.666667e+00 6.666667e-01 -3.333333e-01 -3.333333e-01 9.064933e-17
## [6] -9.064933e-17 1.666667e+00 -3.333333e-01 -1.333333e+00
# Author's interpretation of this respondent's utilities: this individual is
# more interested in the editing function than in the filter or collage features.
ConjointData[1, ]
# Author's note on the row printed by the line above: profile 4 has been given
# a higher value than the rest.
# Model the whole dataset (all respondents).
caUtilities(
  y = ConjointData,
  x = ConjointRprofiles,
  z = ConjointDataLevels
)
##
## Call:
## lm(formula = frml)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1,992 -1,056 -0,056 0,944 2,082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2,52667 0,02702 93,520 < 2e-16 ***
## factor(x$photos)1 -0,43533 0,03417 -12,739 < 2e-16 ***
## factor(x$photos)2 0,42667 0,05403 7,896 4,00e-15 ***
## factor(x$content.source)1 -0,00200 0,03625 -0,055 0,956
## factor(x$content.type)1 0,45867 0,09042 5,073 4,16e-07 ***
## factor(x$content.type)2 -0,03333 0,03417 -0,975 0,329
## ---
## Signif. codes: 0 '***' 0,001 '**' 0,01 '*' 0,05 '.' 0,1 ' ' 1
##
## Residual standard error: 1,146 on 2994 degrees of freedom
## Multiple R-squared: 0,09312, Adjusted R-squared: 0,0916
## F-statistic: 61,48 on 5 and 2994 DF, p-value: < 2,2e-16
## [1] 2.526666667 -0.435333333 0.426666667 0.008666667 -0.002000000
## [6] 0.002000000 0.458666667 -0.033333333 -0.425333333