Reading Data
# Load the ads data set from the working directory.
# stringsAsFactors = TRUE keeps text columns such as Gender as factors on
# every R version (the read.csv() default became FALSE in R 4.0.0).
dataset <- read.csv("Social_Network_Ads.csv", stringsAsFactors = TRUE)
Describing Data
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Per-column summary statistics for the whole data frame (psych package).
psych::describe(dataset)
## vars n mean sd median trimmed
## User.ID 1 515 15689466.94 71282.69 15692819 15688984.03
## Gender* 2 515 1.50 0.50 1 1.50
## Age 3 515 37.60 10.38 37 37.36
## EstimatedSalary 4 515 68100.97 34416.90 65000 65464.89
## Purchased 5 515 0.37 0.48 0 0.33
## mad min max range skew kurtosis se
## User.ID 91417.12 15566689 15815236 248547 0.01 -1.18 3141.09
## Gender* 0.00 1 2 1 0.01 -2.00 0.02
## Age 11.86 18 60 42 0.17 -0.68 0.46
## EstimatedSalary 32617.20 15000 150000 135000 0.54 -0.42 1516.59
## Purchased 0.00 0 1 1 0.56 -1.69 0.02
Two Way Contingency Table
# Cross-tabulate Gender (rows) against Purchased (columns), then show the
# resulting counts.
mytable <- xtabs(formula = ~ Gender + Purchased, data = dataset)
mytable
## Purchased
## Gender 0 1
## Female 159 100
## Male 168 88
BoxPlot
# Compare the distribution of each numeric variable between the two
# Purchased groups (0 / 1). In a boxplot formula the numeric variable goes on
# the left and the grouping variable on the right, so each plot shows one box
# per Purchased value.
boxplot(EstimatedSalary ~ Purchased, data = dataset,
        xlab = "Purchased", ylab = "EstimatedSalary")

boxplot(Age ~ Purchased, data = dataset,
        xlab = "Purchased", ylab = "Age")

Histograms
# Histogram of the Purchased column.
# hist() takes the vector directly and has no `data` argument; passing one is
# forwarded to the low-level graphics calls and only triggers
# '"data" is not a graphical parameter' warnings, so it is not supplied.
a <- dataset$Purchased
hist(a,
     main = "Distribution of Purchased", xlab = "Purchased or Not",
     col = "grey")

# Histogram of the Age column.
# hist() takes the vector directly and has no `data` argument; passing one is
# forwarded to the low-level graphics calls and only triggers
# '"data" is not a graphical parameter' warnings, so it is not supplied.
b <- dataset$Age
hist(b,
     main = "Distribution of Age", xlab = "Different Ages",
     col = "blue")

# Histogram of the EstimatedSalary column.
# hist() takes the vector directly and has no `data` argument; passing one is
# forwarded to the low-level graphics calls and only triggers
# '"data" is not a graphical parameter' warnings, so it is not supplied.
# NOTE(review): the variable name `c` is also the name of base R's c()
# function; it is kept unchanged in case other code refers to it.
c <- dataset$EstimatedSalary
hist(c,
     main = "Distribution of EstimatedSalaries", xlab = "Different Salaries ",
     col = "green")

Corrgram
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Corrgram of the ads data set: shaded panels below the diagonal, pie panels
# above it, and text panels on the diagonal. order = TRUE asks corrgram to
# reorder the variables.
# The title names the data actually plotted (Social_Network_Ads.csv).
corrgram(dataset,
         order = TRUE,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         text.panel = panel.txt,
         main = "Corrgram of Social Network Ads Variables")

Correlation matrix
library(corpcor)
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.3
# Correlation matrix of Age, EstimatedSalary and Purchased, obtained by
# rescaling their covariance matrix. The columns are selected by name rather
# than by position so the result does not depend on column order in the CSV.
data_mat <- as.matrix(dataset[, c("Age", "EstimatedSalary", "Purchased")])
covmat <- cov(data_mat)
cov2cor(covmat)
## Age EstimatedSalary Purchased
## Age 1.0000000 0.1241452 0.6387109
## EstimatedSalary 0.1241452 1.0000000 0.2954804
## Purchased 0.6387109 0.2954804 1.0000000
ScatterPlot Matrix
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Pairwise scatterplots of Gender, Age, EstimatedSalary and Purchased, drawn
# with reduced point size.
scatterplotMatrix(
  ~ Gender + Age + EstimatedSalary + Purchased,
  data = dataset,
  cex = 0.6
)

Null Hypothesis: Purchased is independent of Gender
# Cross-tabulate Purchased (rows) against Gender (columns), then show the
# resulting counts.
mytable <- xtabs(formula = ~ Purchased + Gender, data = dataset)
mytable
## Gender
## Purchased Female Male
## 0 159 168
## 1 100 88
Chi-squared test
# Chi-squared test applied to the contingency table stored in mytable.
chisq.test(x = mytable)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 0.82189, df = 1, p-value = 0.3646
t-test
# Two-sample t-test of Purchased, grouped by Gender.
t.test(Purchased ~ Gender, data = dataset)
##
## Welch Two Sample t-test
##
## data: Purchased by Gender
## t = 0.99728, df = 512.91, p-value = 0.3191
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04107827 0.12577905
## sample estimates:
## mean in group Female mean in group Male
## 0.3861004 0.3437500