Data Exploration
data <- read.csv("Factor-Hair-Revised.csv")
dim(data)
## [1] 100 13
str(data)
## 'data.frame': 100 obs. of 13 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ProdQual : num 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
## $ Ecom : num 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
## $ TechSup : num 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
## $ CompRes : num 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
## $ Advertising : num 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
## $ ProdLine : num 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
## $ SalesFImage : num 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
## $ ComPricing : num 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
## $ WartyClaim : num 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
## $ OrdBilling : num 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
## $ DelSpeed : num 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
## $ Satisfaction: num 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...
names(data)
## [1] "ID" "ProdQual" "Ecom" "TechSup" "CompRes"
## [6] "Advertising" "ProdLine" "SalesFImage" "ComPricing" "WartyClaim"
## [11] "OrdBilling" "DelSpeed" "Satisfaction"
data_X <- subset(data, select = -c(1))  # drop the ID column
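Before computing correlations it is worth confirming that the data contain no missing values; a quick check along these lines (not part of the original output) would be:
colSums(is.na(data_X))  # missing-value count per variable; all should be zero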
Correlation Analysis
library(corrplot)
## corrplot 0.95 loaded
datamatrix <- cor(data_X[-12])  # correlations among the 11 predictors (Satisfaction excluded)
corrplot(datamatrix, method = "number")
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.5.1
## Loading required package: MASS
# partial correlations among the 11 predictors; cells not significant at the 1% level are left blank
coeff <- pcor(data_X[-12], method = "pearson")
corrplot(coeff$estimate, type = "upper", order = "hclust",
         p.mat = coeff$p.value, sig.level = 0.01, insig = "blank")
library(car)
## Loading required package: carData
model <- lm(Satisfaction ~ ., data = data_X)  # full linear model, fitted to check multicollinearity
vif(model)
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 1.635797 2.756694 2.976796 4.730448 1.508933 3.488185
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 3.439420 1.635000 3.198337 2.902999 6.516014
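A common rule of thumb treats a VIF above 5 as a sign of appreciable multicollinearity; here only DelSpeed (6.52) crosses that threshold. One way to flag such predictors (an add-on check, not part of the original output):
vif_values <- vif(model)
vif_values[vif_values > 5]  # predictors exceeding the rule-of-thumb VIF threshold of 5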
library(psych)
## Warning: package 'psych' was built under R version 4.5.1
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
data_fa <- data_X[, -12]  # keep the 11 predictors; drop Satisfaction
datamatrix <- cor(data_fa)
KMO(r=datamatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datamatrix)
## Overall MSA = 0.65
## MSA for each item =
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.51 0.63 0.52 0.79 0.78 0.62
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.62 0.75 0.51 0.76 0.67
cortest.bartlett(datamatrix, nrow(data_X))
## $chisq
## [1] 619.2726
##
## $p.value
## [1] 1.79337e-96
##
## $df
## [1] 55
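The overall MSA of 0.65 is above the conventional 0.5 cut-off, and Bartlett's test firmly rejects the hypothesis that the correlation matrix is an identity matrix (chi-square = 619.27 on 55 df, p ≈ 1.8e-96), so the predictors are suitable for factor analysis.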
Factor Analysis
ev <- eigen(cor(data_fa))
ev$values
## [1] 3.42697133 2.55089671 1.69097648 1.08655606 0.60942409 0.55188378
## [7] 0.40151815 0.24695154 0.20355327 0.13284158 0.09842702
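Four eigenvalues exceed 1, so the Kaiser criterion points to retaining four factors. A one-line confirmation (not in the original output):
sum(ev$values > 1)  # number of factors retained under the Kaiser criterion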
library(psych)
Factor <- 1:11
Eigen_Values <- ev$values
scree <- data.frame(Factor, Eigen_Values)
plot(scree, main = "Scree Plot", col = "blue", ylim = c(0, 4))
lines(scree, col = "red")
abline(h = 1, col = "green")  # eigenvalue = 1 reference line (Kaiser criterion)
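As an optional cross-check that is not part of the original analysis, parallel analysis from the psych package can also be used to suggest the number of factors:
fa.parallel(data_fa, fm = "pa", fa = "fa")  # compares observed eigenvalues with those from random data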
# fit a four-factor model with varimax rotation and save regression factor scores
nfactors <- 4
fit1 <- factanal(data_fa, nfactors, scores = c("regression"), rotation = "varimax")
print(fit1)
##
## Call:
## factanal(x = data_fa, factors = nfactors, scores = c("regression"), rotation = "varimax")
##
## Uniquenesses:
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.682 0.360 0.228 0.178 0.679 0.005
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.017 0.636 0.163 0.347 0.076
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## ProdQual 0.557
## Ecom 0.793
## TechSup 0.872 0.102
## CompRes 0.884 0.142 0.135
## Advertising 0.190 0.521 -0.110
## ProdLine 0.502 0.104 0.856
## SalesFImage 0.119 0.974 -0.130
## ComPricing 0.225 -0.216 -0.514
## WartyClaim 0.894 0.158
## OrdBilling 0.794 0.101 0.105
## DelSpeed 0.928 0.189 0.164
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 2.592 1.977 1.638 1.423
## Proportion Var 0.236 0.180 0.149 0.129
## Cumulative Var 0.236 0.415 0.564 0.694
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 24.26 on 17 degrees of freedom.
## The p-value is 0.113
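The four factors jointly explain 69.4% of the total variance, and the test of sufficiency is not rejected (p = 0.113), so a four-factor solution is an adequate summary of the 11 predictors.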
# principal-axis factoring with varimax rotation; scores will be used as regression inputs
fa_var <- fa(r = data_fa, nfactors = 4, rotate = "varimax", fm = "pa")
fa.diagram(fa_var)
head(fa_var$scores)
## PA1 PA2 PA3 PA4
## 1 -0.1338871 0.9175166 -1.719604873 0.09135411
## 2 1.6297604 -2.0090053 -0.596361722 0.65808192
## 3 0.3637658 0.8361736 0.002979966 1.37548765
## 4 -1.2225230 -0.5491336 1.245473305 -0.64421384
## 5 -0.4854209 -0.4276223 -0.026980304 0.47360747
## 6 -0.5950924 -1.3035333 -1.183019401 -0.95913571
Regression
# combine Satisfaction with the four factor scores and give the factors descriptive names
regdata <- cbind(data_X[12], fa_var$scores)
names(regdata) <- c("Satisfaction", "Purchase", "Marketing",
                    "Post_purchase", "Prod_positioning")
head(regdata)
## Satisfaction Purchase Marketing Post_purchase Prod_positioning
## 1 8.2 -0.1338871 0.9175166 -1.719604873 0.09135411
## 2 5.7 1.6297604 -2.0090053 -0.596361722 0.65808192
## 3 8.9 0.3637658 0.8361736 0.002979966 1.37548765
## 4 4.8 -1.2225230 -0.5491336 1.245473305 -0.64421384
## 5 7.1 -0.4854209 -0.4276223 -0.026980304 0.47360747
## 6 4.7 -0.5950924 -1.3035333 -1.183019401 -0.95913571
set.seed(100)
indices <- sample(1:nrow(regdata), 0.7 * nrow(regdata))
train <- regdata[indices, ]
test <- regdata[-indices, ]
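With set.seed(100) the 70/30 split is reproducible; a quick sanity check of the partition sizes (an add-on, not in the original output):
c(train = nrow(train), test = nrow(test))  # expect 70 and 30 rows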
model1 <- lm(Satisfaction ~ ., data = train)
summary(model1)
##
## Call:
## lm(formula = Satisfaction ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.76280 -0.48717 0.06799 0.46459 1.24022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.91852 0.08068 85.757 < 2e-16 ***
## Purchase 0.50230 0.07641 6.574 9.74e-09 ***
## Marketing 0.75488 0.08390 8.998 5.00e-13 ***
## Post_purchase 0.08755 0.08216 1.066 0.291
## Prod_positioning 0.58074 0.08781 6.614 8.30e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6629 on 65 degrees of freedom
## Multiple R-squared: 0.7261, Adjusted R-squared: 0.7093
## F-statistic: 43.08 on 4 and 65 DF, p-value: < 2.2e-16
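On the training data the model explains roughly 71% of the variance in Satisfaction (adjusted R-squared = 0.709). Purchase, Marketing, and Prod_positioning are highly significant, while Post_purchase is not (p = 0.291).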
print(vif(model1))
## Purchase Marketing Post_purchase Prod_positioning
## 1.009648 1.008235 1.015126 1.024050
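The VIFs are all close to 1, as expected for varimax-rotated factor scores, confirming that the multicollinearity seen among the raw predictors has been removed. Since Post_purchase is not significant, a possible follow-up (a sketch, not part of the original analysis) is to refit without it and compare fit:
model2 <- lm(Satisfaction ~ Purchase + Marketing + Prod_positioning, data = train)
summary(model2)  # compare adjusted R-squared with model1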
pred_test1 <- predict(model1, newdata = test, type = "response")  # predict Satisfaction for the 30 hold-out rows
pred_test1
## 6 8 11 13 17 19 26 33
## 4.975008 5.908267 6.951629 8.677431 6.613838 6.963113 6.313513 6.141338
## 34 35 37 40 42 44 49 50
## 6.158993 7.415742 6.589746 6.858206 7.133989 8.533080 8.765145 8.078744
## 53 56 57 60 65 67 71 73
## 7.395438 7.468360 8.744402 6.276660 5.936570 6.650322 8.299545 7.685564
## 75 80 96 97 99 100
## 7.330191 6.719528 7.540233 6.143172 8.084583 5.799897
test$Satisfaction_Predicted <- pred_test1
head(test[c(1, 6)], 10)  # actual vs. predicted Satisfaction for the first ten test rows
## Satisfaction Satisfaction_Predicted
## 6 4.7 4.975008
## 8 6.3 5.908267
## 11 7.4 6.951629
## 13 8.4 8.677431
## 17 6.4 6.613838
## 19 6.8 6.963113
## 26 6.6 6.313513
## 33 5.4 6.141338
## 34 7.3 6.158993
## 35 6.3 7.415742
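Beyond the side-by-side comparison above, the hold-out predictions can be summarised with test-set error metrics. A minimal sketch (these figures are not part of the original output):
actual <- test$Satisfaction
rmse <- sqrt(mean((actual - pred_test1)^2))   # root mean squared error
mae <- mean(abs(actual - pred_test1))         # mean absolute error
r2 <- 1 - sum((actual - pred_test1)^2) / sum((actual - mean(actual))^2)  # test-set R-squared
c(RMSE = rmse, MAE = mae, R2 = r2)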