# Load the music survey responses and discard the final column before analysis.
music <- read.csv(file = "music.csv")
music <- music[, -length(music)]
cat(
  "After looking at summary of music data, we see no outliers so no need to log any variable."
)
## After looking at summary of music data, we see no outliers so no need to log any variable.
library(psych)
# PCA on the standardised items; the squared component standard deviations
# are the eigenvalues used to decide how many components to retain.
fit_toget_nfactors <- prcomp(x = music, scale. = TRUE)
cat("Eigen values :")
## Eigen values :
round(x = (fit_toget_nfactors$sdev)^2, digits = 3)
## [1] 14.388 2.562 1.070 0.749 0.710 0.678 0.557 0.519 0.487 0.468
## [11] 0.415 0.398 0.385 0.361 0.338 0.328 0.305 0.289 0.270 0.257
## [21] 0.241 0.235 0.222 0.201 0.194 0.192 0.182
# Scree plot of the component variances.
screeplot(x = fit_toget_nfactors)
The scree plot shows an elbow at two components. However, the Kaiser criterion suggests also including PC3, because its eigenvalue is greater than 1. For now we keep three components.
# Compare the share of total variance captured by the first three principal
# components with the share captured by three maximum-likelihood factors.
cat("Using PCA: ")
## Using PCA:
fit1 <- principal(r = music, nfactors = 3, rotate = "none")
# Sum of the three leading eigenvalues divided by the number of variables.
cum_var <- sum(fit1$values[1:3]) / ncol(music)
cat(
  "We see that cumulative variance explained by three componenets is",
  round(cum_var, 2)
)
## We see that cumulative variance explained by three componenets is 0.67
cat("Using MLE: ")
## Using MLE:
mlefit_toget_nfactors <- factanal(
  ~ .,
  data = music,
  factors = 3,
  scores = "regression"
)
# Sum of all squared loadings divided by the number of variables.
mlecum_var <- sum(mlefit_toget_nfactors$loadings^2) / ncol(music)
cat(
  "We see that cumulative variance explained by three factors is",
  round(mlecum_var, 2)
)
## We see that cumulative variance explained by three factors is 0.63
Now that the subspace is fixed, we rotate the axes using varimax, as suggested in the question.
# Correlations between the columns of the unrotated loading matrix, inspected
# before choosing a rotation.
cor(fit1$loadings)
## PC1 PC2 PC3
## PC1 1.0000000 -0.25829613 -0.56734557
## PC2 -0.2582961 1.00000000 -0.00104071
## PC3 -0.5673456 -0.00104071 1.00000000
cat("We will be using varimax rotation")
## We will be using varimax rotation
# Refit the three-component solution with a varimax rotation.
fit2 <- principal(music, nfactors = 3, rotate = "varimax")
loadings <- fit2$loadings[, 1:3]
# A loading is salient when its absolute value exceeds 0.3. Testing the raw
# value would silently blank out strong negative loadings.
sparse_loadings <- ifelse(abs(loadings) > 0.3, loadings, 0)
sparse_loadings <- round(sparse_loadings, 3)
# Display table: salient loadings as text, everything else as an empty string.
sparse_loadings_str <- ifelse(abs(sparse_loadings) > 0.3, sparse_loadings, "")
sparse_loadings_str
## RC2 RC1 RC3
## V28 "0.68" "0.392" "0.31"
## V29 "0.67" "0.323" "0.331"
## V30 "0.69" "" "0.301"
## V31 "0.482" "0.411" ""
## V32 "0.788" "0.312" ""
## V33 "0.794" "" ""
## V34 "0.783" "" ""
## V35 "" "0.757" ""
## V36 "0.337" "0.768" ""
## V37 "0.352" "0.701" "0.324"
## V38 "" "0.809" ""
## V39 "0.307" "0.777" ""
## V40 "" "0.61" "0.58"
## V41 "" "0.783" "0.303"
## V42 "0.403" "0.703" ""
## V43 "0.808" "" ""
## V44 "" "0.484" "0.696"
## V45 "" "0.36" "0.73"
## V46 "0.803" "" ""
## V47 "0.592" "" "0.339"
## V48 "0.305" "0.607" "0.337"
## V49 "" "" "0.728"
## V50 "0.65" "" "0.369"
## V51 "" "0.582" "0.545"
## V52 "0.641" "0.348" ""
## V53 "0.47" "0.486" "0.359"
## V54 "0.446" "" "0.511"
# Item selection after inspecting the rotated loadings:
#   RC2 candidates: Q28, 29, 30, 32, 33, 34, 43, 46, 47, 50, 52.
#   RC1 candidates: Q35, 36, 37, 38, 39, 41, 42, 48.
#   RC3 candidates: Q44, 45, 49 (alpha drops when Q49 is included).
# Columns 20, 23 and 25 are left out of this scale because they do not
# contribute to alpha.
fac1_fit2 <- music[, c(1:3, 5:7, 16, 19)]
alpha(x = fac1_fit2)
##
## Reliability analysis
## Call: alpha(x = fac1_fit2)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.93 0.93 0.93 0.63 14 0.0029 2.7 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.93 0.93 0.94
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## V28 0.92 0.92 0.92 0.63 12 0.0034
## V29 0.92 0.92 0.92 0.63 12 0.0033
## V30 0.92 0.92 0.92 0.63 12 0.0033
## V32 0.92 0.92 0.92 0.62 11 0.0035
## V33 0.92 0.92 0.92 0.63 12 0.0033
## V34 0.92 0.92 0.92 0.64 12 0.0033
## V43 0.92 0.92 0.92 0.63 12 0.0033
## V46 0.92 0.92 0.92 0.63 12 0.0034
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## V28 1278 0.83 0.83 0.81 0.77 3.1 1.3
## V29 1278 0.81 0.81 0.79 0.75 3.0 1.2
## V30 1278 0.81 0.81 0.78 0.74 2.8 1.3
## V32 1278 0.86 0.86 0.84 0.81 2.7 1.3
## V33 1278 0.82 0.82 0.79 0.76 2.4 1.3
## V34 1278 0.81 0.80 0.77 0.74 2.4 1.4
## V43 1278 0.82 0.82 0.78 0.76 2.3 1.3
## V46 1278 0.83 0.83 0.80 0.77 2.5 1.4
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 miss
## V28 0.02 0.12 0.12 0.33 0.27 0.13 0
## V29 0.02 0.13 0.13 0.39 0.22 0.11 0
## V30 0.03 0.19 0.17 0.28 0.22 0.11 0
## V32 0.02 0.20 0.19 0.30 0.19 0.09 0
## V33 0.03 0.29 0.21 0.25 0.14 0.08 0
## V34 0.03 0.33 0.18 0.19 0.18 0.09 0
## V43 0.03 0.33 0.23 0.23 0.11 0.06 0
## V46 0.02 0.30 0.18 0.26 0.14 0.10 0
# Second scale. Column 21 is left out: alpha is 0.94 without it versus 0.95
# with it, so it contributes little.
fac2_fit2 <- music[, c(8:12, 14:15)]
alpha(x = fac2_fit2)
##
## Reliability analysis
## Call: alpha(x = fac2_fit2)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.94 0.94 0.94 0.71 17 0.0024 3.6 1
##
## lower alpha upper 95% confidence boundaries
## 0.94 0.94 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## V35 0.94 0.94 0.93 0.73 16 0.0026
## V36 0.93 0.93 0.92 0.70 14 0.0029
## V37 0.94 0.94 0.93 0.71 15 0.0028
## V38 0.93 0.93 0.92 0.70 14 0.0030
## V39 0.93 0.93 0.93 0.70 14 0.0029
## V41 0.93 0.93 0.93 0.70 14 0.0029
## V42 0.94 0.94 0.93 0.72 16 0.0026
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## V35 1278 0.82 0.82 0.78 0.75 3.7 1.2
## V36 1278 0.89 0.89 0.87 0.85 3.6 1.1
## V37 1278 0.86 0.86 0.83 0.81 3.5 1.2
## V38 1278 0.90 0.90 0.88 0.85 3.7 1.2
## V39 1278 0.88 0.88 0.86 0.83 3.6 1.2
## V41 1278 0.88 0.89 0.87 0.84 3.7 1.1
## V42 1278 0.83 0.83 0.79 0.77 3.2 1.2
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 miss
## V35 0.02 0.06 0.05 0.22 0.41 0.24 0
## V36 0.02 0.05 0.05 0.29 0.38 0.20 0
## V37 0.03 0.05 0.08 0.27 0.36 0.21 0
## V38 0.02 0.05 0.07 0.20 0.44 0.23 0
## V39 0.02 0.04 0.06 0.24 0.42 0.22 0
## V41 0.02 0.04 0.05 0.24 0.44 0.21 0
## V42 0.03 0.06 0.11 0.40 0.27 0.14 0
# Third scale. Columns 13, 22, 24 and 27 are left out because their lower
# loadings and the alpha values argue against keeping them.
fac3_fit2 <- music[, 17:18]
alpha(x = fac3_fit2)
##
## Reliability analysis
## Call: alpha(x = fac3_fit2)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.83 0.83 0.71 0.71 4.9 0.0095 3.9 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.81 0.83 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## V44 0.71 0.71 0.5 0.71 NA NA
## V45 0.50 0.71 NA NA 0.71 0.024
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## V44 1278 0.92 0.92 0.78 0.71 4.1 1.1
## V45 1278 0.93 0.92 0.78 0.71 3.8 1.2
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 miss
## V44 0.02 0.02 0.03 0.13 0.37 0.43 0
## V45 0.03 0.04 0.06 0.19 0.37 0.32 0
# Refit a three-component PCA on the purified items only, to confirm that
# each retained item loads on its intended component.
# NOTE(review): `subset` shadows base::subset(); the name is kept in case
# later code refers to it.
subset <- cbind(fac1_fit2, fac2_fit2, fac3_fit2)
sub_fit <- principal(subset, nfactors = 3)
loadings1 <- sub_fit$loadings[, 1:3]
# Keep loadings whose absolute value exceeds 0.5, so that a strong negative
# loading is not hidden by a sign-sensitive comparison.
sparse_loadings1 <- ifelse(abs(loadings1) > 0.5, loadings1, 0)
sparse_loadings1 <- round(sparse_loadings1, 3)
# Display table: retained loadings as text, everything else as "".
sparse_loadings_str1 <- ifelse(abs(sparse_loadings1) > 0.3, sparse_loadings1, "")
sparse_loadings_str1
## RC1 RC2 RC3
## V28 "" "0.668" ""
## V29 "" "0.668" ""
## V30 "" "0.698" ""
## V32 "" "0.79" ""
## V33 "" "0.802" ""
## V34 "" "0.799" ""
## V43 "" "0.802" ""
## V46 "" "0.817" ""
## V35 "0.746" "" ""
## V36 "0.785" "" ""
## V37 "0.742" "" ""
## V38 "0.827" "" ""
## V39 "0.8" "" ""
## V41 "0.813" "" ""
## V42 "0.75" "" ""
## V44 "" "" "0.769"
## V45 "" "" "0.825"
We notice that Q28, 29, 30, 32, 33, 34, 43 and 46 should be included in RC2 after purification, and Q35, 36, 37, 38, 39, 41 and 42 in RC1. For RC3 the viable candidates were Q44, 45 and 49, but alpha decreases if Q49 is included. This might indicate another factor, but we do not currently have enough information or related questions to examine it, so we dropped Q49 during purification.
RC2 measures an adult’s agreement with expressing music preferences among friends and in the media.
RC1 measures the positive personal effect of music.
RC3 concerns the somewhat negative effects of music on the person.
It may be questionable whether to keep the third factor or not. We may need to add further questions to examine the third factor. However, two factors RC1, RC2 are confirmed.
a. Log
# Read the book data and apply log(1 + x) to every column.
book <- read.csv(file = "book.csv")
book_log <- log(1 + book)
b. Cov vs Cor matrix
The choice should not matter here: after taking the log of the counts, most values lie between 0 and 6. Also, all variables are counts, so there is no concern about differing units.
c. PCA
# PCA on the logged counts without rescaling; the squared component standard
# deviations are printed as eigenvalues.
fit_toget_nfactors <- prcomp(x = book_log)
cat("Eigen values :")
## Eigen values :
print(round(x = (fit_toget_nfactors$sdev)^2, digits = 3))
## [1] 2.447 0.745 0.600 0.451 0.362 0.262 0.241 0.214 0.207 0.182 0.169
## [12] 0.162 0.147 0.133 0.129 0.112 0.102 0.097 0.086 0.078 0.072 0.067
## [23] 0.056 0.043 0.035 0.024 0.020 0.017 0.013 0.001
# Scree plot of the component variances.
screeplot(x = fit_toget_nfactors)
cat("Scree plot suggests to keep only one factor.")
## Scree plot suggests to keep only one factor.
# Standard deviation, proportion and cumulative proportion per component.
summary(fit_toget_nfactors)[["importance"]]
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.564253 0.8631117 0.7747793 0.6712908 0.6020465
## Proportion of Variance 0.336420 0.1024200 0.0825300 0.0619600 0.0498300
## Cumulative Proportion 0.336420 0.4388500 0.5213800 0.5833400 0.6331700
## PC6 PC7 PC8 PC9 PC10
## Standard deviation 0.5116735 0.4912901 0.4622533 0.4549243 0.4269485
## Proportion of Variance 0.0360000 0.0331900 0.0293800 0.0284500 0.0250600
## Cumulative Proportion 0.6691700 0.7023500 0.7317300 0.7601900 0.7852500
## PC11 PC12 PC13 PC14 PC15
## Standard deviation 0.4108903 0.4024147 0.3835579 0.3644135 0.3597142
## Proportion of Variance 0.0232100 0.0222600 0.0202300 0.0182600 0.0177900
## Cumulative Proportion 0.8084600 0.8307300 0.8509500 0.8692100 0.8870000
## PC16 PC17 PC18 PC19 PC20
## Standard deviation 0.3341145 0.3196087 0.3107098 0.2925616 0.2796614
## Proportion of Variance 0.0153500 0.0140400 0.0132700 0.0117700 0.0107500
## Cumulative Proportion 0.9023500 0.9164000 0.9296700 0.9414400 0.9521900
## PC21 PC22 PC23 PC24 PC25
## Standard deviation 0.2677287 0.2594787 0.23659 0.2062199 0.1865023
## Proportion of Variance 0.0098600 0.0092600 0.00770 0.0058500 0.0047800
## Cumulative Proportion 0.9620500 0.9713000 0.97900 0.9848500 0.9896300
## PC26 PC27 PC28 PC29 PC30
## Standard deviation 0.1563184 0.1409756 0.1316358 0.1132945 0.03099401
## Proportion of Variance 0.0033600 0.0027300 0.0023800 0.0017600 0.00013000
## Cumulative Proportion 0.9929900 0.9957200 0.9981000 0.9998700 1.00000000
# Compare the share of total variance captured by the first two principal
# components with the share captured by two maximum-likelihood factors.
cat("Using PCA: ")
## Using PCA:
pca1 <- principal(r = book_log, nfactors = 2, rotate = "none")
# Sum of the two leading eigenvalues divided by the number of variables.
cum_var <- sum(pca1$values[1:2]) / ncol(book_log)
cat(
  "We see that cumulative variance explained by two componenets is",
  round(cum_var, 2)
)
## We see that cumulative variance explained by two componenets is 0.29
cat("Using MLE: ")
## Using MLE:
mlefit_toget_nfactors <- factanal(
  ~ .,
  data = book_log,
  factors = 2,
  scores = "regression"
)
# Sum of all squared loadings divided by the number of variables.
mlecum_var <- sum(mlefit_toget_nfactors$loadings^2) / ncol(book_log)
cat(
  "We see that cumulative variance explained by two factors is",
  round(mlecum_var, 2)
)
## We see that cumulative variance explained by two factors is 0.24
Now that the subspace is fixed, we rotate the axes using varimax, as suggested in the question.
# Correlation between the columns of the unrotated loading matrix, inspected
# before choosing a rotation.
cor(pca1$loadings)
## PC1 PC2
## PC1 1.00000000 -0.03316048
## PC2 -0.03316048 1.00000000
cat("We will be using varimax rotation")
## We will be using varimax rotation
# Refit the two-component solution with a varimax rotation.
pca2 <- principal(book_log, nfactors = 2, rotate = "varimax")
pca2loadings <- pca2$loadings[, 1:2]
# The intermediate "zero out small loadings" step is intentionally skipped
# here; the rounded loadings are filtered directly for display.
#pca_sparse_loadings <- ifelse(pca2loadings>0.3,pca2loadings,0)
pca_sparse_loadings <- round(pca2loadings, 3)
# A loading is salient when its absolute value exceeds 0.3. Testing the raw
# value would silently blank out strong negative loadings.
pca_sparse_loadings_str <- ifelse(
  abs(pca_sparse_loadings) > 0.3,
  pca_sparse_loadings,
  ""
)
pca_sparse_loadings_str
## RC1 RC2
## fiction1 "0.549" "0.388"
## classics3 "0.501" ""
## cartoons5 "0.34" "0.424"
## legends6 "0.405" ""
## philosophy7 "" "0.431"
## religion8 "0.452" "0.41"
## psychology9 "" "0.395"
## linguistics10 "" "0.493"
## art12 "0.632" ""
## music14 "0.425" ""
## facsimile17 "0.348" ""
## history19 "0.738" ""
## conthist20 "0.587" ""
## economy21 "" "0.606"
## politics22 "" ""
## science23 "" "0.539"
## compsci26 "" "0.526"
## railroads27 "0.494" ""
## maps30 "0.359" ""
## travelguides31 "0.644" ""
## health35 "" "0.694"
## cooking36 "0.336" "0.339"
## learning37 "" "0.692"
## GamesRiddles38 "" "0.301"
## sports39 "" ""
## hobby40 "0.482" "0.437"
## nature41 "0.381" "0.358"
## encyclopaedia44 "0.324" "0.51"
## videos50 "" ""
## nonbooks99 "" ""
# For each rotated component, flag the variables whose loading exceeds 0.6
# and pull the matching columns out of the logged data.
idx1 <- pca2loadings[, 1] > 0.6
idx2 <- pca2loadings[, 2] > 0.6
rc1_data <- book_log[, idx1, drop = FALSE]
rc2_data <- book_log[, idx2, drop = FALSE]
# Reliability of the first item set.
alpha(x = rc1_data)
##
## Reliability analysis
## Call: alpha(x = rc1_data)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.68 0.71 0.62 0.45 2.4 0.005 0.5 0.57
##
## lower alpha upper 95% confidence boundaries
## 0.68 0.68 0.69
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## art12 0.64 0.65 0.48 0.48 1.9 0.0071
## history19 0.55 0.57 0.40 0.40 1.3 0.0084
## travelguides31 0.56 0.62 0.45 0.45 1.6 0.0074
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## art12 9856 0.70 0.78 0.59 0.50 0.26 0.51
## history19 9856 0.86 0.81 0.66 0.56 0.77 0.89
## travelguides31 9856 0.80 0.79 0.62 0.52 0.46 0.74
# Reliability of the second item set.
alpha(x = rc2_data)
##
## Reliability analysis
## Call: alpha(x = rc2_data)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.59 0.65 0.56 0.38 1.9 0.0057 0.36 0.45
##
## lower alpha upper 95% confidence boundaries
## 0.58 0.59 0.6
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## economy21 0.57 0.61 0.44 0.44 1.57 0.0077
## health35 0.44 0.49 0.33 0.33 0.97 0.0096
## learning37 0.40 0.56 0.38 0.38 1.25 0.0074
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## economy21 9856 0.60 0.74 0.52 0.42 0.11 0.31
## health35 9856 0.89 0.79 0.63 0.51 0.65 0.86
## learning37 9856 0.75 0.77 0.57 0.47 0.31 0.54
The data are not sufficient, as the number of factors suggested by the loadings is much greater than 3.
# Build per-player, per-36-minute statistics from the 2015 box scores, then
# fit a three-component PCA on them.
nba <- read.csv("box2015.csv", header = TRUE)

# Per-player mean of every column.
pergame <- aggregate(nba, by = list(player = nba$PLAYER), FUN = mean, na.rm = FALSE)
pergame <- pergame[-c(2:5)] # team, date etc. NA fields removed

# Rescale every averaged statistic to a 36-minute basis.
pergame$multiplier <- 36 / pergame$MINUTES_PLAYED
pergame36 <- pergame$multiplier * pergame[-c(1)]
pergame36$player <- pergame$player
# NOTE(review): positional drop. The fitted model contains neither
# `multiplier` nor `player`, so columns 20 and 21 are presumably those two;
# confirm columns 1 and 2 against the CSV layout.
pergame36 <- pergame36[-c(1, 2, 20, 21)]

# Shooting percentages; a player with no makes gets 0 instead of a 0/0 ratio.
pergame36$percent <- ifelse(
  pergame36$FIELD_GOALS_MADE > 0,
  pergame36$FIELD_GOALS_MADE / pergame36$FIELD_GOALS_ATT,
  0
)
pergame36$percent_three <- ifelse(
  pergame36$THREE_POINT_MADE > 0,
  pergame36$THREE_POINT_MADE / pergame36$THREE_POINT_ATT,
  0
)
# The explicit `> 0` matches the two tests above rather than relying on
# numeric-to-logical coercion.
pergame36$percent_ft <- ifelse(
  pergame36$FREE_THROWS_MADE > 0,
  pergame36$FREE_THROWS_MADE / pergame36$FREE_THROWS_ATT,
  0
)

# Drop rows containing missing values (presumably the players with zero
# minutes, whose rescaled statistics are undefined).
pergame36 <- na.omit(pergame36)
pergame36 <- pergame36[, -c(5, 8)]

# Argument names are spelled out in full (`scale.`, `nfactors`) instead of
# relying on partial matching, and TRUE replaces the reassignable T.
screeplot(prcomp(pergame36, scale. = TRUE))
fit <- principal(pergame36, nfactors = 3)
fit
## Principal Components Analysis
## Call: principal(r = pergame36, nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## FIELD_GOALS_MADE 0.04 0.87 -0.04 0.754 0.25 1.0
## FIELD_GOALS_ATT -0.25 0.81 0.13 0.732 0.27 1.3
## FREE_THROWS_MADE 0.20 0.68 0.52 0.771 0.23 2.1
## FREE_THROWS_ATT 0.38 0.60 0.50 0.748 0.25 2.7
## OFFENSIVE_REBOUNDS 0.82 0.12 -0.27 0.754 0.25 1.3
## DEFENSIVE_REBOUNDS 0.67 0.23 -0.14 0.522 0.48 1.3
## ASSISTS -0.28 0.12 0.71 0.595 0.40 1.4
## PERSONAL_FOULS 0.56 -0.20 -0.08 0.356 0.64 1.3
## DISQUALIFICATIONS 0.20 -0.03 -0.18 0.074 0.93 2.0
## STEALS -0.07 -0.24 0.61 0.432 0.57 1.3
## TURNOVERS 0.03 0.28 0.68 0.538 0.46 1.3
## BLOCKED_SHOTS 0.70 0.14 -0.23 0.559 0.44 1.3
## THREE_POINT_MADE -0.83 0.24 -0.18 0.788 0.21 1.3
## THREE_POINT_ATT -0.87 0.16 -0.07 0.788 0.21 1.1
## PLUS_MINUS -0.09 0.35 -0.03 0.131 0.87 1.2
## percent 0.50 0.44 -0.31 0.542 0.46 2.7
## percent_three -0.72 0.23 -0.12 0.582 0.42 1.3
## percent_ft -0.31 0.39 0.14 0.270 0.73 2.2
##
## RC1 RC2 RC3
## SS loadings 4.61 3.10 2.22
## Proportion Var 0.26 0.17 0.12
## Cumulative Var 0.26 0.43 0.55
## Proportion Explained 0.46 0.31 0.22
## Cumulative Proportion 0.46 0.78 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 988.49 with prob < 4.1e-145
##
## Fit based upon off diagonal values = 0.92
# Candidate scales, selected by column name so that the grouping does not
# depend on the column order of `pergame36`.
rimprotection <- pergame36[, c(
  "OFFENSIVE_REBOUNDS", "DEFENSIVE_REBOUNDS", "PERSONAL_FOULS",
  "BLOCKED_SHOTS", "THREE_POINT_MADE", "THREE_POINT_ATT", "percent_three"
)]
scoring <- pergame36[, c(
  "FIELD_GOALS_MADE", "FIELD_GOALS_ATT", "FREE_THROWS_MADE", "FREE_THROWS_ATT"
)]
support <- pergame36[, c(
  "FREE_THROWS_MADE", "FREE_THROWS_ATT", "ASSISTS", "STEALS", "TURNOVERS"
)]
# check.keys = TRUE lets alpha() reverse-key items that correlate negatively
# with the scale; TRUE is used because T can be reassigned.
alpha(rimprotection, check.keys = TRUE)
##
## Reliability analysis
## Call: alpha(x = rimprotection, check.keys = T)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.8 0.87 0.89 0.49 6.8 0.01 6.4 1
##
## lower alpha upper 95% confidence boundaries
## 0.78 0.8 0.82
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## OFFENSIVE_REBOUNDS 0.74 0.84 0.86 0.47 5.3 0.013
## DEFENSIVE_REBOUNDS 0.78 0.86 0.88 0.51 6.3 0.010
## PERSONAL_FOULS 0.79 0.88 0.89 0.54 7.0 0.010
## BLOCKED_SHOTS 0.79 0.86 0.88 0.50 6.1 0.011
## THREE_POINT_MADE- 0.75 0.84 0.83 0.46 5.2 0.013
## THREE_POINT_ATT- 0.76 0.83 0.83 0.46 5.0 0.014
## percent_three- 0.82 0.86 0.88 0.50 5.9 0.011
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## OFFENSIVE_REBOUNDS 476 0.83 0.82 0.79 0.75 1.63 1.36
## DEFENSIVE_REBOUNDS 476 0.75 0.70 0.62 0.57 4.90 2.03
## PERSONAL_FOULS 476 0.59 0.61 0.50 0.45 3.35 1.30
## BLOCKED_SHOTS 476 0.67 0.71 0.64 0.61 0.76 0.78
## THREE_POINT_MADE- 476 0.83 0.83 0.85 0.77 11.75 1.06
## THREE_POINT_ATT- 476 0.88 0.85 0.87 0.74 9.55 2.56
## percent_three- 476 0.63 0.74 0.68 0.62 12.63 0.16
# Reliability of the scoring scale.
alpha(x = scoring)
##
## Reliability analysis
## Call: alpha(x = scoring)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.79 0.85 0.93 0.58 5.6 0.014 5.6 1.8
##
## lower alpha upper 95% confidence boundaries
## 0.77 0.79 0.82
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## FIELD_GOALS_MADE 0.69 0.82 0.86 0.60 4.5 0.024
## FIELD_GOALS_ATT 0.80 0.81 0.84 0.59 4.4 0.018
## FREE_THROWS_MADE 0.74 0.78 0.78 0.54 3.6 0.017
## FREE_THROWS_ATT 0.76 0.82 0.80 0.60 4.4 0.012
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## FIELD_GOALS_MADE 476 0.86 0.81 0.76 0.76 5.2 1.8
## FIELD_GOALS_ATT 476 0.89 0.82 0.77 0.68 11.8 3.4
## FREE_THROWS_MADE 476 0.79 0.87 0.86 0.70 2.3 1.4
## FREE_THROWS_ATT 476 0.73 0.82 0.81 0.57 3.2 1.9
# Reliability of the support scale.
alpha(x = support)
##
## Reliability analysis
## Call: alpha(x = support)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.68 0.71 0.8 0.33 2.5 0.022 2.3 0.99
##
## lower alpha upper 95% confidence boundaries
## 0.64 0.68 0.73
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## FREE_THROWS_MADE 0.52 0.61 0.59 0.28 1.5 0.032
## FREE_THROWS_ATT 0.57 0.62 0.59 0.29 1.6 0.026
## ASSISTS 0.72 0.68 0.75 0.35 2.1 0.015
## STEALS 0.70 0.76 0.84 0.44 3.1 0.023
## TURNOVERS 0.62 0.64 0.75 0.31 1.8 0.027
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## FREE_THROWS_MADE 476 0.82 0.78 0.83 0.69 2.3 1.40
## FREE_THROWS_ATT 476 0.79 0.75 0.80 0.56 3.2 1.88
## ASSISTS 476 0.69 0.66 0.54 0.35 3.0 2.05
## STEALS 476 0.37 0.50 0.27 0.24 1.1 0.66
## TURNOVERS 476 0.69 0.72 0.61 0.57 2.0 0.92
The three factors are:
a. Rimprotection
b. Scoring
c. Support
We do not need to transform the variables, as the data are not highly skewed. The variables are all per-player statistics, which cannot grow on a scale of hundreds or thousands. Also, we used the per-36-minute standardization approach, after which the maximum value for the variables can be 20.
Scoring 10 points in a game for a player who played 35 minutes should not count the same as scoring 10 points in 8 minutes. A player who plays longer is expected to have more opportunities to score baskets, so it would be unfair not to control for the difference in playing time. To address this, we standardized most of the variables on a per-36-minute scale.
We want rates for most of the variables, but not necessarily all the variables. For example, we would want a rate on how many steals a player made in a 36 min period, but we still want to know how many minutes a player played in a game and we should not standardize that. Additionally, some variables are in percentages like free throw success percentage etc.
We have dropped the records with 0 min played.
We have used three percentage variables: the proportion of field goals made, the proportion of three-point shots made, and the proportion of free throws made.
We have used player as the unit of analysis to have more granularity for doing factor analysis. We may consider team as the unit in future.
Thank you.