## Question 1 - ANOVA
Importing the data and loading the required packages:
NFL <- read.csv("/Users/Lorraine/Desktop/Project 4 ANOVA.csv")
library(rmarkdown)
library(car)
library(compute.es)
library(ggplot2)
library(lme4)
library(multcomp)
library(pastecs)
library(psych)
library(sjstats)
library(WRS2)
library(lessR)
library(lm.beta)
library(QuantPsyc)
Checking for homogeneity of variance; the assumption was met (p = 0.5984 > 0.05):
leveneTest(NFL$Weight, NFL$Team, center = median)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.6939 0.5984
## 80
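As an optional cross-check not part of the original analysis, the Fligner-Killeen test from base R is robust to non-normality and uses the same Weight and Team columns; a quick sketch:
fligner.test(Weight ~ Team, data = NFL)  # robust test of homogeneity of variance across teams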
Separating the 5 NFL teams into groups:
Chicago <- NFL[c(1:17),1]
Philadelphia <- NFL[c(18:34),1]
Dallas <- NFL[c(35:51),1]
Denver <- NFL[c(52:68),1]
NewEngland <- NFL[c(69:85),1]
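A sketch of an alternative that does not depend on the row order of the csv (the index-based subsetting above assumes the first column holds the weights, in team order):
teams <- split(NFL$Weight, NFL$Team)  # named list with one weight vector per team
sapply(teams, length)                 # sanity check: should show 17 players per team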
Checking the normality assumption for each team; only Chicago failed to meet it (Shapiro-Wilk p = 0.029):
shapiro.test(Chicago)
##
## Shapiro-Wilk normality test
##
## data: Chicago
## W = 0.87711, p-value = 0.02855
shapiro.test(Philadelphia)
##
## Shapiro-Wilk normality test
##
## data: Philadelphia
## W = 0.9226, p-value = 0.1634
shapiro.test(Dallas)
##
## Shapiro-Wilk normality test
##
## data: Dallas
## W = 0.89936, p-value = 0.06627
shapiro.test(Denver)
##
## Shapiro-Wilk normality test
##
## data: Denver
## W = 0.89888, p-value = 0.06506
shapiro.test(NewEngland)
##
## Shapiro-Wilk normality test
##
## data: NewEngland
## W = 0.95771, p-value = 0.589
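The five tests above could also be run in one pass; a compact sketch that just collects the Shapiro-Wilk p-values for the same vectors:
groups <- list(Chicago = Chicago, Philadelphia = Philadelphia,
               Dallas = Dallas, Denver = Denver, NewEngland = NewEngland)
sapply(groups, function(x) shapiro.test(x)$p.value)  # only Chicago falls below 0.05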
Normalizing the Chicago distribution: it is a small dataset, so I removed only two outliers (the weights at or below 221), which was enough to rectify the normality issue:
boxplot(Chicago)
boxplot(Chicago,plot=FALSE)$out
## [1] 228 221 223 220
Chicagotrimmed <- Chicago[221<Chicago]
shapiro.test(Chicagotrimmed)
##
## Shapiro-Wilk normality test
##
## data: Chicagotrimmed
## W = 0.89142, p-value = 0.07046
Creating a new dataset containing the normalized NFL data:
NFLN <- data.frame(cbind(Chicagotrimmed, Philadelphia, Dallas, Denver, NewEngland))
Stack_NFLN <- stack(NFLN)
Running the one-way ANOVA using the normalized Chicago data and the original data from the other 4 teams:
NFLModel <- aov(values ~ ind, data=Stack_NFLN)
summary(NFLModel)
## Df Sum Sq Mean Sq F value Pr(>F)
## ind 4 3403 850.7 3.487 0.0112
## Residuals 80 19516 244.0
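Because Chicago originally violated normality, a robust cross-check is also possible with the already-loaded WRS2 package: a heteroscedastic one-way ANOVA on trimmed means, run on the original long-format data (the Team and Weight columns in NFL). This is only a supplementary sketch, not the analysis reported here:
t1way(Weight ~ Team, data = NFL)  # one-way ANOVA on 20% trimmed means (default tr = 0.2)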
Because the p-value was less than 0.05, running a Tukey post hoc test to identify which group difference(s) are significant:
postHocs <- glht(NFLModel, linfct = mcp(ind = "Tukey"))
summary(postHocs)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = values ~ ind, data = Stack_NFLN)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Philadelphia - Chicagotrimmed == 0 0.05882 5.35729 0.011 1.00000
## Dallas - Chicagotrimmed == 0 6.11765 5.35729 1.142 0.78378
## Denver - Chicagotrimmed == 0 -1.88235 5.35729 -0.351 0.99667
## NewEngland - Chicagotrimmed == 0 -13.23529 5.35729 -2.471 0.10777
## Dallas - Philadelphia == 0 6.05882 5.35729 1.131 0.78970
## Denver - Philadelphia == 0 -1.94118 5.35729 -0.362 0.99625
## NewEngland - Philadelphia == 0 -13.29412 5.35729 -2.482 0.10512
## Denver - Dallas == 0 -8.00000 5.35729 -1.493 0.56981
## NewEngland - Dallas == 0 -19.35294 5.35729 -3.612 0.00467
## NewEngland - Denver == 0 -11.35294 5.35729 -2.119 0.22218
## (Adjusted p values reported -- single-step method)
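The same comparisons are available from base R's TukeyHSD() applied to the aov fit, which also reports a family-wise adjusted 95% confidence interval for each pairwise difference; a quick cross-check sketch:
TukeyHSD(NFLModel)        # pairwise mean differences with adjusted 95% CIs
plot(TukeyHSD(NFLModel))  # intervals crossing zero indicate non-significant pairs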
The only significant difference is between New England and Dallas (p = 0.005). Running one-sample t-tests to obtain the mean and 95% confidence interval for each of these two teams:
t.test(NewEngland)
##
## One Sample t-test
##
## data: NewEngland
## t = 56.019, df = 16, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 228.7105 246.7013
## sample estimates:
## mean of x
## 237.7059
t.test(Dallas)
##
## One Sample t-test
##
## data: Dallas
## t = 73.947, df = 16, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 249.6895 264.4282
## sample estimates:
## mean of x
## 257.0588
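The means and 95% confidence intervals could also be pulled for all five groups at once from the stacked data; a compact sketch:
sapply(split(Stack_NFLN$values, Stack_NFLN$ind),
       function(x) c(mean = mean(x),
                     lower = t.test(x)$conf.int[1],
                     upper = t.test(x)$conf.int[2]))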
A one-way analysis of variance was used to test for differences in average player weight across five NFL teams: Chicago, Philadelphia, Dallas, Denver, and New England. A significant difference among the five group means was observed (F(4, 80) = 3.487, p < .05). Tukey post hoc comparisons indicated no significant pairwise differences except between New England (M = 237.71, 95% CI [228.71, 246.70]) and Dallas (M = 257.06, 95% CI [249.69, 264.43]); the comparison between these two teams revealed a significant difference in player weight (in lbs.).
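An effect size would round out this write-up; a small sketch computing eta-squared directly from the ANOVA table's sums of squares (roughly 3403 / (3403 + 19516), about 0.15):
ss <- summary(NFLModel)[[1]][["Sum Sq"]]  # between-group and residual sums of squares
ss[1] / sum(ss)                           # eta-squared: proportion of variance in weight explained by team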
## Question 2 - Correlations
Importing the data and running the correlations:
NBA <- read.csv("/Users/Lorraine/Desktop/Project 4 MR.csv")
library(psych)
corr.test(NBA)
## Call:corr.test(x = NBA)
## Correlation matrix
## Height Weight PTS
## Height 1.00 0.83 -0.07
## Weight 0.83 1.00 -0.01
## PTS -0.07 -0.01 1.00
## Sample Size
## [1] 54
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## Height Weight PTS
## Height 0.00 0.00 1
## Weight 0.00 0.00 1
## PTS 0.62 0.94 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
A strong positive correlation was found between Weight and Height (r = 0.83), a weak correlation between Height and PTS (r = -0.069), and an even weaker one between Weight and PTS (r = -0.0098). The only significant correlation was between Height and Weight (p < .05).
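As the corr.test output itself notes, confidence intervals for these correlations can be printed with short = FALSE, and the one significant pair can be checked directly with base R's cor.test; a quick sketch:
print(corr.test(NBA), short = FALSE)  # correlation matrix with confidence intervals
cor.test(NBA$Height, NBA$Weight)      # r, 95% CI, and p-value for the Height-Weight pair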
## Question 3 - Simple Regression
Running a simple regression analysis predicting points from height:
reg <- lm(PTS ~ Height, data=NBA)
summary(reg)
##
## Call:
## lm(formula = PTS ~ Height, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.713 -3.864 -1.395 1.644 15.444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.6256 11.7428 1.501 0.139
## Height -0.8858 1.7785 -0.498 0.621
##
## Residual standard error: 5.942 on 52 degrees of freedom
## Multiple R-squared: 0.004748, Adjusted R-squared: -0.01439
## F-statistic: 0.2481 on 1 and 52 DF, p-value: 0.6205
The regression was not significant (F(1, 52) = 0.2481, p > 0.05). Height explains only about 0.5% of the variance in points (R-squared = 0.004748). Points tend to decrease slightly as height increases: Y = 17.6256 - 0.8858(Height).
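A quick visual check of this weak, non-significant relationship, using the model already fitted above:
plot(PTS ~ Height, data = NBA)  # scatterplot of points against height
abline(reg)                     # fitted line is nearly flat, consistent with the tiny R-squared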
## Question 4 - Multiple Regression
Running a multiple regression analysis predicting points from both height and weight:
stat.desc(NBA)
## Height Weight PTS
## nbr.val 54.00000000 54.0000000 54.0000000
## nbr.null 0.00000000 0.0000000 0.0000000
## nbr.na 0.00000000 0.0000000 0.0000000
## min 5.70000000 105.0000000 2.8000000
## max 7.60000000 263.0000000 27.4000000
## range 1.90000000 158.0000000 24.6000000
## sum 355.70000000 11335.0000000 636.7000000
## median 6.65000000 212.5000000 10.7500000
## mean 6.58703704 209.9074074 11.7907407
## SE.mean 0.06244753 4.1185497 0.8027872
## CI.mean.0.95 0.12525389 8.2607646 1.6101872
## var 0.21058351 915.9723969 34.8012334
## std.dev 0.45889379 30.2650359 5.8992570
## coef.var 0.06966619 0.1441828 0.5003296
Mreg <- lm(PTS ~ Height + Weight, data=NBA)
summary(Mreg)
##
## Call:
## lm(formula = PTS ~ Height + Weight, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.801 -3.630 -1.222 1.326 14.668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.28743 13.99607 1.592 0.117
## Height -2.56738 3.24536 -0.791 0.433
## Weight 0.03056 0.04921 0.621 0.537
##
## Residual standard error: 5.977 on 51 degrees of freedom
## Multiple R-squared: 0.01222, Adjusted R-squared: -0.02652
## F-statistic: 0.3154 on 2 and 51 DF, p-value: 0.7309
coefficients(reg) # coefficients of the simple (height-only) model
## (Intercept) Height
## 17.6256168 -0.8858119
confint(reg, level=0.95) # 95% CIs for the simple model's parameters
## 2.5 % 97.5 %
## (Intercept) -5.938055 41.189288
## Height -4.454601 2.682978
anova(reg) # ANOVA table for the simple model
## Analysis of Variance Table
##
## Response: PTS
## Df Sum Sq Mean Sq F value Pr(>F)
## Height 1 8.76 8.758 0.2481 0.6205
## Residuals 52 1835.71 35.302
lm.beta(reg) # standardized (beta) weight for the simple model
## Height
## -0.0689059
The overall model was not significant (F(2, 51) = 0.3154, p > 0.05), and neither predictor was significant on its own. Height and weight together explain only about 1.2% of the variance in points (R-squared = 0.01222). Holding weight constant, points tend to decrease as height increases, and holding height constant, points tend to increase as weight increases: Y = 22.28743 - 2.56738(Height) + 0.03056(Weight).
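Given the strong Height-Weight correlation (r = 0.83), a few follow-up checks would be natural here, using functions already loaded; a hedged sketch, not part of the reported results:
vif(Mreg)         # variance inflation factors (car); elevated values reflect the Height-Weight overlap
anova(reg, Mreg)  # does adding Weight significantly improve on the Height-only model?
lm.beta(Mreg)     # standardized weights for both predictors (same call used above for the simple model)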