Write a script in R to run correlation and multiple regression analyses. Use the data in “DAA.03.txt”. The file contains
fictional data from 245 adults. The THREE (3) variables of interest are (1) physical endurance, (2) age, and (3) number of years
engaged in an active sport.
From your R output, report the TEN (10) values listed in the table below. Round to TWO (2) significant digits (for example, if
the correlation is .456 then write .46).
library(psych)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: mvtnorm
## Loading required package: polycor
## Loading required package: sfsmisc
## Attaching package: 'polycor'
## The following object(s) are masked from 'package:psych':
##
## polyserial
## Attaching package: 'ltm'
## The following object(s) are masked from 'package:psych':
##
## factor.scores
library(gclus)
## Loading required package: cluster
#
# |------------------------------------------------------------------------------------------|
# | I N I T I A L I Z A T I O N |
# |------------------------------------------------------------------------------------------|
# Read a whitespace-delimited text file that has a header row into a data frame.
#
# Args:
#   fileStr:    Name of the file to read. A relative name is resolved against
#               workDirStr, because the working directory is switched there
#               for the duration of the read.
#   workDirStr: Directory to read from (defaults to the author's project folder).
#
# Returns:
#   The data frame produced by read.table(fileStr, header = TRUE).
#
# The caller's working directory is restored when the function exits, whether
# the read succeeds or fails.
Init <- function(fileStr, workDirStr = "C:/Users/denbrige/100 FxOption/103 FxOptionVerBack/080 Fx Git/R-nonsource") {
  # setwd() returns the previous directory; remember it so it can be put back
  oldDirStr <- setwd(workDirStr)
  on.exit(setwd(oldDirStr), add = TRUE)
  read.table(fileStr, header = TRUE)
}
#
# |------------------------------------------------------------------------------------------|
# | M A I N P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Load the raw data set through Init()
rawDfr <- Init(fileStr = "DAA.03.txt")
# --- Number of observations (rows) that were read
dim(rawDfr)[1]
## [1] 245
# --- Column names taken from the file header
colnames(rawDfr)
## [1] "pid" "age" "activeyears" "endurance"
# --- Keep only the three analysis variables (drops the "pid" identifier).
# Columns are picked by name rather than by position, so a change in the
# file's column order fails loudly instead of silently analysing the wrong
# variables.
rawDfr <- rawDfr[, c("age", "activeyears", "endurance")]
# --- Layout: 3 x 2 grid filled row by row, i.e. one row of plots per variable
layout(matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE))
# --- Custom plot: density-scaled histogram plus normal Q-Q plot per variable
for (nameStr in names(rawDfr)) {
  # Pull the column out once; it is reused by every call below
  valNum <- rawDfr[[nameStr]]
  # --- Shapiro-Wilk normality test: p > 0.05 means normality is not rejected
  p <- shapiro.test(valNum)$p.value
  # `probability` is spelled out in full (no partial argument matching) and
  # TRUE is used instead of the reassignable shorthand T.
  # The two-element `main` gives a two-line title.
  hist(valNum,
       probability = TRUE,
       main = c(paste("RAW", nameStr), paste("Shapiro p=", prettyNum(p, digits = 2))),
       xlab = nameStr)
  # Overlay the kernel density estimate on the histogram
  lines(density(valNum))
  # --- Visual check for normality
  qqnorm(valNum)
}
# --- Descriptive statistics for every variable (package psych)
psych::describe(x = rawDfr)
## var n mean sd median trimmed mad min max range skew
## age 1 245 49.18 10.11 48 49.11 10.38 20 82 62 0.15
## activeyears 2 245 10.67 4.78 11 10.56 4.45 0 26 26 0.27
## endurance 3 245 26.53 10.82 27 26.39 10.38 0 55 55 0.11
## kurtosis se
## age -0.08 0.65
## activeyears 0.23 0.31
## endurance -0.30 0.69
# --- Scatterplot matrix and correlation analysis (packages gclus and ltm)
# Pairwise correlations, computed once and shared by the plot and the printout
rawCorMtx <- cor(rawDfr)
# Scatterplot matrix; panel colours are derived from the absolute correlations
# NOTE(review): the title says "Ordered" but no `order` argument is passed - confirm intent
cpairs(rawDfr, panel.colors = dmat.color(abs(rawCorMtx)), gap = 0.5, main = "RAW Variables Ordered and Colored by Correlations")
# --- Correlation matrix
rawCorMtx
## age activeyears endurance
## age 1.0000 0.2827 -0.1259
## activeyears 0.2827 1.0000 0.3365
## endurance -0.1259 0.3365 1.0000
# --- Correlation tests for every pair of variables (package ltm).
# Null hypothesis for each pair: the correlation is zero (not correlated).
# When a p-value is below the alpha level the null hypothesis is rejected,
# i.e. p < 0.05 is read as "the two variables are correlated".
ltm::rcor.test(rawDfr)
##
## age activeyears endurance
## age ***** 0.283 -0.126
## activeyears <0.001 ***** 0.337
## endurance 0.049 <0.001 *****
##
## upper diagonal part contains correlation coefficient estimates
## lower diagonal part contains corresponding p-values
# --- Simple Regression (unstandardized) Y = endurance; X = age;
# Variables are named in the formula and looked up through `data =`, so the
# coefficient is labelled "age" and predict(raw1Lm, newdata = ...) works.
raw1Lm <- lm(endurance ~ age, data = rawDfr)
summary(raw1Lm)
##
## Call:
## lm(formula = endurance ~ age, data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.073 -7.633 0.097 6.771 30.870
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.1567 3.4203 9.69 <2e-16 ***
## age -0.1347 0.0681 -1.98 0.049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.8 on 243 degrees of freedom
## Multiple R-squared: 0.0158, Adjusted R-squared: 0.0118
## F-statistic: 3.91 on 1 and 243 DF, p-value: 0.0491
# --- Simple Regression (standardized) Y = endurance; X = age;
# Both variables are passed through scale() inside the formula, so the slope
# is the standardized coefficient. Columns are resolved via `data =` rather
# than `rawDfr$...`.
sraw1Lm <- lm(scale(endurance) ~ scale(age), data = rawDfr)
summary(sraw1Lm)
##
## Call:
## lm(formula = scale(endurance) ~ scale(age), data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.317 -0.706 0.009 0.626 2.853
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.98e-17 6.35e-02 0.00 1.000
## scale(age) -1.26e-01 6.36e-02 -1.98 0.049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.994 on 243 degrees of freedom
## Multiple R-squared: 0.0158, Adjusted R-squared: 0.0118
## F-statistic: 3.91 on 1 and 243 DF, p-value: 0.0491
# --- Simple Regression (unstandardized) Y = endurance; X = activeyears;
# Variables are named in the formula and looked up through `data =`, so the
# coefficient is labelled "activeyears" and predict(raw2Lm, newdata = ...) works.
raw2Lm <- lm(endurance ~ activeyears, data = rawDfr)
summary(raw2Lm)
##
## Call:
## lm(formula = endurance ~ activeyears, data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.730 -7.067 0.558 5.745 31.083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.392 1.600 11.50 < 2e-16 ***
## activeyears 0.762 0.137 5.57 6.7e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.2 on 243 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.11
## F-statistic: 31 on 1 and 243 DF, p-value: 6.7e-08
# --- Simple Regression (standardized) Y = endurance; X = activeyears;
# Both variables are passed through scale() inside the formula, so the slope
# is the standardized coefficient. Columns are resolved via `data =` rather
# than `rawDfr$...`.
sraw2Lm <- lm(scale(endurance) ~ scale(activeyears), data = rawDfr)
summary(sraw2Lm)
##
## Call:
## lm(formula = scale(endurance) ~ scale(activeyears), data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1933 -0.6532 0.0516 0.5310 2.8730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.67e-17 6.03e-02 0.00 1
## scale(activeyears) 3.37e-01 6.04e-02 5.57 6.7e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.944 on 243 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.11
## F-statistic: 31 on 1 and 243 DF, p-value: 6.7e-08
# --- Multiple Regression (unstandardized) Y = endurance; X1 = age; X2 =
# activeyears;
# Variables are named in the formula and looked up through `data =`, so the
# coefficients carry the plain column names and predict(raw3Lm, newdata = ...)
# works.
raw3Lm <- lm(endurance ~ age + activeyears, data = rawDfr)
summary(raw3Lm)
##
## Call:
## lm(formula = endurance ~ age + activeyears, data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.80 -6.90 0.57 5.63 27.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.3952 3.2054 9.17 < 2e-16 ***
## age -0.2571 0.0655 -3.93 0.00011 ***
## activeyears 0.9163 0.1386 6.61 2.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.92 on 242 degrees of freedom
## Multiple R-squared: 0.166, Adjusted R-squared: 0.159
## F-statistic: 24.1 on 2 and 242 DF, p-value: 2.75e-10
# --- Multiple Regression (standardized) Y = endurance; X1 = age; X2 =
# activeyears;
# All three variables are passed through scale() inside the formula, so the
# slopes are the standardized coefficients. Columns are resolved via `data =`
# rather than `rawDfr$...`.
sraw3Lm <- lm(scale(endurance) ~ scale(age) + scale(activeyears), data = rawDfr)
summary(sraw3Lm)
##
## Call:
## lm(formula = scale(endurance) ~ scale(age) + scale(activeyears), data = rawDfr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0149 -0.6381 0.0527 0.5206 2.5166
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.40e-17 5.86e-02 0.00 1.00000
## scale(age) -2.40e-01 6.12e-02 -3.93 0.00011 ***
## scale(activeyears) 4.04e-01 6.12e-02 6.61 2.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.917 on 242 degrees of freedom
## Multiple R-squared: 0.166, Adjusted R-squared: 0.159
## F-statistic: 24.1 on 2 and 242 DF, p-value: 2.75e-10
#
# |------------------------------------------------------------------------------------------|
# | E N D O F S C R I P T |
# |------------------------------------------------------------------------------------------|