# Data for part a)
X <- c(5, 8, 7, 1, 5, 4, 1, 9, 6, 4)
Y <- c(4, 5, 7, 2, 6, 6, 3, 8, 5, 3)
# Draw Y against X with solid points (pch = 19)
plot(
  X, Y,
  main = "Scatterplot a)",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19
)
# Overlay the fitted line from the linear model of Y on X, in red
abline(reg = lm(Y ~ X), col = "red")
# Data for part b)
X <- c(1.0, 0.0, -0.5, 1.5, 0.5, -0.5, 0.5, -1.0)
Y <- c(-2.0, 0.5, 1.0, -1.0, 0.0, 0.0, -1.0, 1.5)
# Draw Y against X with solid points (pch = 19)
plot(
  X, Y,
  main = "Scatterplot b)",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19
)
# Overlay the fitted line from the linear model of Y on X, in red
abline(reg = lm(Y ~ X), col = "red")
# Data for part c)
X <- c(12, 17, 18, 13, 13, 12, 13, 14, 14, 19, 16, 15)
Y <- c(10, 12, 4, 9, 6, 11, 10, 6, 7, 10, 5, 7)
# Draw Y against X with solid points (pch = 19)
plot(
  X, Y,
  main = "Scatterplot c)",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19
)
# Overlay the fitted line from the linear model of Y on X, in red
abline(reg = lm(Y ~ X), col = "red")
# Data for part d)
X <- c(90, 83, 89, 89, 87, 90, 84, 77, 74, 86, 73, 83, 85, 89, 79, 76, 70)
Y <- c(
  1.4, 3.0, 1.6, 3.4, 2.5, 3.5, 1.4, 1.3, 3.5,
  1.5, 3.0, 2.7, 1.7, 2.5, 2.3, 3.2, 1.4
)
# Draw Y against X with solid points (pch = 19)
plot(
  X, Y,
  main = "Scatterplot d)",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19
)
# Overlay the fitted line from the linear model of Y on X, in red
abline(reg = lm(Y ~ X), col = "red")
# Data for part e)
X <- c(0, 12, 4, 1, 8, 6, 6, 9, 8, 0, 2, 2, 11, 5)
Y <- c(9, 9, 4, 7, 4, 2, 3, 8, 3, 11, 6, 8, 12, 1)
# Draw Y against X with solid points (pch = 19)
plot(
  X, Y,
  main = "Scatterplot e)",
  xlab = "X Variable",
  ylab = "Y Variable",
  pch = 19
)
# Overlay the fitted line from the linear model of Y on X, in red
abline(reg = lm(Y ~ X), col = "red")
a) Direct positive relationship
b) Inverse negative relationship
c) Inverse negative relationship
d) No relationship
e) Nonlinear relationship
\(r = (N\sum XY - \sum X \sum Y)/\sqrt{[N\sum X^{2} - (\sum X)^{2}][N\sum Y^{2} - (\sum Y)^{2}]}\)
# a)
X <- c(5, 8, 7, 1, 5, 4, 1, 9, 6, 4)
Y <- c(4, 5, 7, 2, 6, 6, 3, 8, 5, 3)
# Number of (X, Y) pairs; a copy is kept under a part-specific name
Na <- N <- length(X)
# Correlation coefficient from the class (raw-score) formula,
# rounded to 3 decimal places. The intermediate terms live only
# inside local() so no extra variables are created.
r <- local({
  cross_term <- N * sum(X * Y) - sum(X) * sum(Y)
  x_term <- N * sum(X^2) - sum(X)^2
  y_term <- N * sum(Y^2) - sum(Y)^2
  round(cross_term / sqrt(x_term * y_term), 3)
})
r
a) [1] 0.806
# Keep the hand-computed (rounded) coefficient for part a)
ra <- r
# Cross-check with R's built-in Pearson correlation, rounded to 3 decimals
r2 <- round(cor(X, Y, method = "pearson"), 3)
r2
a) [1] 0.806
# b)
X <- c(1.0, 0.0, -0.5, 1.5, 0.5, -0.5, 0.5, -1.0)
Y <- c(-2.0, 0.5, 1.0, -1.0, 0.0, 0.0, -1.0, 1.5)
# Number of (X, Y) pairs; a copy is kept under a part-specific name
Nb <- N <- length(X)
# Correlation coefficient from the class (raw-score) formula,
# rounded to 3 decimal places. The intermediate terms live only
# inside local() so no extra variables are created.
r <- local({
  cross_term <- N * sum(X * Y) - sum(X) * sum(Y)
  x_term <- N * sum(X^2) - sum(X)^2
  y_term <- N * sum(Y^2) - sum(Y)^2
  round(cross_term / sqrt(x_term * y_term), 3)
})
r
b) [1] -0.852
# Keep the hand-computed (rounded) coefficient for part b)
rb <- r
# Cross-check with R's built-in Pearson correlation, rounded to 3 decimals
r2 <- round(cor(X, Y, method = "pearson"), 3)
r2
b) [1] -0.852
# c)
X <- c(12, 17, 18, 13, 13, 12, 13, 14, 14, 19, 16, 15)
Y <- c(10, 12, 4, 9, 6, 11, 10, 6, 7, 10, 5, 7)
# Number of (X, Y) pairs; a copy is kept under a part-specific name
Nc <- N <- length(X)
# Correlation coefficient from the class (raw-score) formula,
# rounded to 3 decimal places. The intermediate terms live only
# inside local() so no extra variables are created.
r <- local({
  cross_term <- N * sum(X * Y) - sum(X) * sum(Y)
  x_term <- N * sum(X^2) - sum(X)^2
  y_term <- N * sum(Y^2) - sum(Y)^2
  round(cross_term / sqrt(x_term * y_term), 3)
})
r
c) [1] -0.19
# Keep the hand-computed (rounded) coefficient for part c)
rc <- r
# Cross-check with R's built-in Pearson correlation, rounded to 3 decimals
r2 <- round(cor(X, Y, method = "pearson"), 3)
r2
c) [1] -0.19
# d)
X <- c(90, 83, 89, 89, 87, 90, 84, 77, 74, 86, 73, 83, 85, 89, 79, 76, 70)
Y <- c(
  1.4, 3.0, 1.6, 3.4, 2.5, 3.5, 1.4, 1.3, 3.5,
  1.5, 3.0, 2.7, 1.7, 2.5, 2.3, 3.2, 1.4
)
# Number of (X, Y) pairs; a copy is kept under a part-specific name
Nd <- N <- length(X)
# Correlation coefficient from the class (raw-score) formula,
# rounded to 3 decimal places. The intermediate terms live only
# inside local() so no extra variables are created.
r <- local({
  cross_term <- N * sum(X * Y) - sum(X) * sum(Y)
  x_term <- N * sum(X^2) - sum(X)^2
  y_term <- N * sum(Y^2) - sum(Y)^2
  round(cross_term / sqrt(x_term * y_term), 3)
})
r
d) [1] -0.047
# Keep the hand-computed (rounded) coefficient for part d)
rd <- r
# Cross-check with R's built-in Pearson correlation, rounded to 3 decimals
r2 <- round(cor(X, Y, method = "pearson"), 3)
r2
d) [1] -0.047
# e)
X <- c(0, 12, 4, 1, 8, 6, 6, 9, 8, 0, 2, 2, 11, 5)
Y <- c(9, 9, 4, 7, 4, 2, 3, 8, 3, 11, 6, 8, 12, 1)
# Number of (X, Y) pairs; a copy is kept under a part-specific name
Ne <- N <- length(X)
# Correlation coefficient from the class (raw-score) formula,
# rounded to 3 decimal places. The intermediate terms live only
# inside local() so no extra variables are created.
r <- local({
  cross_term <- N * sum(X * Y) - sum(X) * sum(Y)
  x_term <- N * sum(X^2) - sum(X)^2
  y_term <- N * sum(Y^2) - sum(Y)^2
  round(cross_term / sqrt(x_term * y_term), 3)
})
r
e) [1] -0.033
# Keep the hand-computed (rounded) coefficient for part e)
re <- r
# Cross-check with R's built-in Pearson correlation, rounded to 3 decimals
r2 <- round(cor(X, Y, method = "pearson"), 3)
r2
e) [1] -0.033
H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N = 10, r* = .632
r = .806
|.806| > .632, so reject H0 and conclude that the population correlation coefficient is different from 0; there is a relationship.
H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N = 8, r* = .707
r = -.852
|-.852| > .707, so reject H0 and conclude that the population correlation coefficient is different from 0; there is a relationship.
H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N = 12, r* = .576
r = -.19
|-.19| < .576, so retain H0 and conclude that there is not sufficient evidence to say the population correlation coefficient is different from 0; there is no evidence of a relationship.
H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N = 17, r* = .482
r = -.047
|-.047| < .482, so retain H0 and conclude that there is not sufficient evidence to say the population correlation coefficient is different from 0; there is no evidence of a relationship.
H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N = 14, r* = .532
r = -.033
|-.033| < .532, so retain H0 and conclude that there is not sufficient evidence to say the population correlation coefficient is different from 0; there is no evidence of a relationship.
Setting α = .01 and using a two-tailed test of significance, what can you conclude about the relationship?
Using the same alpha level (α = .01) and a two-tailed test, what would be the effect of having a larger sample size than 30 on the significance of an r = .46? Describe the general effect of larger sample sizes on the significance of r.
Comment on your friend’s statement.
Assume that the amount of cigarettes smoked per day and the incidence of respiratory diseases such as emphysema are found to be significantly related, with r = .8. Is this significant correlation coefficient to be interpreted that cigarette smoking causes respiratory disease? Comment.
Assume that the amount of cigarettes smoked per day and “assertiveness” are found to be significantly related, with r = .4. Is the r = .8 between cigarette smoking and respiratory disease twice as strong a relationship as the r = .4 between cigarette smoking and assertiveness? Comment.