Mod-7_Exercises

Practice Problems for Week Seven

1. Construct a scatter plot for each of the data sets below.

X = 5,8,7,1,5,4,1,9,6,4
Y = 4,5,7,2,6,6,3,8,5,3

# Data set a): paired observations
X <- c(5, 8, 7, 1, 5, 4, 1, 9, 6, 4)
Y <- c(4, 5, 7, 2, 6, 6, 3, 8, 5, 3)

# Draw the pairs as filled circles, then overlay the least-squares line
# of Y regressed on X in red.
plot(
  x = X, y = Y,
  pch = 19,
  main = "Scatterplot a)",
  xlab = "X Variable", ylab = "Y Variable"
)
abline(reg = lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-1

X = 1.0,0.0,-.5,1.5,.5,-.5,.5,-1.0
Y = -2.0,.5,1.0,-1.0,0.0,0.0,-1.0,1.5

# Data set b): paired observations
X <- c(1.0, 0.0, -0.5, 1.5, 0.5, -0.5, 0.5, -1.0)
Y <- c(-2.0, 0.5, 1.0, -1.0, 0.0, 0.0, -1.0, 1.5)

# Draw the pairs as filled circles, then overlay the least-squares line
# of Y regressed on X in red.
plot(
  x = X, y = Y,
  pch = 19,
  main = "Scatterplot b)",
  xlab = "X Variable", ylab = "Y Variable"
)
abline(reg = lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-2

X = 12,17,18,13,13,12,13,14,14,19,16,15
Y = 10,12,4,9,6,11,10,6,7,10,5,7

# Data set c): paired observations
X <- c(12, 17, 18, 13, 13, 12, 13, 14, 14, 19, 16, 15)
Y <- c(10, 12, 4, 9, 6, 11, 10, 6, 7, 10, 5, 7)

# Draw the pairs as filled circles, then overlay the least-squares line
# of Y regressed on X in red.
plot(
  x = X, y = Y,
  pch = 19,
  main = "Scatterplot c)",
  xlab = "X Variable", ylab = "Y Variable"
)
abline(reg = lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-3

X = 90,83,89,89,87,90,84,77,74,86,73,83,85,89,79,76,70
Y = 1.4,3.0,1.6,3.4,2.5,3.5,1.4,1.3,3.5,1.5,3.0,2.7,1.7,2.5,2.3,3.2,1.4

# Data set d): paired observations
X <- c(
  90, 83, 89, 89, 87, 90, 84, 77, 74,
  86, 73, 83, 85, 89, 79, 76, 70
)
Y <- c(
  1.4, 3.0, 1.6, 3.4, 2.5, 3.5, 1.4, 1.3, 3.5,
  1.5, 3.0, 2.7, 1.7, 2.5, 2.3, 3.2, 1.4
)

# Draw the pairs as filled circles, then overlay the least-squares line
# of Y regressed on X in red.
plot(
  x = X, y = Y,
  pch = 19,
  main = "Scatterplot d)",
  xlab = "X Variable", ylab = "Y Variable"
)
abline(reg = lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-4

X = 0,12,4,1,8,6,6,9,8,0,2,2,11,5
Y = 9,9,4,7,4,2,3,8,3,11,6,8,12,1

# Data set e): paired observations
X <- c(0, 12, 4, 1, 8, 6, 6, 9, 8, 0, 2, 2, 11, 5)
Y <- c(9, 9, 4, 7, 4, 2, 3, 8, 3, 11, 6, 8, 12, 1)

# Draw the pairs as filled circles, then overlay the least-squares line
# of Y regressed on X in red.
plot(
  x = X, y = Y,
  pch = 19,
  main = "Scatterplot e)",
  xlab = "X Variable", ylab = "Y Variable"
)
abline(reg = lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-5

2. Identify the type of relationship observed in each of the scatter plots constructed in Problem 1.

Direct positive relationship
Inverse negative relationship
Inverse negative relationship
No relationship
Nonlinear relationship

3. Compute the correlation coefficient for each of the data sets in Exercise 1 using the computational formula.

\(r = (N\sum XY - \sum X \sum Y)/\sqrt{[N\sum X^{2} - (\sum X)^{2}][N\sum Y^{2} - (\sum Y)^{2}]}\)

# a)
X <- c(5,8,7,1,5,4,1,9,6,4)  
Y <- c(4,5,7,2,6,6,3,8,5,3)  
N <- length(X)
Na <- N

# compute correlation coefficient using class formula
r <- (N*sum(X*Y) - sum(X)*sum(Y))/sqrt((N*sum(X^2) - sum(X)^2)*(N*sum(Y^2) - sum(Y)^2))
r <- round(r,3)
r

a) [1] 0.806

# Keep the formula-based value for data set a), then cross-check it
# against R's built-in Pearson correlation, rounded the same way.
ra <- r
r2 <- round(cor(X, Y, method = "pearson"), digits = 3)
print(r2)

a) [1] 0.806

correlation coefficient r = 0.806

# b)
X <- c(1.0,0.0,-.5,1.5,.5,-.5,.5,-1.0)  
Y <- c(-2.0,.5,1.0,-1.0,0.0,0.0,-1.0,1.5)  
N <- length(X)
Nb <- N

# compute correlation coefficient using class formula
r <- (N*sum(X*Y) - sum(X)*sum(Y))/sqrt((N*sum(X^2) - sum(X)^2)*(N*sum(Y^2) - sum(Y)^2))
r <- round(r,3)
r

b) [1] -0.852

# Keep the formula-based value for data set b), then cross-check it
# against R's built-in Pearson correlation, rounded the same way.
rb <- r
r2 <- round(cor(X, Y, method = "pearson"), digits = 3)
print(r2)

b) [1] -0.852

correlation coefficient r = -0.852

# c)
X <- c(12,17,18,13,13,12,13,14,14,19,16,15)  
Y <- c(10,12,4,9,6,11,10,6,7,10,5,7)  
N <- length(X)
Nc <- N

# compute correlation coefficient using class formula
r <- (N*sum(X*Y) - sum(X)*sum(Y))/sqrt((N*sum(X^2) - sum(X)^2)*(N*sum(Y^2) - sum(Y)^2))
r <- round(r,3)
r

c) [1] -0.19

# Keep the formula-based value for data set c), then cross-check it
# against R's built-in Pearson correlation, rounded the same way.
rc <- r
r2 <- round(cor(X, Y, method = "pearson"), digits = 3)
print(r2)

c) [1] -0.19

correlation coefficient r = -0.19

# d)
X <- c(90,83,89,89,87,90,84,77,74,86,73,83,85,89,79,76,70)
Y <- c(1.4,3.0,1.6,3.4,2.5,3.5,1.4,1.3,3.5,1.5,3.0,2.7,1.7,2.5,2.3,3.2,1.4)
N <- length(X)
Nd <- N

# compute correlation coefficient using class formula
r <- (N*sum(X*Y) - sum(X)*sum(Y))/sqrt((N*sum(X^2) - sum(X)^2)*(N*sum(Y^2) - sum(Y)^2))
r <- round(r,3)
r

d) [1] -0.047

# Keep the formula-based value for data set d), then cross-check it
# against R's built-in Pearson correlation, rounded the same way.
rd <- r
r2 <- round(cor(X, Y, method = "pearson"), digits = 3)
print(r2)

d) [1] -0.047

correlation coefficient r = -0.047

# e)
X <- c(0,12,4,1,8,6,6,9,8,0,2,2,11,5)
Y <- c(9,9,4,7,4,2,3,8,3,11,6,8,12,1)
N <- length(X)
Ne <- N

# compute correlation coefficient using class formula
r <- (N*sum(X*Y) - sum(X)*sum(Y))/sqrt((N*sum(X^2) - sum(X)^2)*(N*sum(Y^2) - sum(Y)^2))
r <- round(r,3)
r

e) [1] -0.033

# Keep the formula-based value for data set e), then cross-check it
# against R's built-in Pearson correlation, rounded the same way.
re <- r
r2 <- round(cor(X, Y, method = "pearson"), digits = 3)
print(r2)

e) [1] -0.033

correlation coefficient r = -0.033

4. Employ the hypothesis-testing procedure using critical values of r to test the significance of each r computed for data sets (a), (b), (c), and (d) in Exercise 1. Set α = .05 and use a two-tailed test.

r = 0.806, N = 10, r* = .632

H0: \(\rho = 0\)
HA: \(\rho \neq 0\)
Set \(\alpha = .05\), N= 10, r* = .632
r = .806
|.806| > .632, reject H0 and conclude that population correlation coefficient is different from 0, there is a relationship

r = -0.852, N = 8

H0: \(\rho\) = 0
HA: \(\rho \neq 0\)
Set \(\alpha\) = .05, N= 8, r* = .707
r = -.852
|-.852| > .707, reject H0 and conclude that population correlation coefficient is different from 0, there is a relationship

r = -0.19, N = 12

H0: \(\rho\) = 0
HA: \(\rho \neq 0\)
Set \(\alpha\) = .05, N= 12, r* = .576
r = -.19
|-.19| < .576, retain H0 and conclude that there is not sufficient evidence to say population correlation coefficient is different from 0, no evidence of a relationship

r = -0.047, N = 17

H0: \(\rho\) = 0
HA: \(\rho \neq 0\)
Set \(\alpha\) = .05, N= 17, r* = .482
r = -.047
|-.047| < .482, retain H0 and conclude that there is not sufficient evidence to say population correlation coefficient is different from 0, there is no evidence of a relationship

r = -0.033, N = 14

H0: \(\rho\) = 0
HA: \(\rho \neq 0\)
Set \(\alpha\) = .05, N= 14, r* = .532
r = -.033
|-.033| < .532, retain H0 and conclude that there is not sufficient evidence to say population correlation coefficient is different from 0, there is no evidence of a relationship

5. You conducted a study and found that the correlation coefficient between two variables based on a set of 30 pairs of scores is .46.

Setting α = .01 and using a two-tailed test of significance, what can you conclude about the relationship?
Using the same alpha level (α = .01) and a two-tailed test, what would be the effect of having a larger sample size than 30 on the significance of an r = .46? Describe the general effect of larger sample sizes on the significance of r.

6. A friend of yours describing the results of a study tells you that the correlation coefficient between the two variables of interest was 1.62.

Comment on your friend’s statement.

7. You predict that the average number of hours spent viewing TV each evening after 6:00 P.M. is inversely related to educational level. You select 12 adults at random and determine their years of education and the mean number of hours spent watching TV per evening. You obtain the following scores:

X - Years of education
Y - Hours watching TV
P - Participant

P	X	Y
S1	16	1.0
S2	12	2.9
S3	12	2.0
S4	15	1.5
S5	12	1.4
S6	10	2.6
S7	12	1.6
S8	16	1.2
S9	13	1.5
S10	11	2.5
S11	9	2.4
S12	15	.8

# Problem 7 data, one entry per participant (S1 to S12)
# X: years of education
X <- c(16, 12, 12, 15, 12, 10, 12, 16, 13, 11, 9, 15)
# Y: hours watching TV
Y <- c(1.0, 2.9, 2.0, 1.5, 1.4, 2.6, 1.6, 1.2, 1.5, 2.5, 2.4, 0.8)
# Number of participants
N <- length(X)

Construct a scatter plot of the data to determine whether it is reasonable to assume that the relationship between the two variables is approximately linear

# Problem 7 data: years of education (X) and hours watching TV (Y) for
# the 12 participants. This chunk previously re-assigned X and Y to data
# set a) from Exercise 1, so the plot showed the wrong data and the later
# correlation mixed N = 12 with 10-pair sums.
X <- c(16, 12, 12, 15, 12, 10, 12, 16, 13, 11, 9, 15)
Y <- c(1.0, 2.9, 2.0, 1.5, 1.4, 2.6, 1.6, 1.2, 1.5, 2.5, 2.4, 0.8)

# Scatterplot of the education / TV-viewing pairs
plot(
  X, Y,
  main = "Years of Education vs. Hours Watching TV",
  xlab = "Years of education", ylab = "Hours watching TV", pch = 19
)
# Add the least-squares line of Y regressed on X to judge linearity
abline(lm(Y ~ X), col = "red")

plot of chunk unnamed-chunk-12

It appears that there is an inverse (negative) relationship between the two variables: hours spent watching TV tend to decrease as years of education increase

Compute the correlation coefficient using the raw-score computational formula.

\(r = (N\sum XY - \sum X \sum Y)/\sqrt{[N\sum X^{2} - (\sum X)^{2}][N\sum Y^{2} - (\sum Y)^{2}]}\)

# Raw-score (computational) formula for r, applied to the current
# X, Y and N and rounded to three decimals
r <- round(
  (N * sum(X * Y) - sum(X) * sum(Y)) /
    sqrt((N * sum(X^2) - sum(X)^2) * (N * sum(Y^2) - sum(Y)^2)),
  digits = 3
)
print(r)

7b) [1] -0.796

correlation coefficient r = -0.796

Setting α = .05, complete the hypothesis-testing procedure using critical values of r. What can you conclude about the prediction that TV viewing is inversely related to educational level?
Put your results in journal form.

8. Describe the effect on the correlation coefficient of restricting the range of scores sampled along one or both variables.

9. Answer the following:

Assume that the amount of cigarettes smoked per day and the incidence of respiratory diseases such as emphysema are found to be significantly related, with r = .8. Is this significant correlation coefficient to be interpreted that cigarette smoking causes respiratory disease? Comment.
Assume that the amount of cigarettes smoked per day and “assertiveness” are found to be significantly related, with r = .4. Is the r = .8 between cigarette smoking and respiratory disease twice as strong a relationship as the r = .4 between cigarette smoking and assertiveness? Comment.