Homework 9

library(readr)
library(ggplot2)
ex9_25 <- read_csv("/Users/davidmontalvo/Desktop/ex9-25.csv")

## New names:
## * `` -> ...3
## * `` -> ...4

## Rows: 26 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): rating
## dbl (1): Price$
## lgl (2): ...3, ...4
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

echo=TRUE

9.25

str(ex9_25)

## spec_tbl_df [26 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Price$: num [1:26] 100 100 60 135 195 195 125 135 95 42 ...
##  $ rating: chr [1:26] ">=93" ">=93" ">=93" ">=93" ...
##  $ ...3  : logi [1:26] NA NA NA NA NA NA ...
##  $ ...4  : logi [1:26] NA NA NA NA NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Price$` = col_double(),
##   ..   rating = col_character(),
##   ..   ...3 = col_logical(),
##   ..   ...4 = col_logical()
##   .. )
##  - attr(*, "problems")=<externalptr>

qqnorm(ex9_25$`Price$`)+stat_qq_line()+stat_qq()

## NULL

ggplot(ex9_25) + geom_boxplot(aes(y = `Price$`)) +
  facet_wrap(~rating)

## The variables prove to have normal distribution based on their linear distribution

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

H <- ex9_25 %>% filter(rating == ">=93") %>% select("Price$")
P <- ex9_25 %>% filter(rating == "<=89") %>% select("Price$")

t.test(x = H$`Price$`, y = P$`Price$`, alternative = c("two.sided"))

## 
##  Welch Two Sample t-test
## 
## data:  H$`Price$` and P$`Price$`
## t = 3.1744, df = 15.426, p-value = 0.006106
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  16.19010 81.88133
## sample estimates:
## mean of x mean of y 
## 110.75000  61.71429

#Sample mean: X¯=110.75,Y¯=61.71429

#Two sided confidence interval: P(16.19010<μ1−μ2<81.88133)=0.95

#Degree of freedom: 15.426.

#Test statistic: t=3.1744

#P-value: 0.006106.

#Conclusion: at α=0.05, fail to reject the null hypothesis. The two groups are statistically significant.

9.28

library(dplyr)
d9_28<-read_csv("~/Downloads/ex9-28.csv")

## Rows: 15 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Age
## dbl (1): Angle of Lean
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d9_28)

## spec_tbl_df [15 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age          : chr [1:15] "YF" "YF" "YF" "YF" ...
##  $ Angle of Lean: num [1:15] 29 34 33 27 28 32 31 34 32 27 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_character(),
##   ..   `Angle of Lean` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Y <- d9_28 %>% filter(Age == "YF") %>% select("Angle of Lean")
O <- d9_28 %>% filter(Age == "OF") %>% select("Angle of Lean")

t.test(x = Y$`Angle of Lean`, y = O$`Angle of Lean`, mu=10, alternative = "greater", var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  Y$`Angle of Lean` and O$`Angle of Lean`
## t = 2.4441, df = 13, p-value = 0.01477
## alternative hypothesis: true difference in means is greater than 10
## 95 percent confidence interval:
##  11.23937      Inf
## sample estimates:
## mean of x mean of y 
##      30.7      16.2

#We reject the null hypothesis since p-value<alpha. Thus it is concluded that there is enough evidence to conclude the true average maximum angle for older females is more than 10 degrees smaller than it is for younger females.

9.38

library(ggplot2)
d9_38<-read_csv("~/Downloads/ex9-38.csv")

## Rows: 24 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Subject, Slide, Digital
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d9_38)

## spec_tbl_df [24 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Subject: num [1:24] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Slide  : num [1:24] 30 35 40 25 20 30 35 62 40 51 ...
##  $ Digital: num [1:24] 25 16 15 15 10 20 7 16 15 13 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Subject = col_double(),
##   ..   Slide = col_double(),
##   ..   Digital = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

ggplot(d9_38) + geom_boxplot(aes(y=`Slide`))

## Warning: Removed 11 rows containing non-finite values (stat_boxplot).

ggplot(d9_38) + geom_boxplot(aes(y=`Digital`))

## Warning: Removed 11 rows containing non-finite values (stat_boxplot).

#As it can be observed, the Slide boxplot proves to have a mean value of 35 and also contains an outlier value at 62. On the other hand, the Digital boxplot has a mean value of 15 with no outliers, yet demonstrating how both differ as they have different means and also showing how they do not have a symmetrical distribution.

t.test(y = d9_38$`Digital`, x = d9_38$`Slide`, alternative = "two.sided")

## 
##  Welch Two Sample t-test
## 
## data:  d9_38$Slide and d9_38$Digital
## t = 6.0209, df = 15.99, p-value = 1.785e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  13.30673 27.77019
## sample estimates:
## mean of x mean of y 
##  36.00000  15.46154

#Since the p-value proves to be smaller than alpha on a 95% confidence level, the null hypothesis is rejected. This leads to the conclusion that the true average times of both Slide and Digtal are not identical The mean of Slide is 15.46 as expected from the boxplot diagram and the Digital mean is 36, close to the approximation that could be seen above in the boxplot.

9.40

d9_40<-read_csv("~/Downloads/ex9-40.csv")

## Rows: 20 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Period
## dbl (2): TBBMC, Subject
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d9_40)

## spec_tbl_df [20 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ TBBMC  : num [1:20] 1928 2549 2825 1924 1628 ...
##  $ Period : chr [1:20] "L" "L" "L" "L" ...
##  $ Subject: num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   TBBMC = col_double(),
##   ..   Period = col_character(),
##   ..   Subject = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

P<- d9_40 %>% filter(Period == "P") %>% select("TBBMC")
L<- d9_40 %>% filter(Period == "L") %>% select("TBBMC")

t.test(P, mu=25)

## 
##  One Sample t-test
## 
## data:  P
## t = 17.873, df = 9, p-value = 2.442e-08
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
##  2029.962 2611.038
## sample estimates:
## mean of x 
##    2320.5

#Since the p-value is less than 0.05 we reject the null hypothesis and conclude that the true average total body bone mineral during postweaning exceeds that during lactation by more than 25g.

P <- d9_40 %>% filter(`Period` == "P") %>% select("TBBMC")
L <- d9_40 %>% filter(`Period` == "L") %>% select("TBBMC")


t.test(y = P$`TBBMC`, x = L$`TBBMC`, alternative = "two.sided")

## 
##  Welch Two Sample t-test
## 
## data:  L$TBBMC and P$TBBMC
## t = -0.58872, df = 17.99, p-value = 0.5634
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -482.9168  271.5168
## sample estimates:
## mean of x mean of y 
##    2214.8    2320.5

#It does not lead to the same conclusion, as the p-value is greater than 0.05, we fail to reject the null hypothesis thus concluding that the true average total body bone mineral during postweaning is equal to 25g.

9.43 #a) Since the plot shows somewhat of a straight line it does not generate doubts about the normality of the data.

d9_43<-read_csv("~/Downloads/ex9-43.csv")

## Rows: 15 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): difference
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d9_43)

## spec_tbl_df [15 × 1] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ difference: num [1:15] -24 -12 -55 -15 -30 -60 -14 -21 -48 -12 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   difference = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

t.test(d9_43)

## 
##  One Sample t-test
## 
## data:  d9_43
## t = -6.4497, df = 14, p-value = 1.523e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -51.436 -25.764
## sample estimates:
## mean of x 
##     -38.6

#b) -45.0497. We can be highly confident that at a 95% CI the true population mean difference is greater than -45.0497 #c) the upper bound for 95% CI would be 49.1

9.84

d9_84<-read_csv("~/Downloads/ex9-84.csv")

## Rows: 14 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): type
## dbl (2): energy, player
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d9_84)

## spec_tbl_df [14 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ energy: num [1:14] 14.4 12.1 14.3 14.2 15.2 15.5 17.8 14.6 9.2 11.8 ...
##  $ type  : chr [1:14] "expend" "expend" "expend" "expend" ...
##  $ player: num [1:14] 1 2 3 4 5 6 7 1 2 3 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   energy = col_double(),
##   ..   type = col_character(),
##   ..   player = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

expend <- d9_84 %>% filter(`type` == "expend") %>% select("energy")
intake <- d9_84 %>% filter(`type` == "intake") %>% select("energy")


intakeE<-intake$energy
expendE<-expend$energy

t.test(y = intake$`energy`, x = expend$`energy`, alternative = "two.sided", conf.level = 0.99)

## 
##  Welch Two Sample t-test
## 
## data:  expend$energy and intake$energy
## t = 1.563, df = 10.803, p-value = 0.1469
## alternative hypothesis: true difference in means is not equal to 0
## 99 percent confidence interval:
##  -1.747275  5.261561
## sample estimates:
## mean of x mean of y 
##  14.78571  13.02857

#We reject the null hypothesis at all levels, there is no difference regarding a different significance level.

10.6

d10_06<-read_csv("~/Downloads/ex10-06.csv")

## Rows: 40 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): FE, Formation
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d10_06)

## spec_tbl_df [40 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ FE       : num [1:40] 20.5 28.1 27.8 27 28 25.2 25.3 27.1 20.5 31.3 ...
##  $ Formation: num [1:40] 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   FE = col_double(),
##   ..   Formation = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

d10_06$Formation<-as.numeric(d10_06$Formation)

lm1<-aov(FE~factor(Formation), data=d10_06)
summary(lm1)

##                   Df Sum Sq Mean Sq F value  Pr(>F)    
## factor(Formation)  3  509.1  169.71   10.85 3.2e-05 ***
## Residuals         36  563.1   15.64                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Since the p-value drawn from the ANOVA model is smaller than the significance level at 0.01, we reject the null hypothesis and conclude that there is a significant difference between all the four types of iron formation.

10.08

d10_08<-read_csv("~/Downloads/ex10-08.csv")

## Rows: 35 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): stiffness, lengths
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(d10_08)

## spec_tbl_df [35 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ stiffness: num [1:35] 309 410 311 326 317 ...
##  $ lengths  : num [1:35] 4 4 4 4 4 4 4 6 6 6 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   stiffness = col_double(),
##   ..   lengths = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

lm2<-aov(stiffness~factor(lengths), data=d10_08)
summary(lm2)

##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(lengths)  4  43993   10998   10.48 1.96e-05 ***
## Residuals       30  31475    1049                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Since the p-value is smaller than the significance level at all levels, we reject the null hypothesis and conclude that the variation in plate length do have an effect on true average axial stiffness.