# 7.30

# Upper-tail p-value for an observed t statistic of 2.24 on 21 degrees
# of freedom.
t <- 2.24
pval <- pt(q = t, df = 21, lower.tail = FALSE)

print(pval)
## [1] 0.01801848

# 7.32

# Question 7.32
# Summarise the 20 `fuel` readings, check normality graphically, and build a
# 95% confidence interval for the population mean.
# NOTE(review): units are presumably miles per gallon -- confirm against the
# exercise text.

fuel <- c(
  41.5, 50.7, 36.6, 37.3, 34.2, 45.0, 48.0, 43.2, 47.7, 42.2,
  43.2, 44.6, 48.4, 46.4, 46.8, 39.2, 37.3, 43.5, 44.3, 43.3
)

mean(fuel)
## [1] 43.17
hist(fuel, breaks = 15)

qqnorm(fuel)
qqline(fuel, col = 2, lwd = 2, lty = 2)

sd(fuel)
## [1] 4.414939

# Standard error of the mean of `x`: sd(x) / sqrt(number of observations).
std <- function(x) sd(x) / sqrt(length(x))
std(fuel)
## [1] 0.9872103

# Hand check of mean / standard error using the rounded summary values.
(43.17 - 0) / (4.41 / (sqrt(20)))
## [1] 43.77826

# Keep the summaries under names that do not shadow base::mean()/stats::sd().
fuel_mean <- mean(fuel)
fuel_sd <- sd(fuel)
n <- length(fuel)

# The population sd is unknown and estimated from only 20 observations, so
# the critical value comes from the t distribution with n - 1 degrees of
# freedom rather than from the standard normal.
t_crit <- qt(0.975, df = n - 1)
error <- t_crit * fuel_sd / sqrt(n)

left <- fuel_mean - error
right <- fuel_mean + error
print(c(left, right))
# Roughly 41.10 to 45.24; a z-based interval (qnorm) would be too narrow at
# 41.2351 to 45.1049.

# 7.40

# question 7.40
# Paired analysis of the `jockos` and `other` values recorded for the same
# 10 cars: summary of the differences, paired t test, and a 95% confidence
# interval for the mean difference.
#
# Differences typed in by hand, kept for cross-checking against
# garage.data$diff:
# md = c(160, 250, 0, 100, -50, -55, 150, 220, 125, 240)
#
# mean(md)
#
# sd(md)
# hist(md, breaks = 15)
# qqnorm(md); qqline(md, col = 2,lwd=2,lty=2)
car <- 1:10
jockos <- c(1410, 1550, 1250, 1300, 900, 1520, 1750, 3600, 2250, 2840)
other <- c(1250, 1300, 1250, 1200, 950, 1575, 1600, 3380, 2125, 2600)

garage.data <- data.frame(car, jockos, other)

# Per-car difference (jockos minus other); positive means jockos is higher.
garage.data$diff <- garage.data$jockos - garage.data$other

mean(garage.data$diff)
## [1] 114
sd(garage.data$diff)
## [1] 114.4018
qqnorm(garage.data$diff)
qqline(garage.data$diff, col = 2, lwd = 2, lty = 2)

hist(garage.data$diff, breaks = 20)

t.test(garage.data$jockos, garage.data$other,
  paired = TRUE, alternative = "greater"
)
## 
##  Paired t-test
## 
## data:  garage.data$jockos and garage.data$other
## t = 3.1512, df = 9, p-value = 0.005858
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  47.68341      Inf
## sample estimates:
## mean of the differences 
##                     114

# Same test by hand. The column is referenced by its full name (`diff`);
# `$dif` only worked through partial matching.
n_g <- length(garage.data$diff)
x_bar_g <- mean(garage.data$diff)
sd_g <- sd(garage.data$diff)

# Observed test statistic, kept separate from the critical value t_star_g.
t_stat_g <- (x_bar_g - 0) / (sd_g / sqrt(n_g))
# p-value
pt(t_stat_g, df = n_g - 1, lower.tail = FALSE)
## [1] 0.005857731

# confidence interval
# x ± 2.262 s/√10
# Two-sided 95% critical value of t with n - 1 degrees of freedom.
t_star_g <- qt(p = 0.025, df = n_g - 1, lower.tail = FALSE)

lower <- x_bar_g - t_star_g * (sd_g / sqrt(n_g))
upper <- x_bar_g + t_star_g * (sd_g / sqrt(n_g))

print(c(lower, upper))
## [1]  32.16186 195.83814

# Upper limit scaled by 1000, using the unrounded limit.
# NOTE(review): the factor 1000 presumably comes from the exercise text
# (e.g. number of cars/claims) -- confirm.
upper * 1000
## [1] 195838.1

#7.78

# question 7.78
# Two-sample t test computed from summary statistics for the "wall" and
# "nat" groups (sample size, sample mean, sample standard deviation).

n_wall <- 66
xbar_wall <- 4.77
s_wall <- 1.50

n_nat <- 61
xbar_nat <- 2.43
s_nat <- 1.64

# Conservative degrees of freedom: the smaller sample size minus one.
df_cons <- min(n_wall, n_nat) - 1

# Standard error of the difference in means (unpooled variances).
se_diff <- sqrt((s_wall^2 / n_wall) + (s_nat^2 / n_nat))

# Plain arithmetic here: wrapping the difference in t() calls the matrix
# transpose and turns the statistic into a 1x1 matrix.
t_stat <- (xbar_wall - xbar_nat) / se_diff

t_stat
## [1] 8.368748

# Upper-tail p-value from the computed statistic rather than a re-typed,
# rounded copy of it; the result is about 5.77e-12.
pt(t_stat, df = df_cons, lower.tail = FALSE)

# Two-sided 95% critical value for the same degrees of freedom.
t_star <- qt(p = 0.025, df = df_cons, lower.tail = FALSE)

# t.test() needs the raw observations, which are not available here, so the
# attempt below (which also misuses qnorm) stays disabled.
# t.test(qnorm(66, mean =4.77, sd = 1.50), qnorm(61, mean =2.43, sd = 1.64), alternative = "greater")

#7.114

# question 7.114
# Upper-tail probabilities of a noncentral t distribution, computed as
# 1 - pt(tstar, df, delta) where delta is the noncentrality parameter (ncp).
# NOTE(review): these are presumably power calculations for the exercise --
# confirm against the exercise text.
# Run `?pt` in an interactive session for the argument list; the help call is
# not executed here so the script has no interactive side effects.

upper_tail_df128 <- 1 - pt(1.984, df = 128, ncp = 5.344)
upper_tail_df128
## [1] 0.9995783
upper_tail_df198 <- 1 - pt(1.984, df = 198, ncp = 6.629)
upper_tail_df198
## [1] 0.9999981

#7.115

# install.packages("BSDA")
library(BSDA)
## Loading required package: lattice
## 
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
## 
##     Orange
# ?SIGN.test()

# Sign test on the paired differences in garage.data$diff.
# H0: median difference = 0 versus Ha: median difference > 0.
# `md` is the hypothesised median of the data, so it is 0 here; the 0.5 of
# the sign test is the binomial success probability, not the median.
# NOTE(review): SIGN.test is expected to leave out the one difference that
# equals md -- confirm its p-value matches the binom.test() result below.
SIGN.test(garage.data$diff, md = 0, alternative = "greater", conf.level = 0.95)

# Equivalent exact binomial calculation. Differences equal to zero carry no
# sign, so the number of trials is the count of non-zero differences
# (9 of the 10 cars), with 7 of them positive.
n_pos <- sum(garage.data$diff > 0)
n_nonzero <- sum(garage.data$diff != 0)

# Alternative route via the distributions3 package, kept for reference:
# library(distributions3)
# X = Binomial(9, 0.5)
# 1 - cdf(X, 7 - 1)
binom.test(x = n_pos, n = n_nonzero, p = 0.5, alternative = "greater")
# p-value = P(X >= 7) for X ~ Binomial(9, 0.5) = 46/512, about 0.0898

# 7.126

# 7.126
# The same ten paired measurements analysed two ways: first as if the two
# groups were independent (labelled "wrong"), then as paired data ("correct").

group1 <- c(48.86, 50.60, 51.02, 47.99, 54.20, 50.66, 45.91, 48.79, 47.76, 51.13)
group2 <- c(48.88, 52.63, 52.55, 50.94, 53.02, 50.66, 47.78, 48.44, 48.92, 51.63)
n_obs <- length(group1)

data <- data.frame(group1, group2)

# Group 1: sample mean, then sample variance
mu1 <- mean(group1)
print(mu1)
## [1] 49.692
sd1 <- sd(group1)
print(var(group1))
## [1] 5.37264

# Group 2: sample mean, then sample variance
mu2 <- mean(group2)
print(mu2)
## [1] 50.545
sd2 <- sd(group2)
print(var(group2))
## [1] 3.703161

# t.test wrong: two independent samples with unequal variances.
# Each group's squared standard error of the mean:
se1_sq <- sd1^2 / n_obs
se2_sq <- sd2^2 / n_obs

# sample t
t_wrong <- (mu1 - mu2) / sqrt(se1_sq + se2_sq)

# Degrees of freedom for unequal variances
df_uneq <- (se1_sq + se2_sq)^2 /
  (se1_sq^2 / (n_obs - 1) + se2_sq^2 / (n_obs - 1))

# two-sided p-value
pval_wrong <- 2 * pt(abs(t_wrong), df_uneq, lower.tail = FALSE)

print(c(t_wrong, df_uneq, pval_wrong))
## [1] -0.8953783 17.4108684  0.3827970
# t.test(group1, group2, paired = F, alternative = "two.sided")

# t.test correct: one-sample t on the within-pair differences.
data$diff <- group1 - group2

mean_diff <- mean(data$diff)
sd_diff <- sd(data$diff)

# df: number of pairs minus one
df_right <- 9

# sample t
t_right <- (mean_diff - 0) / (sd_diff / sqrt(n_obs))

# two-sided p-value
pval_right <- 2 * pt(abs(t_right), df_right, lower.tail = FALSE)

print(c(t_right, df_right, pval_right))
## [1] -2.12542628  9.00000000  0.06248424
# t.test(group1, group2, paired = T, alternative = "two.sided")

# 9.50

#question 9.50 a-c
# Two-way table of counts (6 fields x 2 loan-status groups), a chi-square
# test of association, and a stacked bar chart of the counts.

# Counts per field, in the order Trades, Design, Health, Media/IT, Service,
# Other (the order used for the column labels of `loans` below).
with_loans <- c(424, 317, 2879, 1781, 827, 1081)
without_loans <- c(518, 282, 2355, 1457, 551, 1219)

# Rows = fields, columns = loan status (column 1 = yes, column 2 = no).
a <- matrix(c(with_loans, without_loans), ncol = 2)
a
##      [,1] [,2]
## [1,]  424  518
## [2,]  317  282
## [3,] 2879 2355
## [4,] 1781 1457
## [5,]  827  551
## [6,] 1081 1219

# Pearson chi-square test without continuity correction; the result is kept
# so the statistic can be reused.
loan_test <- chisq.test(a, correct = FALSE)
loan_test
## 
##  Pearson's Chi-squared test
## 
## data:  a
## X-squared = 97.547, df = 5, p-value < 2.2e-16

# Transpose so each bar (column) is a field and each stack segment (row) is a
# loan-status group.
loans <- t(a)

rownames(loans) <- c("Yes Loans", "No Loans")
colnames(loans) <- c("Trades", "Design", "Health", "Media/IT", "Service", "Other")

# The bars show raw counts, so the title and axis labels say so: the x axis
# carries the fields and the y axis the counts.
barplot(loans,
  main = "Loan Status Counts by Field",
  xlab = "Field", ylab = "Count",
  col = c("darkblue", "red"),
  ylim = c(0, 6000),
  legend.text = rownames(loans)
)