# Upper-tail p-value for an observed t statistic of 2.24 on 21 degrees of
# freedom: P(T >= t).
t <- 2.24
pval <- pt(q = t, df = 21, lower.tail = FALSE)
print(pval)
## [1] 0.01801848
# Question 7.32
# One-sample analysis of the `fuel` measurements: summary statistics,
# normality checks, a t statistic against mu0 = 0, and a 95% confidence
# interval for the population mean.
fuel <- c(41.5, 50.7, 36.6, 37.3, 34.2, 45.0, 48.0, 43.2, 47.7, 42.2,
          43.2, 44.6, 48.4, 46.4, 46.8, 39.2, 37.3, 43.5, 44.3, 43.3)
mean(fuel)
## [1] 43.17
# Graphical checks for approximate normality before using t procedures.
hist(fuel, breaks = 15)
qqnorm(fuel); qqline(fuel, col = 2, lwd = 2, lty = 2)
sd(fuel)
## [1] 4.414939
# Standard error of the mean: s / sqrt(n).
std <- function(x) sd(x) / sqrt(length(x))
std(fuel)
## [1] 0.9872103
# t statistic for H0: mu = 0, computed from the data rather than from
# hand-rounded summary values.
(mean(fuel) - 0) / std(fuel)
## [1] 43.72928
# 95% confidence interval for the mean.
# The population sd is unknown and n is small, so the critical value comes
# from the t distribution with n - 1 degrees of freedom, not from qnorm().
# The summary values are stored as `xbar` and `s` so the base functions
# mean() and sd() are not shadowed by numbers.
xbar <- mean(fuel)
s <- sd(fuel)
n <- length(fuel)
error <- qt(0.975, df = n - 1) * s / sqrt(n)
left <- xbar - error
right <- xbar + error
print(c(left, right))
## [1] 41.10375 45.23625
# question 7.40
# Matched-pairs analysis: each of 10 cars has a `jockos` value and an `other`
# value; inference is on the per-car difference jockos - other.
#
# md = c(160, 250, 0, 100, -50, -55, 150, 220, 125, 240)
#
# mean(md)
#
# sd(md)
# hist(md, breaks = 15)
# qqnorm(md); qqline(md, col = 2,lwd=2,lty=2)
car <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
jockos <- c(1410, 1550, 1250, 1300, 900, 1520, 1750, 3600, 2250, 2840)
other <- c(1250, 1300, 1250, 1200, 950, 1575, 1600, 3380, 2125, 2600)
garage.data <- data.frame(car, jockos, other)
garage.data$diff <- garage.data$jockos - garage.data$other
mean(garage.data$diff)
## [1] 114
sd(garage.data$diff)
## [1] 114.4018
# Normality checks on the differences.
qqnorm(garage.data$diff); qqline(garage.data$diff, col = 2, lwd = 2, lty = 2)
hist(garage.data$diff, breaks = 20)
# One-sided paired t test, H0: mean difference = 0 vs Ha: mean difference > 0.
t.test(garage.data$jockos, garage.data$other,
       paired = TRUE, alternative = "greater")
##
## Paired t-test
##
## data: garage.data$jockos and garage.data$other
## t = 3.1512, df = 9, p-value = 0.005858
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 47.68341 Inf
## sample estimates:
## mean of the differences
## 114
# Same test by hand. The statistic gets its own name (t_stat_g) so it is not
# confused with the critical value t_star_g used for the interval below, and
# the column is spelled out in full ($diff) instead of relying on partial
# matching of `$dif`.
n_g <- length(garage.data$diff)
t_stat_g <- (mean(garage.data$diff) - 0) / (sd(garage.data$diff) / sqrt(n_g))
#p-value
pt(t_stat_g, df = n_g - 1, lower.tail = FALSE)
## [1] 0.005857731
# confidence interval
# x ± 2.262 s/√10
x_bar_g <- mean(garage.data$diff)
sd_g <- sd(garage.data$diff)
# Two-sided 95% critical value on n - 1 = 9 degrees of freedom.
t_star_g <- qt(p = 0.025, df = n_g - 1, lower.tail = FALSE)
lower <- x_bar_g - t_star_g * (sd_g / sqrt(n_g))
upper <- x_bar_g + t_star_g * (sd_g / sqrt(n_g))
print(c(lower, upper))
## [1] 32.16186 195.83814
# Upper bound (rounded to 195.84) scaled by 1000.
# NOTE(review): presumably the total over 1000 cars - confirm against the
# exercise text.
195.84 * 1000
## [1] 195840
#question 7.78
# Two-sample t procedures from summary statistics (groups "wall" and "nat"):
# test statistic, one-sided p-value, and a 95% confidence interval for the
# difference in means.
n_wall <- 66
xbar_wall <- 4.77
s_wall <- 1.50
n_nat <- 61
xbar_nat <- 2.43
s_nat <- 1.64
# Conservative degrees of freedom: the smaller sample size minus 1.
df <- min(n_wall, n_nat) - 1
# Standard error of the difference in sample means (unpooled).
sigma <- sqrt((s_wall^2 / n_wall) + (s_nat^2 / n_nat))
# Plain arithmetic here: the original wrapped the difference in t(), which is
# the matrix transpose function and turned the statistic into a 1x1 matrix.
t <- (xbar_wall - xbar_nat) / sigma
t
## [1] 8.368748
# Upper-tail p-value from the computed statistic (not a hand-typed rounding).
pt(t, df = df, lower.tail = FALSE)
## approximately 5.77e-12
# Two-sided 95% critical value and the resulting confidence interval for
# mu_wall - mu_nat.
t_star <- qt(p = 0.025, df = df, lower.tail = FALSE)
(xbar_wall - xbar_nat) + c(-1, 1) * t_star * sigma
## [1] 1.780693 2.899307
# t.test(qnorm(66, mean =4.77, sd = 1.50), qnorm(61, mean =2.43, sd = 1.64), alternative = "greater")
#question 7.114
# Upper-tail probabilities of a noncentral t distribution:
# 1-pt(tstar,df,delta)
# The help lookup is left commented out so the script runs unattended,
# matching the commented `# ?SIGN.test()` elsewhere in this file.
# ?pt()
# tstar = 1.984, df = 128, noncentrality delta = 5.344
p_upper_df128 <- 1 - pt(1.984, df = 128, ncp = 5.344)
p_upper_df128
## [1] 0.9995783
# tstar = 1.984, df = 198, noncentrality delta = 6.629
p_upper_df198 <- 1 - pt(1.984, df = 198, ncp = 6.629)
p_upper_df198
## [1] 0.9999981
#7.115
# Sign test on the matched-pairs differences in garage.data$diff.
# H0: median difference = 0 vs Ha: median difference > 0.
# install.packages("BSDA")
library(BSDA)
## Loading required package: lattice
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
# ?SIGN.test()
# The null median is 0, so md = 0 (not 0.5). The one difference that is
# exactly 0 carries no sign information and must not count as a trial:
# that leaves 9 non-zero differences, 7 of them positive.
# NOTE(review): the figures below were re-derived by hand for md = 0
# (p-value = P(X >= 7), X ~ Binomial(9, 0.5) = 46/512); re-knit to confirm
# that BSDA drops the tie and prints the same values.
SIGN.test(garage.data$diff, md = 0, alternative = "greater", conf.level = 0.95)
##
## One-sample Sign-Test
##
## data: garage.data$diff
## s = 7, p-value = 0.08984
## alternative hypothesis: true median is greater than 0
## 95 percent confidence interval:
## -5.333333 Inf
## sample estimates:
## median of x
## 137.5
##
## Achieved and Interpolated Confidence Intervals:
##
## Conf.Level L.E.pt U.E.pt
## Lower Achieved CI 0.9453 0.0000 Inf
## Interpolated CI 0.9500 -5.3333 Inf
## Upper Achieved CI 0.9893 -50.0000 Inf
# b = 7
#
# n = 10
#
# library(distributions3)
#
# ?Binomial
# X = Binomial(9, 0.5)
# 2 * min(cdf(X, b), 1 - cdf(X, b-1))
# Same test with base R: count the positive differences among the non-zero
# ones instead of hard-coding 7 out of 10.
n_pos <- sum(garage.data$diff > 0)
n_nonzero <- sum(garage.data$diff != 0)
binom.test(x = n_pos, n = n_nonzero, p = 0.5, alternative = "greater")
##
## Exact binomial test
##
## data: n_pos and n_nonzero
## number of successes = 7, number of trials = 9, p-value = 0.08984
## alternative hypothesis: true probability of success is greater than 0.5
## (confidence interval lines omitted; re-run to regenerate)
## sample estimates:
## probability of success
## 0.7777778
#7.126
# The same two groups of 10 observations analysed two ways: first as two
# independent samples (labelled "wrong"), then as matched pairs ("correct").
group1 <- c(48.86, 50.60, 51.02, 47.99, 54.20, 50.66, 45.91, 48.79, 47.76, 51.13)
group2 <- c(48.88, 52.63, 52.55, 50.94, 53.02, 50.66, 47.78, 48.44, 48.92, 51.63)
data <- data.frame(group1, group2)
# group 1: sample mean, then sample sd (stored) and variance (printed)
(mu1 <- mean(group1))
## [1] 49.692
sd1 <- sd(group1)
print(var(group1))
## [1] 5.37264
# group 2: sample mean, then sample sd (stored) and variance (printed)
(mu2 <- mean(group2))
## [1] 50.545
sd2 <- sd(group2)
print(var(group2))
## [1] 3.703161
# --- two-sample analysis (wrong for paired data) ---
# Unpooled two-sample t statistic.
t_wrong <- local({
  se_sq <- (sd1^2 / 10) + (sd2^2 / 10)
  (mu1 - mu2) / sqrt(se_sq)
})
# Degrees of freedom from the unequal-variance approximation.
df_uneq <- local({
  v1 <- sd1^2 / 10
  v2 <- sd2^2 / 10
  (v1 + v2)^2 / (v1^2 / (10 - 1) + v2^2 / (10 - 1))
})
# Two-sided p-value.
pval_wrong <- 2 * pt(abs(t_wrong), df_uneq, lower.tail = FALSE)
print(c(t_wrong, df_uneq, pval_wrong))
## [1] -0.8953783 17.4108684 0.3827970
# t.test(group1, group2, paired = F, alternative = "two.sided")
# --- matched-pairs analysis (correct) ---
data[["diff"]] <- group1 - group2
mean_diff <- mean(data[["diff"]])
sd_diff <- sd(data[["diff"]])
# Degrees of freedom: 10 pairs minus 1.
df_right <- 9
# One-sample t statistic on the differences against 0.
t_right <- (mean_diff - 0) / (sd_diff / sqrt(10))
# Two-sided p-value.
pval_right <- 2 * pt(abs(t_right), df_right, lower.tail = FALSE)
print(c(t_right, df_right, pval_right))
## [1] -2.12542628 9.00000000 0.06248424
# t.test(group1, group2, paired = T, alternative = "two.sided")
#question 9.50 a-c
# 6 x 2 table of counts: rows are the six fields, columns are the two loan
# groups (n1j in column 1, n2j in column 2). Runs a chi-squared test of
# association and draws a stacked bar chart of the counts.
n11 <- 424
n12 <- 317
n13 <- 2879
n14 <- 1781
n15 <- 827
n16 <- 1081
n21 <- 518
n22 <- 282
n23 <- 2355
n24 <- 1457
n25 <- 551
n26 <- 1219
# matrix() fills column by column, so the n1j values form column 1 and the
# n2j values form column 2.
a <- matrix(c(n11, n12, n13, n14, n15, n16,
              n21, n22, n23, n24, n25, n26), ncol = 2)
a
## [,1] [,2]
## [1,] 424 518
## [2,] 317 282
## [3,] 2879 2355
## [4,] 1781 1457
## [5,] 827 551
## [6,] 1081 1219
# Pearson chi-squared test without continuity correction.
chisq.test(a, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: a
## X-squared = 97.547, df = 5, p-value < 2.2e-16
# Transpose so each field becomes one bar with the two loan groups stacked.
loans <- t(a)
rownames(loans) <- c("Yes Loans", "No Loans")
colnames(loans) <- c("Trades", "Design", "Health", "Media/IT", "Service", "Other")
# The bars show raw counts (hence ylim up to 6000), so the title must not say
# "Percent"; the x axis carries the fields and the y axis the counts.
barplot(loans,
        main = "Loan Status Counts by Field",
        xlab = "Field",
        ylab = "Count",
        col = c("darkblue", "red"),
        ylim = c(0, 6000),
        legend.text = rownames(loans))