# Read NZBIRDS.csv into `birds` and preview its first rows.
birds <- read.csv("NZBIRDS.csv")
head(birds)
## Species Name Extinct Habitat Nest.Site Nest.Density Diet Flight
## 1 Grebes P. cristatus No A G L F Yes
## 2 Grebes P. rufopectus No A G L F Yes
## 3 Petrels P. gavia Yes A GC H F Yes
## 4 Petrels P. assimilis Yes A GC H F Yes
## 5 Petrels P. urinatrix Yes A GC H F Yes
## 6 Petrels P. georgicus No A GC H F Yes
## Body.Mass Egg.Length
## 1 1100 57
## 2 250 43
## 3 300 57
## 4 200 54
## 5 130 38
## 6 120 39
# Draw 35 subscripts, pull the matching Body.Mass values, and summarise them.
# NOTE(review): runif() returns real numbers; R truncates fractional
# subscripts toward zero, so the largest subscript this can produce is 115 and
# the same row can be drawn more than once. If a simple random sample without
# replacement was intended, sample(nrow(birds), 35) would do that -- confirm.
set.seed(5)
sam <- runif(35,1,116)
BM <- birds$Body.Mass[sam]
mean(BM)
## [1] 5050.486
sd(BM)
## [1] 12172.05
# myci() comes from the MATH4753GRAY package; the two values printed below are
# presumably the lower and upper confidence limits for alpha = 0.05 -- verify
# against the package documentation.
MATH4753GRAY::myci(BM,0.05)
## [1] 869.2391
## [1] 9231.732
With 95% confidence, we can say that the mean body mass of the birds is between 869.2391 and 9231.732.
This interval is very likely to contain \(\mu\) because 95% of the intervals constructed this way will contain the true mean body mass of the birds.
# Repeat the draw with the same seed (so sam1 holds the same 35 values as the
# earlier draw) and summarise the matching Egg.Length values.
# NOTE(review): runif() yields fractional subscripts that R truncates toward
# zero; sample(nrow(birds), 35) would sample rows without replacement --
# confirm which was intended.
set.seed(5)
sam1 <- runif(35,1,116)
EL <- birds$Egg.Length[sam1]
mean(EL)
## [1] 67
sd(EL)
## [1] 35.32538
# Presumably prints the lower and upper confidence limits for alpha = 0.05 --
# verify against the MATH4753GRAY package documentation.
MATH4753GRAY::myci(EL,0.05)
## [1] 54.86531
## [1] 79.13469
# Large-sample (z) confidence interval for the difference p1 - p2 of two
# proportions at confidence level 1 - alpha.
# NOTE(review): n1 and n2 are set to the numerators of p1 and p2 (21 and 7)
# rather than to group sizes -- confirm against the problem statement.
p1 <- 21 / 116
q1 <- 1 - p1
n1 <- 21
p2 <- 7 / 116
q2 <- 1 - p2
n2 <- 7
alpha <- 0.05
# Standard-normal quantile that leaves alpha/2 in the upper tail.
z <- qnorm(1 - alpha / 2)
# Lower limit: point estimate minus z times the standard error.
L <- p1 - p2 - z * sqrt(p1 * q1 / n1 + p2 * q2 / n2)
L
## [1] -0.1206369
# Upper limit: point estimate plus z times the standard error.
U <- p1 - p2 + z * sqrt(p1 * q1 / n1 + p2 * q2 / n2)
U
## [1] 0.3620163
Not conclusively. The confidence interval contains both negative and positive values (it includes 0), so the data are consistent with the theory but also with its opposite; the interval alone does not establish a difference between the two proportions.
#southern pine-ponderosa pine
# Summary statistics for the two groups (group 1 first, group 2 second):
# sample mean, squared standard deviation, and sample size.
ybar1 <- 1312
sigsq1 <- 422^2
n1 <- 100
ybar2 <- 1352
sigsq2 <- 271^2
n2 <- 47
alpha <- 0.1

# Large-sample z interval for the difference in means (group 1 - group 2)
# at confidence level 1 - alpha.
z <- qnorm(1 - alpha / 2)
L <- ybar1 - ybar2 - z * sqrt(sigsq1 / n1 + sigsq2 / n2)
L
## [1] -135.1092
U <- ybar1 - ybar2 + z * sqrt(sigsq1 / n1 + sigsq2 / n2)
U
## [1] 55.10916

# F interval for the ratio of variances (group 1 / group 2).
# Each sample variance carries n - 1 degrees of freedom; the second one was
# previously computed as n2 - 2, which shifted both limits.
v1 <- n1 - 1
v2 <- n2 - 1
ratio <- sigsq1 / sigsq2
upfq <- qf(1 - alpha / 2, v1, v2)
lowfq <- qf(alpha / 2, v1, v2)
L <- ratio / upfq
L
## (recorded value 1.560003 was produced with v2 = n2 - 2; re-knit to refresh)
U <- ratio / lowfq
U
## (recorded value 3.626675 was produced with v2 = n2 - 2; re-knit to refresh)
Has a normal distribution. Thus,
\[Z^2=\frac{Y^2}{\sigma^2}\]
Which by theorem 6.11 is a \(\chi^2\) distribution with 1 degree of freedom.
\[P\left(\chi^2_{1-\frac{\alpha}{2}} \leq \frac{y^2}{\sigma^2} \leq \chi^2_{\frac{\alpha}{2}}\right)=1-\alpha=P\left(\frac{y^2}{\chi^2_{\frac{\alpha}{2}}} \leq \sigma^2 \leq \frac{y^2}{\chi^2_{1-\frac{\alpha}{2}}}\right)\]
# Read ROUGHPIPE.csv and keep its ROUGH column as the working vector `rough`.
Rough <- read.csv("ROUGHPIPE.csv")
rough <-Rough$ROUGH
Null: \(\mu=2\)
Alternative: \(\mu \neq 2\)
The test statistic is \(-1.02\) and the p-value is \(0.322\).
# Two-sided critical values of the t distribution for a test at level alpha,
# using length(rough) - 1 degrees of freedom.
alpha <- 0.05
df <- length(rough) - 1
# Lower critical value: the alpha/2 quantile.
lowtq <- qt(alpha / 2, df = df)
lowtq
## [1] -2.093024
# Upper critical value: the 1 - alpha/2 quantile.
uptq <- qt(1 - alpha / 2, df = df)
uptq
## [1] 2.093024
The rejection region is below \(-2.093024\) and above \(2.093024\).
The appropriate conclusion is to fail to reject the null hypothesis: there is insufficient evidence that \(\mu\) differs from 2.
Both approaches lead to the same conclusion about \(\mu\) because the test statistic lies between the two critical values (outside the rejection region), and the null hypothesis value lies within the confidence interval.
# One-sample, two-sided t test of H0: mu = 15 on the DOC column of
# WISCLAKES.csv, with a 90% confidence interval (alpha = 0.1).
lake <- read.csv("WISCLAKES.csv")
oxy <- lake$DOC
# t.test() has no `alpha` argument: an `alpha = 0.1` argument is swallowed by
# `...` and ignored, leaving the interval at the default 95% level. The
# confidence level has to be supplied through `conf.level`.
t.test(oxy, mu = 15, conf.level = 0.90)
# NOTE: the output recorded below came from a run in which conf.level was left
# at its 0.95 default. The t statistic, df and p-value do not depend on
# conf.level; a re-knit will report a 90 percent interval instead (roughly
# 10.08 to 18.95, derived from the recorded mean and 95% limits).
##
## One Sample t-test
##
## data: oxy
## t = -0.18668, df = 24, p-value = 0.8535
## alternative hypothesis: true mean is not equal to 15
## 95 percent confidence interval:
## 9.164852 19.867148
## sample estimates:
## mean of x
## 14.516
# Read ORCHARD.csv and keep only the rows whose CONDITION is "Fog".
orch <- read.csv("ORCHARD.csv")
# The trailing comma matters: with a single subscript a data frame is indexed
# by column, so the logical row mask must go in the row position.
fog <- orch[orch$CONDITION == "Fog", ]
#orch[fog]
# Read GASTURBINE.csv into `gas` and print the whole data frame.
gas <- read.csv("GASTURBINE.csv")
gas
## ENGINE SHAFTS RPM CPRATIO INLET.TEMP EXH.TEMP AIRFLOW POWER HEATRATE
## 1 Traditional 1 27245 9.2 1134 602 7 1630 14622
## 2 Traditional 1 14000 12.2 950 446 15 2726 13196
## 3 Traditional 1 17384 14.8 1149 537 20 5247 11948
## 4 Traditional 1 11085 11.8 1024 478 27 6726 11289
## 5 Traditional 1 14045 13.2 1149 553 29 7726 11964
## 6 Traditional 1 6211 15.7 1172 517 176 52600 10526
## 7 Traditional 1 6210 17.4 1177 510 193 57500 10387
## 8 Traditional 1 3600 13.5 1146 503 315 89600 10592
## 9 Traditional 1 3000 15.1 1146 524 375 113700 10460
## 10 Traditional 1 3000 15.0 1171 525 514 164300 10086
## 11 Traditional 1 18000 12.7 1038 525 11 2000 14628
## 12 Traditional 1 11140 9.1 1038 523 25 5223 13396
## 13 Traditional 1 16630 15.0 1232 571 19 5500 11726
## 14 Traditional 2 7900 15.6 1077 482 47 11700 11252
## 15 Traditional 1 5100 10.0 963 485 123 26555 12449
## 16 Traditional 1 5160 12.3 1135 542 144 42170 11030
## 17 Traditional 1 3600 12.6 1113 534 295 86650 10787
## 18 Traditional 1 3000 12.3 1124 541 410 124700 10603
## 19 Traditional 1 3000 14.2 1204 553 515 172985 10144
## 20 Traditional 1 14000 15.9 1177 521 27 6930 11674
## 21 Traditional 1 3660 14.6 1135 526 56 14838 11510
## 22 Traditional 1 5400 15.3 1149 514 172 49500 10946
## 23 Traditional 1 3600 14.2 1141 526 362 109370 10508
## 24 Traditional 1 3600 11.0 1149 544 354 108719 10604
## 25 Traditional 1 3600 14.2 1177 525 378 120500 10270
## 26 Traditional 1 3000 14.2 1116 511 448 132220 10529
## 27 Traditional 1 3000 11.1 1149 537 500 157010 10360
## 28 Traditional 1 22516 6.6 899 512 7 1210 14796
## 29 Traditional 1 14950 9.7 916 444 19 3515 12913
## 30 Traditional 1 14950 10.7 1054 517 19 4600 12270
## 31 Traditional 1 14950 12.0 1093 513 22 5500 11842
## 32 Traditional 1 14950 15.0 1121 490 27 7520 10656
## 33 Traditional 2 8568 16.2 1066 464 39 9286 11360
## 34 Traditional 2 8568 17.6 1104 487 42 10685 11136
## 35 Traditional 1 11220 15.8 1121 493 49 13500 10814
## 36 Traditional 1 4473 8.9 960 517 158 32776 13523
## 37 Traditional 1 3600 12.4 1079 515 311 81600 11289
## 38 Traditional 1 3000 12.5 1041 490 400 100500 11183
## 39 Traditional 2 10400 15.0 1057 479 26 6844 10951
## 40 Advanced 1 6600 20.0 1288 546 120 43000 9722
## 41 Advanced 1 5100 14.8 1288 590 204 70905 10481
## 42 Advanced 1 3600 15.5 1327 599 448 174000 9812
## 43 Advanced 1 3600 18.5 1371 626 445 186600 9669
## 44 Advanced 1 3000 14.6 1327 599 648 259670 9643
## 45 Advanced 1 3000 23.2 1427 566 685 282000 9115
## 46 Advanced 1 3000 23.2 1427 621 685 331000 9115
## 47 Advanced 1 7280 14.3 1271 556 49 13680 11588
## 48 Advanced 1 7280 14.6 1271 556 88 27010 10888
## 49 Advanced 1 3600 16.0 1343 607 453 185400 9738
## 50 Advanced 1 3600 20.0 1427 596 567 254000 9295
## 51 Advanced 1 3000 17.0 1343 586 651 270300 9421
## 52 Advanced 1 3000 21.0 1427 587 737 334000 9105
## 53 Advanced 1 5400 16.1 1288 531 188 62300 10233
## 54 Advanced 1 5400 16.2 1310 589 187 68000 10186
## 55 Advanced 1 3600 16.0 1288 551 425 153600 9918
## 56 Advanced 1 3600 16.9 1343 577 440 182000 9209
## 57 Advanced 1 3600 15.0 1349 590 450 186500 9532
## 58 Advanced 1 3000 14.0 1260 585 510 189000 9933
## 59 Advanced 1 3600 19.2 1427 594 550 253000 9152
## 60 Advanced 1 3000 17.0 1316 584 642 265540 9295
## 61 Aeroderiv 2 33000 6.9 888 513 3 486 16243
## 62 Aeroderiv 2 30000 8.5 1004 561 4 806 14628
## 63 Aeroderiv 2 18910 14.0 1066 532 8 1845 12766
## 64 Aeroderiv 3 3600 35.0 1288 448 152 57930 8714
## 65 Aeroderiv 3 3600 20.0 1160 456 84 25600 9469
## 66 Aeroderiv 2 16000 10.6 1232 560 14 3815 11948
## 67 Aeroderiv 1 14600 13.4 1077 536 20 4942 12414
## LHV. ISOWORK
## 1 24.6 232.86
## 2 27.3 181.73
## 3 30.1 262.35
## 4 31.9 249.11
## 5 30.1 266.41
## 6 34.2 298.86
## 7 34.7 397.93
## 8 34.0 284.44
## 9 34.4 303.20
## 10 35.7 319.65
## 11 24.9 181.82
## 12 26.9 208.92
## 13 30.7 289.47
## 14 32.0 248.94
## 15 28.9 215.89
## 16 32.6 292.85
## 17 33.4 293.73
## 18 34.0 304.15
## 19 35.5 335.89
## 20 30.8 256.67
## 21 31.3 264.96
## 22 32.9 287.79
## 23 34.3 302.13
## 24 33.9 307.12
## 25 35.1 318.78
## 26 34.2 295.13
## 27 34.7 314.02
## 28 24.3 172.86
## 29 27.9 185.00
## 30 29.3 242.11
## 31 30.4 250.00
## 32 33.8 278.52
## 33 31.7 238.10
## 34 32.3 254.40
## 35 33.3 275.51
## 36 26.6 207.44
## 37 31.9 262.38
## 38 32.2 251.25
## 39 32.9 263.23
## 40 37.0 358.33
## 41 34.3 347.57
## 42 36.7 388.39
## 43 37.2 419.33
## 44 37.3 400.73
## 45 39.5 411.68
## 46 39.5 483.21
## 47 31.1 282.86
## 48 33.1 306.93
## 49 37.0 409.27
## 50 38.7 447.97
## 51 38.2 415.21
## 52 39.5 453.19
## 53 35.2 331.38
## 54 35.3 363.64
## 55 36.3 361.41
## 56 39.1 413.64
## 57 37.8 414.44
## 58 36.2 370.59
## 59 39.3 460.00
## 60 38.7 413.61
## 61 22.2 162.00
## 62 24.6 202.00
## 63 28.2 230.63
## 64 41.3 341.64
## 65 38.0 304.76
## 66 30.1 272.50
## 67 29.0 247.10
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# F test comparing the HEATRATE variances of the Aeroderiv and Traditional
# engine types: keep only those rows, then hand the subset to var.test().
gas %>%
  filter(ENGINE %in% c("Traditional", "Aeroderiv")) %>%
  var.test(HEATRATE ~ ENGINE, data = .)
##
## F test to compare two variances
##
## data: HEATRATE by ENGINE
## F = 4.297, num df = 6, denom df = 38, p-value = 0.004234
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.555004 21.574593
## sample estimates:
## ratio of variances
## 4.296996
# Same comparison for the Advanced and Aeroderiv engine types.
gas %>%
  filter(ENGINE %in% c("Advanced", "Aeroderiv")) %>%
  var.test(HEATRATE ~ ENGINE, data = .)
##
## F test to compare two variances
##
## data: HEATRATE by ENGINE
## F = 0.057974, num df = 20, denom df = 6, p-value = 1.192e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.0112170 0.1813622
## sample estimates:
## ratio of variances
## 0.05797393
# Read GOBIANTS.csv, preview its first rows, then print the full data frame.
ants <- read.csv("GOBIANTS.csv")
head(ants)
## Site Region Rain Temp PlantCov AntSpecies Diversity
## 1 1 Dry Steppe 196 5.7 40 3 0.89
## 2 2 Dry Steppe 196 5.7 52 3 0.83
## 3 3 Dry Steppe 179 7.0 40 52 1.31
## 4 4 Dry Steppe 197 8.0 43 7 1.48
## 5 5 Dry Steppe 149 8.5 27 5 0.97
## 6 6 Gobi Desert 112 10.7 30 49 0.46
ants
## Site Region Rain Temp PlantCov AntSpecies Diversity
## 1 1 Dry Steppe 196 5.7 40 3 0.89
## 2 2 Dry Steppe 196 5.7 52 3 0.83
## 3 3 Dry Steppe 179 7.0 40 52 1.31
## 4 4 Dry Steppe 197 8.0 43 7 1.48
## 5 5 Dry Steppe 149 8.5 27 5 0.97
## 6 6 Gobi Desert 112 10.7 30 49 0.46
## 7 7 Gobi Desert 125 11.4 16 5 1.23
## 8 8 Gobi Desert 99 10.9 30 4 NA
## 9 9 Gobi Desert 125 11.4 56 4 0.76
## 10 10 Gobi Desert 84 11.4 22 5 1.26
## 11 11 Gobi Desert 115 11.4 14 4 0.69
Null: \(\frac{\sigma_1^2}{\sigma_2^2}=1\)
Alternative: \(\frac{\sigma_1^2}{\sigma_2^2} \neq 1\)
library(dplyr)
#ants %>% filter(Region == "Dry Steppe" | Region == "Gobi Desert") %>% var.test(AntSpecies ~ Region, data = .)
Give the rejection region for the test if .
Find the approximate p-value of the test.
Make the appropriate conclusion in the words of the problem.
What conditions are required for the test results to be valid?
# Two-sample t test on the HUMAN and AUTO columns of THRUPUT.csv.
# With default arguments t.test() runs the Welch (unequal-variance) version,
# as the recorded output header shows.
thru <- read.csv("THRUPUT.csv")
hu <- thru$HUMAN
auto <- thru$AUTO
t.test(x = hu, y = auto)
##
## Welch Two Sample t-test
##
## data: hu and auto
## t = -1.441, df = 13.897, p-value = 0.1717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -81.06293 15.93793
## sample estimates:
## mean of x mean of y
## 210.8875 243.4500
# Reproducible sample of 30 normal draws (mean 20, sd 3), rounded to 3 decimals.
set.seed(35)
sam <- round(rnorm(n = 30, mean = 20, sd = 3), digits = 3)
# Bootstrap percentile confidence interval, plotted next to the classical
# t interval for the mean.
#
# Resamples `x` with replacement `iter` times, applies `fun` to each resample,
# and takes the alpha/2 and 1 - alpha/2 quantiles of the resulting statistics
# as the bootstrap interval. A density histogram of the statistics is drawn
# with the point estimate (vertical line), the bootstrap interval (red labels
# on the x axis) and the t interval for the mean (blue labels at height 0.15).
#
# Arguments:
#   iter  number of bootstrap resamples.
#   x     numeric vector holding the sample.
#   fun   statistic to bootstrap; a function or the name of one.
#   alpha 1 - alpha is the confidence level of both intervals.
#   ...   further arguments forwarded to hist() (e.g. xlab, col).
#
# Returns a list with
#   ci  named quantile vector: the bootstrap percentile interval,
#   fun the statistic as supplied,
#   x   the original sample,
#   t   c(lower, upper) limits of the t interval for the mean.
myboot <- function(iter = 10000, x, fun = "mean", alpha = 0.05, ...) {
  n <- length(x) # sample size

  # Classical t interval for the mean: ybar +/- t * s / sqrt(n).
  ybar <- mean(x)
  t_quant <- qt(1 - alpha / 2, df = n - 1)
  L <- ybar - (t_quant * sd(x)) / sqrt(n)
  U <- ybar + (t_quant * sd(x)) / sqrt(n)

  # Draw all resamples in one call and lay them out one resample per column.
  y <- sample(x, n * iter, replace = TRUE)
  rs.mat <- matrix(y, nrow = n, ncol = iter, byrow = TRUE)
  xstat <- apply(rs.mat, 2, fun) # one statistic per resample
  ci <- quantile(xstat, c(alpha / 2, 1 - alpha / 2))

  # Density histogram of the bootstrap statistics; `para` keeps the histogram
  # object so its densities can be used to place the point-estimate label.
  para <- hist(xstat,
    breaks = 9, freq = FALSE, las = 1,
    main = paste0(
      "Histogram of Bootstrap sample statistics", "\n",
      "alpha=", alpha, " iter=", iter
    ),
    ...
  )

  # Point estimate: `fun` applied to the original sample. The sample is put
  # in a one-column matrix so apply() resolves `fun` the same way as above.
  mat <- matrix(x, nrow = n, ncol = 1)
  pte <- apply(mat, 2, fun)

  abline(v = pte, lwd = 3, col = "Black") # vertical line at the estimate
  segments(ci[1], 0, ci[2], 0, lwd = 4) # bootstrap interval on the x axis
  text(ci[1], 0, paste0("(", round(ci[1], 2)), col = "Red", cex = 3)
  text(ci[2], 0, paste0(round(ci[2], 2), ")"), col = "Red", cex = 3)
  text(L, 0.15, paste0("(", round(L, 2)), col = "Blue", cex = 3)
  text(U, 0.15, paste0(round(U, 2), ")"), col = "Blue", cex = 3)
  # Label the point estimate half way up the tallest bar.
  text(pte, max(para$density) / 2, round(pte, 2), cex = 3)

  # `t` holds the t interval limits. Returning a bare `t` here would hand
  # back base R's t() function, because no local `t` exists in this scope.
  list(ci = ci, fun = fun, x = x, t = c(L, U))
}
# Bootstrap the mean of `sam` with 10000 resamples at alpha = 0.05; xlab and
# col are passed through `...` to hist().
# NOTE(review): in the recorded output below, `$t` prints base R's t() generic
# rather than a numeric result.
myboot(10000,x=sam,fun="mean",alpha=0.05,xlab="mean(x)",col=rainbow(9))
## $ci
## 2.5% 97.5%
## 20.07910 22.22545
##
## $fun
## [1] "mean"
##
## $x
## [1] 23.195 20.399 19.898 19.865 30.014 18.821 21.232 18.313 23.574 21.047
## [11] 21.535 21.336 17.695 18.497 14.274 14.664 22.593 18.963 25.515 25.019
## [21] 22.053 22.871 23.006 23.829 19.038 21.735 21.461 21.659 21.703 21.049
##
## $t
## function (x)
## UseMethod("t")
## <bytecode: 0x0000000012536c48>
## <environment: namespace:base>