A study of stress among the students on a college campus shows that the stress scores follow a Uniform distribution with the lowest stress score equal to 1 and the highest equal to 5.
Use a sample of 75 students and the four families of built-in R functions (d, p, q, r) for the probability distribution of a random variable to compute the following [Hint: you may use the Central Limit Theorem]:
#(a)
# Simulate 75 stress scores from Uniform(1, 5) and estimate the probability
# that the AVERAGE stress score of the 75 students is less than 2.
n_students <- 75
Uniform_distribution <- runif(n_students, min = 1, max = 5)
mean_1 <- mean(Uniform_distribution) # sample mean
mean_1
## [1] 3.03889
Sd <- sd(Uniform_distribution) # sample standard deviation of individual scores
Sd
## [1] 1.209053
# By the Central Limit Theorem the sample mean is approximately Normal with
# standard deviation sigma / sqrt(n) (the standard error). Using Sd itself
# would answer the question for a single student, not for the average.
p <- pnorm(2, mean = mean_1, sd = Sd / sqrt(n_students))
p
# Output varies with the random draw. It is effectively 0 (on the order of
# 1e-14), because 2 lies roughly 7.5 standard errors below the mean.
#(b)
# 90th percentile of the average stress score of 75 students.
# Population mean of Uniform(a, b): (a + b) / 2 = (1 + 5) / 2 = 3.
# Population sd of Uniform(a, b): sqrt((b - a)^2 / 12).
# Standard deviation of the sampling distribution of x-bar: sigma / sqrt(n).
pop_mean <- (1 + 5) / 2
# Keep the exact sd rather than the rounded 1.15 so that rounding error does
# not leak into the quantile.
pop_sd <- sqrt((5 - 1)^2 / 12)
qnorm(0.90, mean = pop_mean, sd = pop_sd / sqrt(75))
## [1] 3.170874
#(c)
# Central Limit Theorem for sums: the total of n independent scores is
# approximately Normal with mean n * mu and standard deviation sqrt(n) * sigma.
n <- 75
# sd of a single Uniform(1, 5) score; named pop_sigma so it does not mask sd().
pop_sigma <- sqrt((5 - 1)^2 / 12)
sd_sums <- sqrt(n) * pop_sigma
sd_sums
## [1] 10
# Mean of the total: n * mu, with mu = (1 + 5) / 2 = 3.
mean_X <- n * 3
pnorm(200, mean_X, sd_sums) # probability that the total score is less than 200
## [1] 0.006209665
#(d)
qnorm(0.90, mean_X, sd_sums) # 0.90 quantile of the total score
## [1] 237.8155
PART B:
Consider again the same stress study that is described in PART A and answer the same questions. However, this time assume that the stress score can take only the values 1, 2, 3, 4 or 5, each with the same probability [Hint: you may use the Central Limit Theorem].
# Draw 75 stress scores, each taking one of the values 1, 2, 3, 4 or 5.
# floor() of a Uniform(1, 6) draw maps [1, 2) to 1, [2, 3) to 2, ..., [5, 6) to 5.
range_1 <- floor(runif(75, 1, 6))
mean1 <- mean(range_1) # sample mean
SD <- sd(range_1) # sample standard deviation of individual scores
# Probability that the AVERAGE of the 75 scores is below 2. By the Central
# Limit Theorem the sample mean has standard deviation SD / sqrt(n), so the
# standard error (not SD itself) is the spread to pass to pnorm().
prob <- pnorm(2, mean1, SD / sqrt(length(range_1)), lower.tail = TRUE)
prob
# Output varies with the random draw. It is effectively 0, because 2 lies
# about 6 standard errors below the mean.
#xv
# Load scatter1.txt (first line is a header) into the data frame scatter1 and
# print its structure and a per-column summary. The recorded output below
# shows 2000 rows and two numeric columns, xv and ys.
# NOTE(review): setwd() ties the script to one user's directory layout, and
# attach() puts xv/ys on the search path even though the code below reads the
# columns as scatter1$xv / scatter1$ys. Both look removable; confirm nothing
# else relies on them before changing.
setwd("C:/Users/amorales/Documents/R")
scatter1<-read.table("scatter1.txt",header = TRUE)
attach(scatter1)
str(scatter1)
## 'data.frame': 2000 obs. of 2 variables:
## $ xv: num 90.8 16.1 31.1 39.8 48.8 ...
## $ ys: num 51.8 29 35.5 32.7 40.5 ...
summary(scatter1)
## xv ys
## Min. : 0.0285 Min. :18.34
## 1st Qu.:25.3602 1st Qu.:32.44
## Median :50.5420 Median :40.11
## Mean :50.3060 Mean :40.03
## 3rd Qu.:75.6620 3rd Qu.:47.50
## Max. :99.9326 Max. :62.59
# Sampling distribution of the mean of xv: 2000 repetitions, each the mean of
# 20 values drawn from the column.
col <- scatter1$xv
n <- 2000 # number of repetitions
X.bar1 <- replicate(n, mean(sample(col, 20)))
# Centre: population mean versus mean of the sample means.
mean(col)
## [1] 50.30595
mean(X.bar1)
## [1] 50.70583
# Spread: population variance versus variance of the sample means.
var(col)
## [1] 856.3489
var(X.bar1)
## [1] 41.78023
# The same comparison on the standard-deviation scale.
sd(col)
## [1] 29.26344
sd(X.bar1)
## [1] 6.463763
# Sampling distribution of the mean of xv: 2000 repetitions, each the mean of
# 50 values drawn from the column.
col <- scatter1$xv
n <- 2000 # number of repetitions
X.bar <- numeric(n)
for (i in seq_len(n)) {
  X.samp <- sample(col, 50)
  # Store into X.bar, the vector summarised below. Writing to X.bar1 here left
  # X.bar full of zeros, so every summary of it printed 0.
  X.bar[i] <- mean(X.samp)
}
# Centre: population mean versus mean of the sample means.
mean(col)
## [1] 50.30595
mean(X.bar)
# Output varies by run; expect a value close to mean(col).
# Spread: population variance versus variance of the sample means.
var(col)
## [1] 856.3489
var(X.bar)
# Output varies by run; expect roughly var(col) / 50, i.e. about 17.
sd(col)
## [1] 29.26344
sd(X.bar)
# Output varies by run; expect roughly sd(col) / sqrt(50), i.e. about 4.1.
#ys
# Sampling distribution of the mean of ys: 2000 repetitions, each the mean of
# 20 values drawn from the column.
col <- scatter1$ys
n <- 2000 # number of repetitions
X.bar <- replicate(n, mean(sample(col, 20)))
# Centre: population mean versus mean of the sample means.
mean(col)
## [1] 40.02937
mean(X.bar)
## [1] 40.04622
# Spread: population variance versus variance of the sample means.
var(col)
## [1] 84.80254
var(X.bar)
## [1] 4.172101
# The same comparison on the standard-deviation scale.
sd(col)
## [1] 9.20883
sd(X.bar)
## [1] 2.042572
# Sampling distribution of the mean of ys: 2000 repetitions, each the mean of
# 50 values drawn from the column.
col <- scatter1$ys
n <- 2000 # number of repetitions
X.bar <- replicate(n, mean(sample(col, 50)))
# Centre: population mean versus mean of the sample means.
mean(col)
## [1] 40.02937
mean(X.bar)
## [1] 40.00805
# Spread: population variance versus variance of the sample means.
var(col)
## [1] 84.80254
var(X.bar)
## [1] 1.628406
# The same comparison on the standard-deviation scale.
sd(col)
## [1] 9.20883
sd(X.bar)
## [1] 1.27609
#(b)
# Proportion of size-20 sample means that fall within 0.5 of the population
# mean, computed separately for xv and ys.
# Set the columns BEFORE sampling. Previously the sample means were drawn
# while col still held ys and were then compared with the xv mean, which is
# why the xv proportion printed 0.
col <- scatter1$xv
col1 <- scatter1$ys
X.bar1 <- numeric(n) # sample means of xv
Y.bar1 <- numeric(n) # sample means of ys
for (i in seq_len(n)) {
  X.bar1[i] <- mean(sample(col, 20))
  Y.bar1[i] <- mean(sample(col1, 20))
}
# NOTE(review): this re-reads the same file into `data` and attaches it again.
# It is kept so `data` and the search path stay as the recorded output below
# describes; it looks redundant with scatter1 - confirm before removing.
setwd("C:/Users/amorales/Documents/R")
data <- read.table("scatter1.txt", header = TRUE)
attach(data)#guide was from the professor notes
## The following objects are masked from scatter1:
##
## xv, ys
str(data)
## 'data.frame': 2000 obs. of 2 variables:
## $ xv: num 90.8 16.1 31.1 39.8 48.8 ...
## $ ys: num 51.8 29 35.5 32.7 40.5 ...
summary(data)
## xv ys
## Min. : 0.0285 Min. :18.34
## 1st Qu.:25.3602 1st Qu.:32.44
## Median :50.5420 Median :40.11
## Mean :50.3060 Mean :40.03
## 3rd Qu.:75.6620 3rd Qu.:47.50
## Max. :99.9326 Max. :62.59
#xv
# xv sample means compared with the xv population mean.
mean(abs(X.bar1 - mean(col)) <= 0.5)
# Output varies by run; it is a small positive proportion rather than 0.
#ys
# ys sample means compared with the ys population mean.
mean(abs(Y.bar1 - mean(col1)) <= 0.5) # proportion falling within 0.5
## [1] 0.1815
#(c)
# Proportion of size-50 sample means that fall within 0.5 of the population
# mean, computed separately for xv and ys.
col <- scatter1$xv # set the columns before sampling
col1 <- scatter1$ys
X.bar2 <- numeric(n) # sample means of xv
Y.bar2 <- numeric(n) # sample means of ys
for (i in seq_len(n)) {
  X.bar2[i] <- mean(sample(col, 50))
  Y.bar2[i] <- mean(sample(col1, 50))
}
# NOTE(review): this re-reads the same file into `data` and attaches it again.
# It is kept so `data` and the search path stay as the recorded output below
# describes; it looks redundant with scatter1 - confirm before removing.
setwd("C:/Users/amorales/Documents/R")
data <- read.table("scatter1.txt", header = TRUE)
attach(data)#guide was from the professor notes
## The following objects are masked from data (pos = 3):
##
## xv, ys
## The following objects are masked from scatter1:
##
## xv, ys
str(data)
## 'data.frame': 2000 obs. of 2 variables:
## $ xv: num 90.8 16.1 31.1 39.8 48.8 ...
## $ ys: num 51.8 29 35.5 32.7 40.5 ...
summary(data)
## xv ys
## Min. : 0.0285 Min. :18.34
## 1st Qu.:25.3602 1st Qu.:32.44
## Median :50.5420 Median :40.11
## Mean :50.3060 Mean :40.03
## 3rd Qu.:75.6620 3rd Qu.:47.50
## Max. :99.9326 Max. :62.59
#xv
# xv sample means compared with the xv population mean.
mean(abs(X.bar2 - mean(col)) <= 0.5)
## [1] 0.0975
# proportion of sample means falling within 0.5 of the mean
#ys
# ys sample means compared with the ys population mean. Previously the xv
# sample means were compared with the ys mean, which printed 0.006.
mean(abs(Y.bar2 - mean(col1)) <= 0.5)
# Output varies by run; expect a noticeably larger proportion than for xv,
# because ys has the smaller standard deviation.
We take a measurement X from an abstract population that follows a Binomial(10, 0.5) distribution. Assume that 70 independent measurements are taken from this distribution (i.e., the sample size is n = 70).
#(a)
# Simulate the sampling distribution of the mean (approach from the notes):
# 10000 repetitions, each the mean of 70 draws from Binomial(size = 10, prob = 0.5).
means <- replicate(10000, mean(rbinom(70, 10, .5)))
#(b)
# Histogram of the simulated sample means.
hist(means, ylim = c(0, 3000), main = "")
(c) Overlay the smooth density curve of the Normal distribution on the histogram of the sampling distribution that was plotted in (b).
#(c)
# Redraw the histogram and keep its bin information so the Normal curve can be
# put on the same scale (counts) as the bars.
h <- hist(means, ylim = c(0, 3000), main = "")
mean(means) # mean of the sampling distribution
## [1] 4.998776
sd(means) # standard deviation of the sampling distribution
## [1] 0.1886617
# Evaluate the Normal density across the full x-range of the histogram.
xv <- seq(min(h$breaks), max(h$breaks), by = 0.01)
# A density integrates to 1, while the bars hold length(means) counts in bins
# of width diff(h$breaks)[1]. Multiplying by both converts density to expected
# counts per bin; this replaces the hard-coded factor 1225, which does not
# correspond to this histogram.
yv <- dnorm(xv, mean = mean(means), sd = sd(means)) *
  length(means) * diff(h$breaks)[1]
lines(xv, yv)
(d) Explain why the variance of the sampling distribution is always smaller than that of the distribution of the population.
#(d)
# One sample of 70 measurements from Binomial(size = 10, prob = 0.5).
binomial1 <- rbinom(n = 70, size = 10, prob = 0.5)
mean1 <- mean(binomial1) # sample mean
mean1
## [1] 4.985714
variance <- var(binomial1) # sample variance of the individual measurements
variance
## [1] 2.709938
# Dividing by the sample size gives the variance of the mean of 70 measurements.
aws <- variance / 70
aws
## [1] 0.0387134
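It is smaller because the variance of the sampling distribution equals the population variance divided by the sample size, Var(X-bar) = sigma^2 / n. Here the variance of the individual measurements is 2.709938, and dividing it by n = 70 gives a variance of the sample mean of 0.0387134. Intuitively, it is also because we are grouping the measurements: averaging 70 values lets the high and low ones cancel out, so the averages vary much less than single measurements do.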