MATH 377 Homework 6 Alejandro Morales

A study of stress among the students on a college campus shows that the stress scores follow a Uniform distribution with the lowest stress score equal to 1 and the highest equal to 5.

Use a sample of 75 students and the four built-in R functions (d, p, q, r) for the probability distribution of a random variable to compute the following [Hint: you may use the Central Limit Theorem]:

The probability that the average stress score for the 75 students is less than 2.

#(a)
# By the Central Limit Theorem the mean of n = 75 Uniform(1, 5) scores is
# approximately Normal(mu, sigma / sqrt(n)). The population parameters are
# used (not the statistics of one simulated sample), and the spread is the
# standard error of the mean rather than the spread of a single score.
n_students <- 75
mean_1 <- (1 + 5) / 2 # population mean of Uniform(1, 5): (a + b) / 2
mean_1

## [1] 3

Sd <- sqrt((5 - 1)^2 / 12) # population standard deviation: sqrt((b - a)^2 / 12)
Sd

## [1] 1.154701

# P(sample mean < 2); the standard error is Sd / sqrt(75), so z = -7.5
p <- pnorm(2, mean = mean_1, sd = Sd / sqrt(n_students))
p

## [1] 3.190892e-14

The 90th percentile for the average stress score for the 75 students.

#(b)
# Mean of Uniform(1, 5): (a + b) / 2 = (1 + 5) / 2 = 3.
# Standard deviation of one score: sqrt((b - a)^2 / 12).
# Standard deviation of the sampling distribution of x bar:
# sigma_x_bar = sigma / sqrt(n).
# sigma is computed exactly instead of hard-coding the rounded value 1.15,
# which shifted the percentile in the fourth decimal place.
qnorm(0.90, mean = 3, sd = sqrt((5 - 1)^2 / 12) / sqrt(75))

## [1] 3.170874

The probability that the total of the 75 stress scores is less than 200.

#(c)
# Central Limit Theorem for sums: the total of n scores is approximately
# Normal with mean n * mu_X and standard deviation sqrt(n) * sigma_X.
n <- 75
# Named sigma (not sd) so the stats::sd() function is not shadowed by a number.
sigma <- sqrt((5 - 1)^2 / 12)
sd_sums <- sqrt(n) * sigma
sd_sums

## [1] 10

# n * mu_X = the mean of the total of the n scores (mu_X = 3)
mean_X <- n * 3
pnorm(200, mean_X, sd_sums) # probability that the total score is less than 200

## [1] 0.006209665

The 90th percentile for the total stress score for the 75 students.

#(d)
# 90th percentile of the total score, using the mean and standard deviation
# of the sum (mean_X, sd_sums) computed for part (c).
qnorm(p = 0.90, mean = mean_X, sd = sd_sums)

## [1] 237.8155

PART B:

Consider again the same stress study that is described in PART A and answer the same questions. However, this time assume that the stress score may obtain only the values 1, 2, 3, 4 or 5, with the same probability for obtaining each of the values [Hint: you may use the Central Limit Theorem] .

# Discrete uniform scores on 1, 2, 3, 4, 5. The exact population mean and
# standard deviation are used with the Central Limit Theorem; the previous
# version used the mean and sd of one random sample and did not divide the
# sd by sqrt(n), so it described a single score rather than the average.
range_1 <- 1:5 # possible stress scores, each with probability 1/5
n_b <- 75 # number of students
mean1 <- mean(range_1) # population mean = 3
SD <- sqrt(mean((range_1 - mean1)^2)) # population sd = sqrt(2)

# (a) probability that the average score of the 75 students is less than 2
prob <- pnorm(2, mean1, SD / sqrt(n_b), lower.tail = TRUE)
prob # approx. 4.6e-10

# (b) 90th percentile of the average score
qnorm(0.90, mean1, SD / sqrt(n_b)) # approx. 3.2093

# (c) probability that the total of the 75 scores is less than 200
pnorm(200, n_b * mean1, sqrt(n_b) * SD) # approx. 0.0206

# (d) 90th percentile of the total score
qnorm(0.90, n_b * mean1, sqrt(n_b) * SD) # approx. 240.70

Use the sampling distribution of the sample mean to estimate the means of the variables “xv” and “ys” in the data set “scatter1.txt” from the R book.

For the simulation do the sampling 2000 times with sample size n=20 and n=50.

#xv
# Read scatter1.txt into a data frame and inspect it; the printed structure
# below shows 2000 observations of the two numeric columns xv and ys.
# NOTE(review): the absolute setwd() path and attach() tie this script to one
# machine and to the search path; they are kept here to preserve behaviour.
setwd(dir = "C:/Users/amorales/Documents/R")
scatter1 <- read.table(file = "scatter1.txt", header = TRUE)
attach(what = scatter1)
str(object = scatter1)

## 'data.frame':    2000 obs. of  2 variables:
##  $ xv: num  90.8 16.1 31.1 39.8 48.8 ...
##  $ ys: num  51.8 29 35.5 32.7 40.5 ...

summary(object = scatter1)

##        xv                ys       
##  Min.   : 0.0285   Min.   :18.34  
##  1st Qu.:25.3602   1st Qu.:32.44  
##  Median :50.5420   Median :40.11  
##  Mean   :50.3060   Mean   :40.03  
##  3rd Qu.:75.6620   3rd Qu.:47.50  
##  Max.   :99.9326   Max.   :62.59

# Simulate the sampling distribution of the mean of xv: draw 2000 samples of
# size 20 (without replacement) and keep each sample's mean in X.bar1.
col <- scatter1$xv
n <- 2000 # number of simulated samples
X.bar1 <- vapply(
  seq_len(n),
  function(draw) mean(sample(col, 20)),
  numeric(1)
)
mean(col)

## [1] 50.30595

mean(X.bar1)

## [1] 50.70583

var(col)

## [1] 856.3489

var(X.bar1)

## [1] 41.78023

sd(col)

## [1] 29.26344

sd(X.bar1)

## [1] 6.463763

# Simulate the sampling distribution of the mean of xv: draw 2000 samples of
# size 50 (without replacement) and keep each sample's mean in X.bar.
col <- scatter1$xv
n <- 2000 # number of simulated samples
X.bar <- rep(0, n)
for (i in seq_len(n)) {
  X.samp <- sample(col, 50)
  # Store into X.bar, the vector summarised below. Writing to X.bar1 here
  # left X.bar all zeros and overwrote the n = 20 results.
  X.bar[i] <- mean(X.samp)
}
mean(col)

## [1] 50.30595

mean(X.bar)

## varies by run; expected to be close to mean(col)

var(col)

## [1] 856.3489

var(X.bar)

## varies by run; expected to be roughly var(col) / 50

sd(col)

## [1] 29.26344

sd(X.bar)

## varies by run; expected to be roughly sd(col) / sqrt(50)

#ys


# Simulate the sampling distribution of the mean of ys: draw 2000 samples of
# size 20 (without replacement) and keep each sample's mean in X.bar.
col <- scatter1$ys
n <- 2000 # number of simulated samples
X.bar <- replicate(n, mean(sample(col, 20)))
mean(col)

## [1] 40.02937

mean(X.bar)

## [1] 40.04622

var(col)

## [1] 84.80254

var(X.bar)

## [1] 4.172101

sd(col)

## [1] 9.20883

sd(X.bar)

## [1] 2.042572

# Simulate the sampling distribution of the mean of ys: draw 2000 samples of
# size 50 (without replacement) and keep each sample's mean in X.bar.
col <- scatter1$ys
n <- 2000 # number of simulated samples
X.bar <- numeric(n)
draw <- 0
while (draw < n) {
  draw <- draw + 1
  X.bar[draw] <- mean(sample(col, 50))
}
mean(col)

## [1] 40.02937

mean(X.bar)

## [1] 40.00805

var(col)

## [1] 84.80254

var(X.bar)

## [1] 1.628406

sd(col)

## [1] 9.20883

sd(X.bar)

## [1] 1.27609

Use the sampling distribution(with n=20) that you constructed in (a) to compute an approximation of the probability of the sample mean falling within .5 of the mean of each variable “xv” and “ys”.

#(b)
# Build one n = 20 sampling distribution per variable. Previously the sample
# means were drawn from `col` before it was set to xv (it still held ys) and
# were then compared with the mean of xv, which is why the xv result was 0.
col <- scatter1$xv # set the columns
col1 <- scatter1$ys
n <- 2000 # number of simulated samples
X.bar1 <- rep(0, n) # sample means of xv
Y.bar1 <- rep(0, n) # sample means of ys
for (i in seq_len(n)) {
  X.bar1[i] <- mean(sample(col, 20))
  Y.bar1[i] <- mean(sample(col1, 20))
}
# scatter1 is already in use above, so the file is not read again and no
# second copy is attach()ed (which only produced masking messages).

#xv
# Proportion of simulated sample means within 0.5 of the mean of xv.
mean(abs(X.bar1 - mean(col)) <= 0.5)

## varies by run; about 0.06 is expected from a Normal approximation

#ys
# Proportion of simulated sample means within 0.5 of the mean of ys.
mean(abs(Y.bar1 - mean(col1)) <= 0.5) # fall within .5

## varies by run; about 0.19 is expected from a Normal approximation

Do the same question for (b) with n=50.

#(c)
# Build one n = 50 sampling distribution per variable. Previously only one
# set of sample means was drawn (from xv) and it was also compared with the
# mean of ys, which is why the ys result was nearly 0.
col <- scatter1$xv # set the columns
col1 <- scatter1$ys
n <- 2000 # number of simulated samples
X.bar2 <- rep(0, n) # sample means of xv
Y.bar2 <- rep(0, n) # sample means of ys
for (i in seq_len(n)) {
  X.bar2[i] <- mean(sample(col, 50))
  Y.bar2[i] <- mean(sample(col1, 50))
}
# scatter1 is already in use above, so the file is not read again and no
# further copies are attach()ed (which only produced masking messages).

#xv
# Proportion of simulated sample means within 0.5 of the mean of xv.
mean(abs(X.bar2 - mean(col)) <= 0.5)

## varies by run; about 0.10 is expected from a Normal approximation

#mean falling within .5 of the mean

#ys
# Proportion of simulated sample means within 0.5 of the mean of ys.
mean(abs(Y.bar2 - mean(col1)) <= 0.5)

## varies by run; about 0.30 is expected from a Normal approximation

A Sampling distribution can be considered in the context of theoretical distribution models as well. We can approximate the distribution of the sample mean by simulation:

We take a measurement X from an abstract population distributed as Binomial(10, 0.5). Assume that 70 independent measurements are taken from this distribution (i.e., the sample size is n = 70).

Construct the distribution of the sample mean by using a simulation producing 10000 sample means.

#(a)
# Simulate 10000 sample means. Each one is the mean of 70 independent draws
# from Binomial(size = 10, prob = 0.5).
means <- vapply(
  seq_len(10000),
  function(draw) mean(rbinom(70, 10, .5)),
  numeric(1)
)

Plot the histogram of the distribution that you constructed in (a).

#(b)
# Histogram of the simulated sample means, with no title and a fixed y-axis.
hist(x = means, main = "", ylim = c(0, 3000))

(c) Overlay the smooth density curve of the Normal distribution on the histogram of the sampling distribution that was plotted in (b).

#(c)
# Keep the histogram object so the Normal curve can be scaled to its counts.
h <- hist(means, ylim = c(0, 3000), main = "")
mean(means) # mean of the simulated sample means

## [1] 4.998776

sd(means) # standard deviation of the simulated sample means

## [1] 0.1886617

# A density integrates to 1 while the bar heights sum to length(means), so the
# curve is scaled by length(means) * bin width. This replaces the fixed
# constant 1225, which does not depend on this histogram's bins or counts.
bin_width <- diff(h$breaks)[1]
# Evaluate the curve over the full range of the bars so it is not cut off.
xv <- seq(min(h$breaks), max(h$breaks), length.out = 200)
yv <- dnorm(xv, mean = mean(means), sd = sd(means)) * length(means) * bin_width
lines(xv, yv)

(d) Explain why the variance of the sampling distribution is always smaller than that of the distribution of the population.

#(d)
# Draw one sample of 70 Binomial(10, 0.5) measurements, then compare its
# variance with that variance divided by the sample size.
binomial1 <- rbinom(n = 70, size = 10, prob = .5)
mean1 <- mean(binomial1) # sample mean
mean1

## [1] 4.985714

variance <- var(binomial1) # sample variance of the rbinom draws
variance

## [1] 2.709938

aws <- variance / length(binomial1) # variance divided by n = 70
aws

## [1] 0.0387134

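The variance of the sampling distribution is smaller because the sample mean averages n independent measurements, so its variance is the population variance divided by n: Var(X bar) = sigma^2 / n. In the run above, the variance I get is 2.709938 and the variance of the sample mean I get is 2.709938 / 70 = 0.0387134. Since n = 70, we divide the variance by 70. It is also because we are grouping the measurements: within each sample the high and low values partly cancel, so the sample means vary less than the individual measurements.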