# lab8zzzzz.R

#OpenIntro data
# Fetch the OpenIntro "nc" workspace and restore its objects into the session.
# The download is skipped when a local copy already exists, so re-running the
# script does not need network access; delete nc.RData to force a refresh.
# NOTE(review): `nc` and `inference()` are used below but never defined in this
# script -- presumably both come from this workspace; confirm.
nc_file <- "nc.RData"
if (!file.exists(nc_file)) {
  download.file(
    "http://www.openintro.org/stat/data/nc.RData",
    destfile = nc_file,
    mode = "wb" # .RData is a binary format; request a binary transfer explicitly
  )
}
load(nc_file)
#1
# Theoretical confidence interval for the mean of nc$weeks. No confidence
# level is passed here; the recorded output below reports a 95% interval.
inference(
  y = nc$weeks,
  est = "mean",
  type = "ci",
  method = "theoretical",
  null = 0,
  alternative = "twosided"
)

## Warning: package 'BHH2' was built under R version 3.6.3

## Single mean 
## Summary statistics:

## mean = 38.3347 ;  sd = 2.9316 ;  n = 998 
## Standard error = 0.0928 
## 95 % Confidence interval = ( 38.1528 , 38.5165 )

# We have 95% confidence that the average length of pregnancy is between 38.15 and 38.52 weeks

#2
# Same interval for the mean of nc$weeks, now requested at the 90% level
# through conflevel.
inference(
  y = nc$weeks,
  est = "mean",
  type = "ci",
  method = "theoretical",
  conflevel = 0.90,
  null = 0,
  alternative = "twosided"
)

## Single mean 
## Summary statistics:

## mean = 38.3347 ;  sd = 2.9316 ;  n = 998 
## Standard error = 0.0928 
## 90 % Confidence interval = ( 38.182 , 38.4873 )

#We have 90% confidence that the average length of pregnancy is between 38.18 and 38.49 weeks.

#3
# Two-sided theoretical hypothesis test for a difference in mean nc$gained
# between the groups of nc$mature (null value: difference of 0).
inference(
  y = nc$gained,
  x = nc$mature,
  est = "mean",
  type = "ht",
  method = "theoretical",
  null = 0,
  alternative = "twosided"
)

## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_mature mom = 129, mean_mature mom = 28.7907, sd_mature mom = 13.4824
## n_younger mom = 844, mean_younger mom = 30.5604, sd_younger mom = 14.3469

## Observed difference between means (mature mom-younger mom) = -1.7697
## 
## H0: mu_mature mom - mu_younger mom = 0 
## HA: mu_mature mom - mu_younger mom != 0 
## Standard error = 1.286 
## Test statistic: Z =  -1.376 
## p-value =  0.1686

# At the 5% significance level the p-value (0.1686) is greater than alpha, so we fail to reject the null hypothesis. The data do not provide convincing evidence of a difference in average weight gained between younger and mature moms.

#4
# Apply summary() to nc$mage separately within each level of nc$mature.
by(data = nc$mage, INDICES = nc$mature, FUN = summary)

## nc$mature: mature mom
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35.00   35.00   37.00   37.18   38.00   50.00 
## ------------------------------------------------------------ 
## nc$mature: younger mom
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   21.00   25.00   25.44   30.00   34.00

# The age cutoff for younger moms is 34, which means that mature moms start at 35. This comes from the "by" command, which prints a summary of mage for each group of mature and so gives us the min and max age in each group.

#5
#y = number of visits to the hospital. 
#x = marital status
#Is the number of visits to the hospital different for married and non married mothers?
#H0: mu_married = mu_not-married
#HA: mu_married != mu_not-married
# Two-sided theoretical hypothesis test for a difference in mean nc$visits
# between the groups of nc$marital (null value: difference of 0).
inference(
  y = nc$visits,
  x = nc$marital,
  est = "mean",
  type = "ht",
  method = "theoretical",
  null = 0,
  alternative = "twosided"
)

## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_married = 380, mean_married = 10.9553, sd_married = 4.2408
## n_not married = 611, mean_not married = 12.82, sd_not married = 3.5883

## Observed difference between means (married-not married) = -1.8647
## 
## H0: mu_married - mu_not married = 0 
## HA: mu_married - mu_not married != 0 
## Standard error = 0.262 
## Test statistic: Z =  -7.13 
## p-value =  0

# We reject the null because the p-value is approximately 0 (less than alpha); therefore there is convincing evidence of a difference in the average number of visits to the hospital between married and non-married mothers.


#Milestone
#1
# One-sample, one-sided t-test on the Speed of a random sample of n Pokemon
# whose Type.1 is "Bug".
poke <- read.csv("Pokemon.csv")
n <- 40
# CL = 95%
# H0: mu = 50    average Speed of a Bug Pokemon is 50
# HA: mu < 50    average Speed of a Bug Pokemon is less than 50
pokebug <- subset(poke, Type.1 == "Bug")
# sample() draws without replacement and fails when fewer than n rows are
# available, so check the pool size up front.
stopifnot(nrow(pokebug) >= n)
# Fix the RNG seed so the sample, the histogram and the test result are the
# same on every run. The recorded output below came from an unseeded run and
# will not match exactly.
set.seed(2020)
sampbug <- sample(pokebug$Speed, n)
muspeed <- mean(sampbug)
# independent
hist(sampbug) # normal

t.test(sampbug, mu = 50, alternative = "less")

## 
##  One Sample t-test
## 
## data:  sampbug
## t = 3.4235, df = 39, p-value = 0.9993
## alternative hypothesis: true mean is less than 50
## 95 percent confidence interval:
##      -Inf 77.26896
## sample estimates:
## mean of x 
##    68.275

# We fail to reject the null because the p-value is greater than alpha; therefore, at the 5% significance level, there is no convincing evidence that the average speed of Bug Pokemon is less than 50.

#2
# Split the Pokemon data into legendary and non-legendary rows for a
# two-sample comparison of HP.
poke <- read.csv("Pokemon.csv")
n <- 40
# CL = 95%
# H0: mu_L = mu_nL     average HP of legendary Pokemon is equal to average HP of non-legendary Pokemon
# HA: mu_L > mu_nL     average HP of legendary Pokemon is greater than average HP of non-legendary Pokemon
# as.logical() accepts a logical column as well as "True"/"TRUE" text, so the
# split does not depend on how read.csv() typed the Legendary column.
pokeL <- subset(poke, as.logical(Legendary))
pokenL <- subset(poke, !as.logical(Legendary))
#sampL <- sample(pokeL$HP, n)
#sampnL <- sample(pokenL$HP, n)
#independent
#hist(sampL) im getting an error when trying to publish
#hist(sampnL)
#normal distribution 
#t.test(sampL,sampnL, alternative = "greater")
# The p-value is less than alpha, therefore we reject the null hypothesis. At the 5% significance level there is convincing evidence that the average HP of legendary Pokemon is greater than that of non-legendary Pokemon.

# lab8zzzzz.R
#
# C21Zhivko.Kolevski.m
#
# 2020-04-05