# M. SAI TEJA # P301116CBA230
# Business Statistics 2 Assignment 2
# Q1. Student t-test
# Two samples of six observations each, compared with t.test() defaults
# (two-sided Welch test, i.e. equal variances are not assumed).
d1 <- c(1.3, 1.4, 1.7, 1.9, 1.5, 1.8)
d2 <- c(1.2, 1.3, 1.8, 2.0, 1.4, 1.7)
t.test(x = d1, y = d2)
##
## Welch Two Sample t-test
##
## data: d1 and d2
## t = 0.20761, df = 9.2928, p-value = 0.84
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3281293 0.3947960
## sample estimates:
## mean of x mean of y
## 1.600000 1.566667
# Q2. Two-Sample t-Test with Unequal Variance
# Second pair of six-observation samples; t.test() defaults to the Welch
# (unequal-variance) form of the two-sample test.
d3 <- c(1.312, 1.41, 1.73, 1.93, 1.15, 1.92)
d4 <- c(1.29, 1.34, 1.83, 2.20, 1.41, 2.13)
t.test(x = d3, y = d4)
##
## Welch Two Sample t-test
##
## data: d3 and d4
## t = -0.58133, df = 9.5838, p-value = 0.5744
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6053236 0.3559903
## sample estimates:
## mean of x mean of y
## 1.575333 1.700000
# Q3.Two-Sample t-Test with Equal Variance
# var.equal = TRUE switches from the Welch test to the pooled-variance
# (classic Student) two-sample t-test.
t.test(x = d1, y = d2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: d1 and d2
## t = 0.20761, df = 10, p-value = 0.8397
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3244046 0.3910713
## sample estimates:
## mean of x mean of y
## 1.600000 1.566667
#Q4. One-Sample t-Testing
# One-sample test: is the mean of d3 different from the hypothesised 1.3?
t.test(x = d3, mu = 1.3)
##
## One Sample t-test
##
## data: d3
## t = 2.0407, df = 5, p-value = 0.09678
## alternative hypothesis: true mean is not equal to 1.3
## 95 percent confidence interval:
## 1.228513 1.922153
## sample estimates:
## mean of x
## 1.575333
# Q.5 Using Directional Hypotheses
# We will use the `alternative` argument.
# One-sided, one-sample test: is the mean of d3 greater than 1.4?
t.test(x = d3, alternative = "greater", mu = 1.4)
##
## One Sample t-test
##
## data: d3
## t = 1.2995, df = 5, p-value = 0.1252
## alternative hypothesis: true mean is greater than 1.4
## 95 percent confidence interval:
## 1.303465 Inf
## sample estimates:
## mean of x
## 1.575333
# Q.6 Formula Syntax and Subsetting Samples in the t-Test( Grouping by two levels)
# Load the built-in mtcars data set and show its structure.
data(mtcars)
str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Formula interface: response (cyl) on the left, two-level grouping
# variable (am) on the right.
t.test(cyl ~ am, data = mtcars)
##
## Welch Two Sample t-test
##
## data: cyl by am
## t = 3.3541, df = 25.854, p-value = 0.002465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.7238488 3.0170419
## sample estimates:
## mean in group 0 mean in group 1
## 6.947368 5.076923
#Q. 7 The Wilcoxon U-Test (Mann-Whitney)
# Ans: When you have two samples to compare and your data are non-parametric, you can use the U-test.
# This goes by various names and may be known as the Mann-Whitney U-test or the Wilcoxon rank sum
# test. You use the wilcox.test() command to carry out the analysis.
#Q 8. Two-Sample U-Test
# Two independent samples: Wilcoxon rank sum (Mann-Whitney U) test.
wilcox.test(x = d3, y = d2)
##
## Wilcoxon rank sum test
##
## data: d3 and d2
## W = 19, p-value = 0.9372
## alternative hypothesis: true location shift is not equal to 0
#One-Sample U-Test
# With a single sample wilcox.test() runs the signed rank test against a
# location of 0 (the default mu); exact = FALSE requests the normal
# approximation instead of an exact p-value.
wilcox.test(x = d3, exact = FALSE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: d3
## V = 21, p-value = 0.03603
## alternative hypothesis: true location is not equal to 0
#Using Directional Hypotheses
# One-sided signed rank test: is the location of d3 less than 1.4?
# conf.int = TRUE additionally reports the (pseudo)median and its confidence
# interval. The argument is written out as `alternative` rather than `alt`,
# so the call does not depend on partial argument matching.
wilcox.test(
  x = d3,
  mu = 1.4,
  alternative = "less",
  exact = FALSE,
  conf.int = TRUE
)
##
## Wilcoxon signed rank test with continuity correction
##
## data: d3
## V = 16, p-value = 0.8958
## alternative hypothesis: true location is less than 1.4
## 95 percent confidence interval:
## -Inf 1.920055
## sample estimates:
## (pseudo)median
## 1.57002
# Formula Syntax and Subsetting Samples in the U-test
# You can use formula syntax to describe the situation and carry out the wilcox.test() on your data.
# This is much the same method you used for the t-test previously.
# The basic form of the command becomes:
# wilcox.test(response ~ predictor, data = data)
# Compare cyl between the two levels of am; exact = FALSE requests the
# normal approximation (with continuity correction) rather than an exact
# p-value.
wilcox.test(cyl ~ am, data = mtcars, exact = FALSE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: cyl by am
## W = 194, p-value = 0.0039
## alternative hypothesis: true location shift is not equal to 0
# Same comparison, with `subset` keeping only the rows whose am is 0 or 1.
# am is numeric, so it is matched against numeric values instead of relying
# on coercion to character. FALSE is spelled out because F is an ordinary
# variable that can be reassigned.
wilcox.test(
  cyl ~ am,
  data = mtcars,
  subset = am %in% c(0, 1),
  exact = FALSE
)
##
## Wilcoxon rank sum test with continuity correction
##
## data: cyl by am
## W = 194, p-value = 0.0039
## alternative hypothesis: true location shift is not equal to 0
# Paired t- and U-tests
# paired = TRUE turns the two-vector call into a signed rank test on the
# row-wise differences.
# NOTE(review): cyl and am are different variables measured on the same car,
# so this looks like a syntax demonstration rather than a real matched-pairs
# analysis -- confirm intent.
wilcox.test(x = mtcars$cyl, y = mtcars$am, paired = TRUE, exact = FALSE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: mtcars$cyl and mtcars$am
## V = 528, p-value = 6.709e-07
## alternative hypothesis: true location shift is not equal to 0
# Unpaired Welch t-test of cyl between the two vs groups, testing whether the
# difference in means equals 1 and reporting a 99% confidence interval.
# `paired` is not passed: FALSE is already the default, and the formula method
# in R >= 4.4.0 stops with an error whenever `paired` is supplied.
t.test(cyl ~ vs, data = mtcars, mu = 1, conf.level = 0.99)
##
## Welch Two Sample t-test
##
## data: cyl by vs
## t = 5.0763, df = 29.905, p-value = 1.894e-05
## alternative hypothesis: true difference in means is not equal to 1
## 99 percent confidence interval:
## 1.858121 3.887911
## sample estimates:
## mean in group 0 mean in group 1
## 7.444444 4.571429
# Correlation and covariance
# cor(x, y = NULL) carries out a basic correlation between x and y. If x is a matrix or data frame, y can be omitted.
# Simple Correlation
cor(x = d1, y = d2)  # Pearson correlation (the default method)
## [1] 0.9686196
cor(x = d2, y = d3)
## [1] 0.8617134
# Covariance between two vectors, then between two mtcars columns
cov(x = d2, y = d3)
## [1] 0.08945333
cov(x = mtcars$mpg, y = mtcars$cyl)
## [1] -9.172379
# Significance Testing in Correlation Tests
# Tests whether the Pearson correlation between d2 and d3 differs from 0.
cor.test(x = d2, y = d3)
##
## Pearson's product-moment correlation
##
## data: d2 and d3
## t = 3.3966, df = 4, p-value = 0.02736
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1668031 0.9846654
## sample estimates:
## cor
## 0.8617134
# Formula Syntax
# One-sided formula: both variables go on the right-hand side of `~`.
# Spearman's rank correlation is used; exact = FALSE requests the asymptotic
# p-value. FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
cor.test(~ mpg + cyl, data = mtcars, method = "spearman", exact = FALSE)
##
## Spearman's rank correlation rho
##
## data: mpg and cyl
## S = 10425, p-value = 4.69e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.9108013
# Tests for association
# Categorical data can be tested for associations between categories by using the chi-squared
# test. Routines to achieve this are accessed using the chisq.test() command.
# chisq.test(x, y = NULL): a basic chi-squared test is carried out on a matrix or data frame. If
# x is provided as a vector, a second vector can be supplied. If x is a
# single vector and y is not given, a goodness of fit test is carried out.
# Multiple Categories: Chi-Squared Tests
# Load the built-in women data set, run a chi-squared test on the whole data
# frame and list the components of the returned test object.
# NOTE(review): women is used elsewhere in this script via its height and
# weight columns, i.e. measurements rather than counts -- treating it as a
# contingency table looks like a syntax demonstration; confirm intent.
data(women)
x <- chisq.test(x = women)
summary(object = x)
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 30 -none- numeric
## expected 30 -none- numeric
## residuals 30 -none- numeric
## stdres 30 -none- numeric
# Monte Carlo Simulation
# Simulated p-value from B = 130 Monte Carlo replicates instead of the
# chi-squared reference distribution.
chisq.test(x = women, simulate.p.value = TRUE, B = 130)
##
## Pearson's Chi-squared test with simulated p-value (based on 130
## replicates)
##
## data: women
## X-squared = 1.2297, df = NA, p-value = 1
# Yates' Correction for 2 x 2 Tables
# 2 x 2 contingency table of vs (rows) by am (columns).
d <- table(mtcars$vs, mtcars$am)
# Long-format counts of the am-by-vs table; not referenced again in the
# visible part of the script.
y <- as.data.frame(table(mtcars$am, mtcars$vs))
# For a 2 x 2 table chisq.test() applies Yates' continuity correction by
# default.
chisq.test(x = d)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: d
## X-squared = 0.34754, df = 1, p-value = 0.5555
# Same table, with Yates' continuity correction switched off.
chisq.test(x = d, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: d
## X-squared = 0.90688, df = 1, p-value = 0.3409
# Single Category: Goodness of Fit Tests
# Use the chisq.test() command to carry out a goodness of fit test. In this case we
# have two vectors of numerical values, one representing the observed values and the other representing
# the expected ratio of values. The goodness of fit test compares the data against the ratios (probabilities).
# Goodness of fit: height values are the observed vector, weight values the
# expected ratios; rescale.p = TRUE rescales p so that it sums to 1.
chisq.test(x = women$height, p = women$weight, rescale.p = TRUE)
##
## Chi-squared test for given probabilities
##
## data: women$height
## X-squared = 1.8076, df = 14, p-value = 1
# summary of chi-square test
# List the components (name, length, class, mode) of the goodness-of-fit
# test object.
summary(
  chisq.test(x = women$height, p = women$weight, rescale.p = TRUE)
)
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 15 -none- numeric
## expected 15 -none- numeric
## residuals 15 -none- numeric
## stdres 15 -none- numeric
# 1. The t-test can be carried out using the t.test() command.
# This can conduct one- or two-sample tests, and a range of options allows one-tailed and two-tailed tests.
# 2. The U-test is accessed via the wilcox.test() command. This non-parametric test of differences
# can be applied as one-sample or two-sample versions.
# 3. Matched paired data can be analyzed using the t-test or U-test by the simple addition of the
# paired = TRUE instruction in the t.test() or wilcox.test() commands.
# 4. The subset instruction can be used to select one or more samples from a variable containing
# several groups.
# 5. Correlation and covariance can be carried out on pairs of vectors, or on entire data frames
# or matrix objects, using the cor() and cov() commands. A single variable can be specified to
# produce a targeted correlation or covariance matrix.
# 6. Three types of correlation can be used: Pearson's Product Moment,
# Spearman's rho or Kendall's tau.
# 7. Correlation hypothesis tests can be carried out using Pearson, Spearman, or Kendall methods
# via the cor.test() command.
# Two variables can be specified as separate vectors or using the formula syntax.
# 8. Tests using categorical data can be carried out via the chisq.test() command. This can
# conduct standard tests of association (chi-squared tests) or goodness of fit tests.
# Monte Carlo simulation can be used to produce the p-value.