#  M. SAI TEJA          # P301116CBA230


# Business Statistics 2 Assignment 2


# Q1.  Student t-test
# Two samples of six measurements each, compared with R's default two-sample
# t-test (Welch's test, which does not assume equal variances).
d1 <- c(1.3, 1.4, 1.7, 1.9, 1.5, 1.8)
d2 <- c(1.2, 1.3, 1.8, 2.0, 1.4, 1.7)
t.test(x = d1, y = d2)
## 
##  Welch Two Sample t-test
## 
## data:  d1 and d2
## t = 0.20761, df = 9.2928, p-value = 0.84
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3281293  0.3947960
## sample estimates:
## mean of x mean of y 
##  1.600000  1.566667
# Q2. Two-Sample t-Test with Unequal Variance
# A second pair of six-value samples, again compared with the default
# (unequal variance) two-sample t-test.
d3 <- c(1.312, 1.41, 1.73, 1.93, 1.15, 1.92)
d4 <- c(1.29, 1.34, 1.83, 2.20, 1.41, 2.13)
t.test(x = d3, y = d4)
## 
##  Welch Two Sample t-test
## 
## data:  d3 and d4
## t = -0.58133, df = 9.5838, p-value = 0.5744
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6053236  0.3559903
## sample estimates:
## mean of x mean of y 
##  1.575333  1.700000
# Q3.Two-Sample t-Test with Equal Variance
# Compare d1 and d2 again, this time treating the two variances as equal
# (var.equal = TRUE pools them).
t.test(x = d1, y = d2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  d1 and d2
## t = 0.20761, df = 10, p-value = 0.8397
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3244046  0.3910713
## sample estimates:
## mean of x mean of y 
##  1.600000  1.566667
#Q4. One-Sample t-Testing
# One-sample test: does the mean of d3 differ from the hypothesised value 1.3?
t.test(x = d3, mu = 1.3)
## 
##  One Sample t-test
## 
## data:  d3
## t = 2.0407, df = 5, p-value = 0.09678
## alternative hypothesis: true mean is not equal to 1.3
## 95 percent confidence interval:
##  1.228513 1.922153
## sample estimates:
## mean of x 
##  1.575333
# Q.5 Using Directional Hypotheses
# The `alternative` argument selects a one-sided test; here the alternative
# hypothesis is that the true mean of d3 is greater than 1.4.
t.test(x = d3, alternative = "greater", mu = 1.4)
## 
##  One Sample t-test
## 
## data:  d3
## t = 1.2995, df = 5, p-value = 0.1252
## alternative hypothesis: true mean is greater than 1.4
## 95 percent confidence interval:
##  1.303465      Inf
## sample estimates:
## mean of x 
##  1.575333
# Q.6  Formula Syntax and Subsetting Samples in the t-Test( Grouping by two levels)
# Load the built-in mtcars data set and print a compact view of its columns.
utils::data("mtcars")
utils::str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Formula interface: the response (cyl) goes on the left of `~` and the
# two-level grouping variable (am) on the right.
t.test(cyl ~ am, data = mtcars)
## 
##  Welch Two Sample t-test
## 
## data:  cyl by am
## t = 3.3541, df = 25.854, p-value = 0.002465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.7238488 3.0170419
## sample estimates:
## mean in group 0 mean in group 1 
##        6.947368        5.076923
#Q. 7 The Wilcoxon U-Test (Mann-Whitney)
# Ans: When you have two samples to compare and your data are non-parametric,
# you can use the U-test. This goes by various names and may be known as the
# Mann-Whitney U-test or the Wilcoxon rank sum test. You use the wilcox.test()
# command to carry out the analysis.


#Q 8. Two-Sample U-Test
# Two-sample rank sum (Mann-Whitney U) test comparing d3 with d2.
wilcox.test(x = d3, y = d2)
## 
##  Wilcoxon rank sum test
## 
## data:  d3 and d2
## W = 19, p-value = 0.9372
## alternative hypothesis: true location shift is not equal to 0
#One-Sample U-Test
# One-sample signed rank test on d3; exact = FALSE requests the approximate
# p-value rather than the exact one.
wilcox.test(x = d3, exact = FALSE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  d3
## V = 21, p-value = 0.03603
## alternative hypothesis: true location is not equal to 0
#Using Directional Hypotheses
# One-sided signed rank test of d3 against the hypothesised location 1.4,
# also requesting a confidence interval. The argument is spelled out as
# `alternative` instead of relying on partial matching of `alt`.
wilcox.test(
  d3,
  mu = 1.4,
  alternative = "less",
  exact = FALSE,
  conf.int = TRUE
)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  d3
## V = 16, p-value = 0.8958
## alternative hypothesis: true location is less than 1.4
## 95 percent confidence interval:
##      -Inf 1.920055
## sample estimates:
## (pseudo)median 
##        1.57002
# Formula Syntax and Subsetting Samples in the U-test

# Use the formula syntax to describe the grouping and carry out wilcox.test()
# on the data. This is much the same method used for the t-test previously.
# The basic form of the command is:
#   wilcox.test(response ~ predictor, data = data)
wilcox.test(cyl ~ am, data = mtcars, exact = FALSE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  cyl by am
## W = 194, p-value = 0.0039
## alternative hypothesis: true location shift is not equal to 0
# Rank sum test of cyl by am, using `subset` to keep only the rows whose am
# value is 0 or 1. am is numeric, so it is matched against numeric values,
# and FALSE is written out because the shorthand F can be reassigned.
wilcox.test(
  cyl ~ am,
  data = mtcars,
  subset = am %in% c(0, 1),
  exact = FALSE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  cyl by am
## W = 194, p-value = 0.0039
## alternative hypothesis: true location shift is not equal to 0
# Paired t- and U-tests
# Paired signed rank test between the cyl and am columns of mtcars.
wilcox.test(
  x = mtcars$cyl,
  y = mtcars$am,
  paired = TRUE,
  exact = FALSE
)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  mtcars$cyl and mtcars$am
## V = 528, p-value = 6.709e-07
## alternative hypothesis: true location shift is not equal to 0
# Two-sample t-test of cyl grouped by vs, testing whether the difference in
# means equals 1 and reporting a 99% confidence interval.
# `paired` is deliberately not passed: the groups are independent (FALSE is
# the default), and recent R releases reject `paired` in the formula method
# of t.test().
t.test(cyl ~ vs, data = mtcars, mu = 1, conf.level = 0.99)
## 
##  Welch Two Sample t-test
## 
## data:  cyl by vs
## t = 5.0763, df = 29.905, p-value = 1.894e-05
## alternative hypothesis: true difference in means is not equal to 1
## 99 percent confidence interval:
##  1.858121 3.887911
## sample estimates:
## mean in group 0 mean in group 1 
##        7.444444        4.571429
# Correlation and covariance

# cor(x, y = NULL) --- Carries out a basic correlation between x and y. If x is a matrix or data frame, y can be omitted.

# Simple Correlation

# Correlation (default method) between pairs of the sample vectors.
cor(x = d1, y = d2)
## [1] 0.9686196
cor(x = d2, y = d3)
## [1] 0.8617134
# Covariance between two of the sample vectors.
cov(x = d2, y = d3)
## [1] 0.08945333
# Covariance between the mpg and cyl columns of mtcars.
with(mtcars, cov(mpg, cyl))
## [1] -9.172379
# Significance Testing in Correlation Tests

# Test whether the correlation between d2 and d3 differs from zero.
cor.test(x = d2, y = d3)
## 
##  Pearson's product-moment correlation
## 
## data:  d2 and d3
## t = 3.3966, df = 4, p-value = 0.02736
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1668031 0.9846654
## sample estimates:
##       cor 
## 0.8617134
# Formula Syntax

# Spearman rank correlation test between mpg and cyl using the one-sided
# formula syntax (`~ a + b`). FALSE is written out because the shorthand F
# is an ordinary variable that can be reassigned.
cor.test(~ mpg + cyl, data = mtcars, method = "spearman", exact = FALSE)
## 
##  Spearman's rank correlation rho
## 
## data:  mpg and cyl
## S = 10425, p-value = 4.69e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.9108013
# Tests for association
# Categorical data can be tested for associations between categories by using
# the chi-squared test. Routines to achieve this are accessed using the
# chisq.test() command.
# chisq.test(x, y = NULL): a basic chi-squared test is carried out on a matrix
# or data frame. If x is provided as a vector, a second vector can be supplied.
# If x is a single vector and y is not given, a goodness of fit test is
# carried out.

# Multiple Categories: Chi-Squared Tests

# Load the built-in women data set, run a chi-squared test on the whole data
# frame, and list the components stored in the returned test object.
utils::data("women")
x <- chisq.test(women)
summary(x)
##           Length Class  Mode     
## statistic  1     -none- numeric  
## parameter  1     -none- numeric  
## p.value    1     -none- numeric  
## method     1     -none- character
## data.name  1     -none- character
## observed  30     -none- numeric  
## expected  30     -none- numeric  
## residuals 30     -none- numeric  
## stdres    30     -none- numeric
# Monte Carlo Simulation

# Chi-squared test on women with the p-value obtained by Monte Carlo
# simulation, based on 130 replicates.
chisq.test(
  women,
  simulate.p.value = TRUE,
  B = 130
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 130
##  replicates)
## 
## data:  women
## X-squared = 1.2297, df = NA, p-value = 1
# Yates' Correction for 2 x 2 Tables

# Counts of am crossed with vs, stored in long (data frame) form.
y <- as.data.frame(table(mtcars$am, mtcars$vs))

# Contingency table of vs (rows) by am (columns).
d <- table(mtcars$vs, mtcars$am)

# Chi-squared test on the table, with chisq.test()'s default settings.
chisq.test(d)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  d
## X-squared = 0.34754, df = 1, p-value = 0.5555
# Same test on the table d with the continuity correction switched off.
chisq.test(x = d, correct = FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  d
## X-squared = 0.90688, df = 1, p-value = 0.3409
# Single Category: Goodness of Fit Tests

# chisq.test() command to carry out a goodness of fit test. In this case we
#have two vectors of numerical values, one representing the observed values and the other representing
#the expected ratio of values. The goodness of fit tests the data against the ratios (probabilities)

# Goodness of fit: women$height supplies the observed values and women$weight
# the expected ratios; rescale.p = TRUE rescales them to sum to 1.
chisq.test(
  x = women$height,
  p = women$weight,
  rescale.p = TRUE
)
## 
##  Chi-squared test for given probabilities
## 
## data:  women$height
## X-squared = 1.8076, df = 14, p-value = 1
# summary of chi-square test
# List the components of the goodness of fit test object.
summary(
  chisq.test(x = women$height, p = women$weight, rescale.p = TRUE)
)
##           Length Class  Mode     
## statistic  1     -none- numeric  
## parameter  1     -none- numeric  
## p.value    1     -none- numeric  
## method     1     -none- character
## data.name  1     -none- character
## observed  15     -none- numeric  
## expected  15     -none- numeric  
## residuals 15     -none- numeric  
## stdres    15     -none- numeric
# 1. The t-test can be carried out using the t.test() command.
# This can conduct one- or two-sample tests, and a range of options allows one-tailed and two-tailed tests.

# 2. The U-test is accessed via the wilcox.test() command. This non-parametric test of differences
# can be applied as one-sample or two-sample versions.

#3.Matched paired data can be analyzed using t-test or U-test by the simple addition of the
#paired = TRUE instruction in the t.test() or wilcox.test() commands.

# 4. The subset instruction can be used to select one or more samples from a variable containing
# several groups.

#5. Correlation and covariance can be carried out on pairs of vectors, or on entire data frames
#or matrix objects using the cor() and cov() commands. A single variable can be specified to
#produce a targeted correlation or covariance matrix.

# 6. Three types of correlation can be used: Pearson's Product Moment,
# Spearman's rho or Kendall's tau.


#7.Correlation hypothesis tests can be carried out using Pearson, Spearman, or Kendall methods
#via the cor.test() command. 
#Two variables can be specified as separate vectors or using the formula syntax.


#8. tests using categorical data can be carried out via the chisq.test() command. This can
#conduct standard tests of association (chi-squared tests) or goodness of fit tests.
#Monte Carlo simulation can be used to produce the p-value.