Inferential Statistics
We use inferential statistics to infer from the sample data what the population might think. We also use inferential statistics to judge the probability that an observed difference between groups is a dependable one rather than one that might have happened by chance in this study.
Hypothesis test
In a hypothesis test, there are 2 types of hypotheses.
H0 (Null Hypothesis): the existing, default claim that is assumed to hold unless the data give evidence against it.
H1 (Alternative Hypothesis): the claim we are trying to find evidence for.
In hypothesis testing we use tests such as
"t-test","z-test","Anova","F-test","chi-square"
to decide whether the data give enough evidence to reject H0 in favour of H1.
# Load the data set from a CSV file picked interactively; the first row of
# the file holds the column names. file.choose() asks the user to pick the
# file, so this step needs an interactive session.
data <- read.csv(file.choose(), header = TRUE)
# Print the whole data frame to inspect it.
data
## Names Age ID Gender Height Region Profit Talkitive
## 1 sujith 23 4567 male 5.11 A.P 41.9136 yes
## 2 kumar 24 3456 male 5.30 Andhra 219.5820 no
## 3 atanu 25 2153 male 5.50 Kolkata 6.8714 no
## 4 karum 27 2134 male 4.50 Pondichary -383.0310 yes
## 5 ravi 26 3214 male 5.40 Hyderabad 2.5164 yes
## 6 pavan 25 2614 male 3.40 Vijayavada 14.1694 no
## 7 shyama 22 2874 female 5.00 Kerala 1.9656 yes
## 8 pream 24 1234 male 2.10 Tamil nadu 90.7152 yes
# Make the columns of `data` visible by bare name (Age, Height, ...) for the
# calls that follow, then list the column names.
attach(what = data)
colnames(data)
## [1] "Names" "Age" "ID" "Gender" "Height" "Region"
## [7] "Profit" "Talkitive"
# Check the storage class of the Profit column.
class(x = Profit)
## [1] "numeric"
t-test (one-sample and two-sample)
# Box plot of Age to look at its spread before running the t-tests.
boxplot(x = Age)

## One-sample, one-sided t-test on Age.
## H0: mu >= 24   vs   H1: mu < 24   (alternative = "less")
## Reports a one-sided 95% confidence interval for mu (upper bound only).
t.test(Age, mu = 24, alternative = "less", conf.level = 0.95)
##
## One Sample t-test
##
## data: Age
## t = 0.88192, df = 7, p-value = 0.7965
## alternative hypothesis: true mean is less than 24
## 95 percent confidence interval:
## -Inf 25.57413
## sample estimates:
## mean of x
## 24.5
# One-sample t-test of H0: mu = 24 against the two-sided alternative.
# "two.sided" is also what t.test() uses when `alternative` is not given.
t.test(
  x = Age,
  mu = 24,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## One Sample t-test
##
## data: Age
## t = 0.88192, df = 7, p-value = 0.4071
## alternative hypothesis: true mean is not equal to 24
## 95 percent confidence interval:
## 23.15938 25.84062
## sample estimates:
## mean of x
## 24.5
# Two-sample case: side-by-side box plots of Age for each Talkitive group.
boxplot(Age ~ Talkitive)

## Welch two-sample t-test (unequal variances), two-sided.
## H0: the mean Age is the same in the "no" and "yes" Talkitive groups.
## No `paired` argument is passed: unpaired is already the default, and the
## formula method of t.test() rejects `paired` in newer R versions (4.4+).
t.test(
  Age ~ Talkitive,
  mu = 0,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: Age by Talkitive
## t = 0.2706, df = 4.9356, p-value = 0.7976
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.276488 2.809822
## sample estimates:
## mean in group no mean in group yes
## 24.66667 24.40000
U test (Mann-Whitney U / Wilcoxon rank-sum test)
Appropriate for examining the difference in MEDIANS of 2 independent populations.
# Two-sided Wilcoxon rank-sum (Mann-Whitney U) test of Age by Talkitive,
# with a 95% confidence interval for the location shift.
# No `paired` argument is passed: the two groups are independent and
# unpaired is the default. exact = TRUE requests an exact p-value; with tied
# ages R warns that it cannot compute one.
wilcox.test(
  Age ~ Talkitive,
  mu = 0,
  alternative = "two.sided",
  conf.int = TRUE,
  conf.level = 0.95,
  exact = TRUE,
  correct = TRUE
)
## Warning in wilcox.test.default(x = c(24L, 25L, 25L), y = c(23L, 27L, 26L, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(24L, 25L, 25L), y = c(23L, 27L, 26L, :
## cannot compute exact confidence intervals with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: Age by Talkitive
## W = 8.5, p-value = 0.8801
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -3 3
## sample estimates:
## difference in location
## 0.9999889
# Same rank-sum test, but with exact = FALSE so the normal approximation
# (with continuity correction) is requested up front.
# No `paired` argument is passed: the two groups are independent and
# unpaired is the default.
wilcox.test(
  Age ~ Talkitive,
  mu = 0,
  alternative = "two.sided",
  conf.int = TRUE,
  conf.level = 0.95,
  exact = FALSE,
  correct = TRUE
)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Age by Talkitive
## W = 8.5, p-value = 0.8801
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -3 3
## sample estimates:
## difference in location
## 0.9999889
Anova
# Box plot of Age for each distinct Height value.
boxplot(Age ~ Height)

# Fit the analysis-of-variance model of Age on Height and print it.
# NOTE(review): Height is numeric, so aov() fits it as a single 1-df term
# rather than as a grouping factor - confirm this is the intended model.
aov(Age ~ Height)
## Call:
## aov(formula = Age ~ Height)
##
## Terms:
## Height Residuals
## Sum of Squares 0.000421 17.999579
## Deg. of Freedom 1 6
##
## Residual standard error: 1.732031
## Estimated effects may be unbalanced
# ANOVA table (Df, sums of squares, F value, p-value) for the same model.
summary(
  aov(Age ~ Height)
)
## Df Sum Sq Mean Sq F value Pr(>F)
## Height 1 0 4e-04 0 0.991
## Residuals 6 18 3e+00
chi-square
# Cross-tabulate Gender against Talkitive and show the counts.
TAB <- table(Gender, Talkitive)
print(TAB)
## Talkitive
## Gender no yes
## female 0 1
## male 3 4
# Grouped (side-by-side) bar chart of the Gender x Talkitive counts.
# `legend.text` is spelled out in full instead of relying on partial
# matching of `legend`; TRUE labels the legend with the row names of TAB.
barplot(TAB, beside = TRUE, legend.text = TRUE)

# Chi-squared test of independence between Gender and Talkitive, with
# Yates' continuity correction.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; with counts this small fisher.test(TAB) may be a safer
# choice - confirm with the author.
chisq.test(TAB, correct = TRUE)
## Warning in chisq.test(TAB, correct = T): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: TAB
## X-squared = 0, df = 1, p-value = 1
correlation test
# Scatter plot of Height against Age; las = 1 draws axis labels horizontally.
plot(
  x = Age,
  y = Height,
  main = "scatterplot",
  las = 1
)

# Correlation between Age and Height. Pearson is the default method, so the
# first two calls give the same value.
cor(x = Age, y = Height)
## [1] -0.004837542
cor(x = Age, y = Height, method = "pearson")
## [1] -0.004837542
# Rank-based alternatives: Spearman's rho and Kendall's tau.
cor(x = Age, y = Height, method = "spearman")
## [1] 0.1084416
cor(x = Age, y = Height, method = "kendall")
## [1] 0.1482499
# Two-sided test of H0: the Pearson correlation of Age and Height is 0.
cor.test(x = Age, y = Height, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: Age and Height
## t = -0.01185, df = 6, p-value = 0.9909
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7071000 0.7022292
## sample estimates:
## cor
## -0.004837542
# One-sided test (H1: correlation > 0) with a 99% confidence interval.
cor.test(
  Age, Height,
  method = "pearson",
  alternative = "greater",
  conf.level = 0.99
)
##
## Pearson's product-moment correlation
##
## data: Age and Height
## t = -0.01185, df = 6, p-value = 0.5045
## alternative hypothesis: true correlation is greater than 0
## 99 percent confidence interval:
## -0.7799379 1.0000000
## sample estimates:
## cor
## -0.004837542
covariance
# Sample covariance between Age and Height.
cov(x = Age, y = Height)
## [1] -0.009285714
# Scatter-plot matrix of every column against every other.
# data.matrix() converts character/factor columns to integer codes first;
# pairs() on its own stops with "non-numeric argument to 'pairs'" when it
# meets character columns, which read.csv() produces by default since R 4.0.
pairs(data.matrix(data))

Linear Regression
# Simple linear regression with Age as the predictor (x) and Height as the
# response (y), followed by the coefficient table and fit statistics.
mod <- lm(formula = Height ~ Age)
summary(object = mod)
##
## Call:
## lm(formula = Height ~ Age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4406 -0.3065 0.5090 0.7863 0.9631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.627222 7.480212 0.619 0.559
## Age -0.003611 0.304744 -0.012 0.991
##
## Residual standard error: 1.293 on 6 degrees of freedom
## Multiple R-squared: 2.34e-05, Adjusted R-squared: -0.1666
## F-statistic: 0.0001404 on 1 and 6 DF, p-value: 0.9909
# Redraw the Age/Height scatter plot and overlay the fitted line from `mod`.
plot(x = Age, y = Height, main = "scatterplot", las = 1)
abline(reg = mod)

# 99% confidence intervals for the intercept and the Age coefficient.
confint(object = mod, level = 0.99)
## 0.5 % 99.5 %
## (Intercept) -23.105127 32.359571
## Age -1.133428 1.126206
# Analysis-of-variance table for the fitted regression model.
anova(object = mod)
## Analysis of Variance Table
##
## Response: Height
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 0.0002 0.00023 1e-04 0.9909
## Residuals 6 10.0299 1.67164
END