Inferential Statistics

We use inferential statistics to infer from the sample data what the population might think. In other words, we use inferential statistics to judge how probable it is that an observed difference between groups is a dependable one rather than one that might have happened by chance in this study.

Hypothesis testing

In a hypothesis test there are 2 types of hypothesis.
H0 (null hypothesis): the existing, default assumption (the status quo).
H1 (alternative hypothesis): the claim we are looking for evidence to support.

In hypothesis testing we use tests such as
"t-test", "z-test", "ANOVA", "F-test", "chi-square"
to decide whether the data give enough evidence to reject H0 in favour of H1.
# Load the sample data set. file.choose() opens an interactive file picker, so
# this line must be run from an interactive session.
data <- read.csv(file.choose(), header = TRUE)
data
##    Names Age   ID Gender Height     Region    Profit Talkitive
## 1 sujith  23 4567   male   5.11        A.P   41.9136       yes
## 2  kumar  24 3456   male   5.30     Andhra  219.5820        no
## 3  atanu  25 2153   male   5.50    Kolkata    6.8714        no
## 4  karum  27 2134   male   4.50 Pondichary -383.0310       yes
## 5   ravi  26 3214   male   5.40  Hyderabad    2.5164       yes
## 6  pavan  25 2614   male   3.40 Vijayavada   14.1694        no
## 7 shyama  22 2874 female   5.00     Kerala    1.9656       yes
## 8  pream  24 1234   male   2.10 Tamil nadu   90.7152       yes
# NOTE: attach() puts the columns of `data` on the search path. It is kept
# because the sections below refer to Age, Height, Talkitive, ... by bare name;
# in new code prefer `data$Age` or the `data =` argument of the test functions.
attach(data)
names(data)
## [1] "Names"     "Age"       "ID"        "Gender"    "Height"    "Region"   
## [7] "Profit"    "Talkitive"
# Confirm that Profit was read as a numeric column.
class(data$Profit)
## [1] "numeric"

t-test (one-sample and two-sample)

# Visual check of the Age distribution before testing.
boxplot(Age)

## One-sample, one-sided test:  H0: mu >= 24   vs   H1: mu < 24
## (gives a one-sided 95% confidence interval for mu)

t.test(
  Age,
  mu = 24,
  alternative = "less",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  Age
## t = 0.88192, df = 7, p-value = 0.7965
## alternative hypothesis: true mean is less than 24
## 95 percent confidence interval:
##      -Inf 25.57413
## sample estimates:
## mean of x 
##      24.5
# One-sample, two-sided test:  H0: mu = 24   vs   H1: mu != 24
# ("two.sided" is what t.test() uses when `alternative` is not specified.)

t.test(
  Age,
  mu = 24,
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  Age
## t = 0.88192, df = 7, p-value = 0.4071
## alternative hypothesis: true mean is not equal to 24
## 95 percent confidence interval:
##  23.15938 25.84062
## sample estimates:
## mean of x 
##      24.5
# Two-sample comparison: Age split by the two Talkitive groups ("no" / "yes").
boxplot(Age ~ Talkitive)

## H0: mean Age is the same in both Talkitive groups (difference in means = 0)
## two-sided test

# var.equal = FALSE requests the Welch test (no equal-variance assumption).
# `paired` is deliberately not passed: the formula interface always compares
# two independent groups, and recent R versions (4.4.0+) stop with an error if
# `paired` is supplied to the formula method. Argument names are spelled out in
# full instead of relying on partial matching (`alt`, `conf`).
t.test(Age ~ Talkitive, mu = 0, alternative = "two.sided",
       conf.level = 0.95, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  Age by Talkitive
## t = 0.2706, df = 4.9356, p-value = 0.7976
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.276488  2.809822
## sample estimates:
##  mean in group no mean in group yes 
##          24.66667          24.40000

Mann-Whitney U test (Wilcoxon rank-sum test)

Appropriate for examining the difference in medians of 2 independent populations.

# Rank-sum test of Age between the two Talkitive groups.
# The original call passed a misspelled `paried` argument, which was silently
# ignored; it is dropped because the formula interface already treats the two
# groups as independent samples.
# exact = TRUE asks for an exact p-value; Age contains ties, so R cannot
# compute it, falls back to the normal approximation and warns (see below).
wilcox.test(Age ~ Talkitive, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = TRUE, correct = TRUE)
## Warning in wilcox.test.default(x = c(24L, 25L, 25L), y = c(23L, 27L, 26L, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(24L, 25L, 25L), y = c(23L, 27L, 26L, :
## cannot compute exact confidence intervals with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Age by Talkitive
## W = 8.5, p-value = 0.8801
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -3  3
## sample estimates:
## difference in location 
##              0.9999889
# exact = FALSE requests the normal approximation directly: same result as
# above, but without the warnings about ties.
wilcox.test(Age ~ Talkitive, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Age by Talkitive
## W = 8.5, p-value = 0.8801
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -3  3
## sample estimates:
## difference in location 
##              0.9999889

ANOVA

# NOTE(review): Height is a numeric column, so aov() fits a single slope for it
# (1 degree of freedom in the table below) rather than comparing group means.
# For a classical one-way ANOVA the right-hand side should be a factor.
boxplot(Age ~ Height)

# Fit the model once and reuse the fitted object for both printouts.
age_height_aov <- aov(Age ~ Height)
age_height_aov
## Call:
##    aov(formula = Age ~ Height)
## 
## Terms:
##                    Height Residuals
## Sum of Squares   0.000421 17.999579
## Deg. of Freedom         1         6
## 
## Residual standard error: 1.732031
## Estimated effects may be unbalanced
summary(age_height_aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Height       1      0   4e-04       0  0.991
## Residuals    6     18   3e+00

Chi-square test

# Contingency table of Gender (rows) against Talkitive (columns).
TAB <- table(Gender, Talkitive)
TAB
##         Talkitive
## Gender   no yes
##   female  0   1
##   male    3   4
# Grouped bar chart of the counts; `legend.text` is spelled out in full rather
# than relying on partial matching of `legend`.
barplot(TAB, beside = TRUE, legend.text = TRUE)

# Test of independence with Yates' continuity correction.
# The cell counts are very small (one cell is 0), which is why R warns that the
# chi-squared approximation may be incorrect; fisher.test(TAB) is the usual
# alternative for a 2x2 table this sparse.
chisq.test(TAB, correct = TRUE)
## Warning in chisq.test(TAB, correct = TRUE): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  TAB
## X-squared = 0, df = 1, p-value = 1

correlation test

# Scatterplot of Height against Age (las = 1 keeps axis labels horizontal).
plot(Age, Height, main = "scatterplot", las = 1)

# Correlation coefficients. "pearson" is the default method, so the first two
# calls return the same value.
cor(Age, Height)
## [1] -0.004837542
cor(Age, Height, method = "pearson")
## [1] -0.004837542
cor(Age, Height, method = "spearman")
## [1] 0.1084416
cor(Age, Height, method = "kendall")
## [1] 0.1482499
# Two-sided test (the default): H0: correlation = 0  vs  H1: correlation != 0
cor.test(Age, Height, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  Age and Height
## t = -0.01185, df = 6, p-value = 0.9909
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7071000  0.7022292
## sample estimates:
##          cor 
## -0.004837542
# One-sided test at the 99% level: H1: correlation > 0
# (this is NOT a two-sided test, as the output below confirms).
cor.test(Age, Height, method = "pearson", alternative = "greater",
         conf.level = 0.99)
## 
##  Pearson's product-moment correlation
## 
## data:  Age and Height
## t = -0.01185, df = 6, p-value = 0.5045
## alternative hypothesis: true correlation is greater than 0
## 99 percent confidence interval:
##  -0.7799379  1.0000000
## sample estimates:
##          cor 
## -0.004837542

covariance

# Sample covariance between Age and Height.
cov(Age, Height)
## [1] -0.009285714
# Scatterplot matrix. pairs() stops with "non-numeric argument to 'pairs'" when
# a column is character, which is how read.csv() returns text columns since
# R 4.0 (Names, Gender, Region, Talkitive here). Restrict the plot to the
# numeric columns so it works regardless of how the text columns were read.
numeric_cols <- vapply(data, is.numeric, logical(1))
pairs(data[numeric_cols])

Linear Regression

# Simple linear regression: response (y) = Height, predictor (x) = Age.
mod <- lm(Height ~ Age)

# Coefficient table, residual summary and overall fit statistics.
summary(mod)
## 
## Call:
## lm(formula = Height ~ Age)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4406 -0.3065  0.5090  0.7863  0.9631 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  4.627222   7.480212   0.619    0.559
## Age         -0.003611   0.304744  -0.012    0.991
## 
## Residual standard error: 1.293 on 6 degrees of freedom
## Multiple R-squared:  2.34e-05,   Adjusted R-squared:  -0.1666 
## F-statistic: 0.0001404 on 1 and 6 DF,  p-value: 0.9909
# Redraw the scatterplot and overlay the fitted regression line.
plot(
  Age,
  Height,
  main = "scatterplot",
  las = 1
)
abline(mod)

# 99% confidence intervals for the intercept and the Age slope.
confint(mod, level = 0.99)
##                  0.5 %    99.5 %
## (Intercept) -23.105127 32.359571
## Age          -1.133428  1.126206
# ANOVA table for the fitted regression.
anova(mod)
## Analysis of Variance Table
## 
## Response: Height
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Age        1  0.0002 0.00023   1e-04 0.9909
## Residuals  6 10.0299 1.67164

END