# Load the built-in `trees` data set and print every row
# (31 observations of Girth, Height and Volume).
data(trees)
print(trees)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
# The Volume column as a plain numeric vector.
trees[["Volume"]]
## [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3
## [15] 19.1 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3
## [29] 51.5 51.0 77.0
# First six rows of the data frame.
head(trees, n = 6L)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
# Arithmetic mean and median of Volume.
mean(trees[["Volume"]])
## [1] 30.17097
median(trees[["Volume"]])
## [1] 24.2
# Frequency of each distinct Volume value; the largest count identifies the
# mode (only 10.3 occurs more than once).
table(trees[["Volume"]])
##
## 10.2 10.3 15.6 16.4 18.2 18.8 19.1 19.7 19.9 21 21.3 21.4 22.2 22.6 24.2
## 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## 24.9 25.7 27.4 31.7 33.8 34.5 36.3 38.3 42.6 51 51.5 55.4 55.7 58.3 77
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
max(table(trees[["Volume"]]))
## [1] 2
# Same frequency table ordered by count, so the mode ends up last.
sort(table(trees[["Volume"]]))
##
## 10.2 15.6 16.4 18.2 18.8 19.1 19.7 19.9 21 21.3 21.4 22.2 22.6 24.2 24.9
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 25.7 27.4 31.7 33.8 34.5 36.3 38.3 42.6 51 51.5 55.4 55.7 58.3 77 10.3
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
# Return the most frequent value of `x`.
#
# x: a vector of any atomic type.
# Returns a length-one vector of the same type as `x`. When several values
# share the highest count, the one that appears first in `x` wins, because
# which.max() returns the first maximum.
Mode <- function(x) {
  distinct_values <- unique(x)
  # Map each element to the index of its distinct value, then count per index.
  positions <- match(x, distinct_values)
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}
Mode(trees$Volume)
## [1] 10.3
# Evaluate the polynomial x^3 + 3x element-wise.
#
# x: a numeric vector.
# Returns a numeric vector the same length as `x`.
ajayfunction <- function(x) {
  cubic_term <- x^3
  linear_term <- 3 * x
  cubic_term + linear_term
}
ajayfunction(10)
## [1] 1030
# Number of observations in the Volume column.
length(trees[["Volume"]])
## [1] 31
# Geometric mean of a numeric vector, computed on the log scale so that long
# products do not overflow.
#
# x:     a numeric vector.
# na.rm: if TRUE (the default), missing values are dropped before anything is
#        counted; if FALSE, any NA in `x` makes the result NA.
# Returns a single numeric value.
#
# Non-positive values are left out of the log-sum but are still counted in the
# denominator, as before. Previously NAs were also counted in the denominator
# even with na.rm = TRUE, which biased the result low whenever `x` had NAs.
gm_mean <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  positive <- x[x > 0]
  exp(sum(log(positive)) / length(x))
}
gm_mean(trees$Volume)
## [1] 26.38331
# A weighted mean with every weight equal to 1 reproduces the arithmetic mean.
x <- trees[["Volume"]]
w <- rep(1, times = length(x))
weighted.mean(x, w)
## [1] 30.17097
# Kernel density estimate of Volume.
plot(density(trees$Volume))

# Smallest and largest Volume.
range(trees[["Volume"]])
## [1] 10.2 77.0
# NOTE(review): attach() puts the columns of `trees` on the search path so the
# bare name `Volume` resolves here and in later statements. It is kept because
# code further down relies on it; prefer trees$Volume in new code.
attach(trees)
# Sample standard deviation of Volume.
sd(Volume)
## [1] 16.43785
x <- 1:4
x
## [1] 1 2 3 4
# Geometric mean of x: the n-th root of the product of its elements.
# The exponent needs its own parentheses because `^` binds tighter than `/`;
# `prod(x)^1/4` is evaluated as `prod(x) / 4`, which gave 6 instead of the root.
gm_x <- prod(x)^(1 / length(x))
gm_x
## [1] 2.213364
# The same calculation for Volume: product first, then the n-th root.
prod(Volume)
## [1] 1.151355e+44
prod(Volume)^(1 / length(Volume))
## [1] 26.38331
# Quartiles of Volume (minimum, 25%, median, 75%, maximum).
quantile(Volume, probs = c(0, 0.25, 0.5, 0.75, 1))
## 0% 25% 50% 75% 100%
## 10.2 19.4 24.2 37.3 77.0
# Graphical views of the same distribution: box plot, density curve and a
# coarse histogram.
boxplot(Volume)

plot(density(Volume))

hist(Volume, breaks = 4)

# Deciles of Volume.
quantile(Volume, probs = seq(0, 1, by = 0.1))
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 10.2 15.6 18.8 19.9 21.4 24.2 27.4 34.5 42.6 55.4 77.0
# Finer histogram for comparison with the breaks = 4 version.
hist(Volume, breaks = 10)

# Six-number summary (min, quartiles, mean, max).
summary(Volume)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.20 19.40 24.20 30.17 37.30 77.00
#??kurtosis
#install.packages("e1071")
library(e1071)
# Shape of the Volume distribution, using the e1071 implementations.
e1071::skewness(Volume)
## [1] 1.013274
e1071::kurtosis(Volume)
## [1] 0.2460393
# Draw 100 values from a normal distribution with mean 10 and sd 5.
rnorm(n = 100, mean = 10, sd = 5)
## [1] 18.8424045 5.4155681 5.3193081 11.9121195 18.5425150 6.6568848
## [7] 1.6142246 1.2084215 11.6041001 10.3628070 14.2758711 2.9703578
## [13] -3.1526082 9.3511963 12.2022894 14.6378875 15.2238515 0.7362240
## [19] 1.9607277 21.8211432 8.3135191 7.0488950 2.2637027 14.5175621
## [25] 6.0758972 -0.8179548 7.8068657 5.5803139 6.0446307 9.6754397
## [31] 11.4877983 9.9609437 9.8149807 9.1680792 10.7191439 20.8768796
## [37] -2.9360097 5.6745919 16.4981872 5.4391386 7.6356364 9.6324293
## [43] 8.6520694 18.5488492 12.1837757 8.2865357 13.6721397 21.7520445
## [49] 3.9871210 1.5706777 7.7689042 7.1975114 14.4595124 11.9492685
## [55] 6.3135113 13.0682876 4.9177680 8.1536626 5.3463528 13.3107549
## [61] 11.3796600 15.2743601 13.1337463 1.0364682 8.2324989 6.0025915
## [67] 5.0212346 8.7506350 9.0498944 15.9413606 8.4534871 10.6244326
## [73] 13.1010427 17.6375854 13.0025060 4.6311517 11.1564321 9.4277241
## [79] 11.7767305 6.8871153 9.6228834 0.6910083 16.3358258 19.7441994
## [85] 17.1290247 14.7113160 9.8845957 13.7652695 7.7375719 13.9973694
## [91] 18.4388432 9.9029991 7.7050429 8.9610156 18.0190494 2.8330654
## [97] 8.3105953 2.0803874 16.5235927 8.5892795
# Three views of normal draws: in generation order, sorted, and as a density
# estimate of 1000 standard-normal values.
plot(rnorm(100, 10, 5))

plot(sort(rnorm(100, 10, 5)))

plot(density(rnorm(1000)))
# Simulate 20 coin tosses. Sampling is with replacement; `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
sample(c("Head", "Tail"), 20, replace = TRUE)
## [1] "Head" "Head" "Head" "Head" "Tail" "Head" "Tail" "Tail" "Head" "Tail"
## [11] "Head" "Head" "Tail" "Head" "Head" "Tail" "Head" "Head" "Tail" "Tail"
# Ten binomial draws: number of successes in 2 trials with probability 0.5.
rbinom(10, size = 2, prob = 0.5)
## [1] 0 1 2 1 0 2 2 2 1 1
# Tabulate 20 binomial draws with success probability 0.33.
table(rbinom(20, size = 2, prob = 0.33))
##
## 0 1 2
## 5 11 4
#http://stats.idre.ucla.edu/stata/whatstat/what-statistical-analysis-should-i-usestatistical-analyses-using-stata/
#install.packages("candisc")
library(candisc)
## Loading required package: car
## Loading required package: heplots
##
## Attaching package: 'candisc'
## The following object is masked from 'package:stats':
##
## cancor

#?HSB
# Load the HSB data set (presumably provided by the candisc package loaded
# above) and inspect its structure: 600 observations of 15 variables.
data(HSB)
str(object = HSB)
## 'data.frame': 600 obs. of 15 variables:
## $ id : num 55 114 490 44 26 510 133 213 548 309 ...
## $ gender : Factor w/ 2 levels "male","female": 2 1 1 2 2 1 2 2 2 2 ...
## $ race : Factor w/ 4 levels "hispanic","asian",..: 1 3 4 1 1 4 3 4 4 4 ...
## $ ses : Factor w/ 3 levels "low","middle",..: 1 2 2 1 2 2 1 1 2 3 ...
## $ sch : Factor w/ 2 levels "public","private": 1 1 1 1 1 1 1 1 2 1 ...
## $ prog : Factor w/ 3 levels "general","academic",..: 1 2 3 3 2 3 3 1 2 1 ...
## $ locus : num -1.78 0.24 -1.28 0.22 1.12 ...
## $ concept: num 0.56 -0.35 0.34 -0.76 -0.74 ...
## $ mot : num 1 1 0.33 1 0.67 ...
## $ career : Factor w/ 17 levels "clerical","craftsman",..: 9 8 9 15 15 8 14 1 10 10 ...
## $ read : num 28.3 30.5 31 31 31 ...
## $ write : num 46.3 35.9 35.9 41.1 41.1 ...
## $ math : num 42.8 36.9 46.1 49.2 36 ...
## $ sci : num 44.4 33.6 39 33.6 36.9 ...
## $ ss : num 50.6 40.6 45.6 35.6 45.6 ...
# Per-column summary: counts for factors, six-number summaries for numerics.
summary(object = HSB)
## id gender race ses
## Min. : 1.0 male :273 hispanic : 71 low :139
## 1st Qu.:150.8 female:327 asian : 34 middle:299
## Median :300.5 african-amer: 58 high :162
## Mean :300.5 white :437
## 3rd Qu.:450.2
## Max. :600.0
##
## sch prog locus concept
## public :506 general :145 Min. :-2.23000 Min. :-2.620000
## private: 94 academic:308 1st Qu.:-0.37250 1st Qu.:-0.300000
## vocation:147 Median : 0.21000 Median : 0.030000
## Mean : 0.09653 Mean : 0.004917
## 3rd Qu.: 0.51000 3rd Qu.: 0.440000
## Max. : 1.36000 Max. : 1.190000
##
## mot career read write
## Min. :0.0000 prof1 :161 Min. :28.3 Min. :25.50
## 1st Qu.:0.3300 prof2 : 94 1st Qu.:44.2 1st Qu.:44.30
## Median :0.6700 clerical : 50 Median :52.1 Median :54.10
## Mean :0.6608 craftsman: 39 Mean :51.9 Mean :52.38
## 3rd Qu.:1.0000 technical: 36 3rd Qu.:60.1 3rd Qu.:59.90
## Max. :1.0000 homemaker: 33 Max. :76.0 Max. :67.10
## (Other) :187
## math sci ss
## Min. :31.80 Min. :26.00 Min. :25.70
## 1st Qu.:44.50 1st Qu.:44.40 1st Qu.:45.60
## Median :51.30 Median :52.60 Median :50.60
## Mean :51.85 Mean :51.76 Mean :52.05
## 3rd Qu.:58.38 3rd Qu.:58.65 3rd Qu.:60.50
## Max. :75.50 Max. :74.20 Max. :70.50
##
# One-sample, two-sided t-test of the mean writing score against 50.
# The echoed result below (p = 3.301e-09) rejects H0 at the 5% level.
t.test(HSB$write,mu=50)# H0: mu = 50
##
## One Sample t-test
##
## data: HSB$write
## t = 6.0059, df = 599, p-value = 3.301e-09
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
## 51.60499 53.16467
## sample estimates:
## mean of x
## 52.38483
# Same hypothesis tested with the Wilcoxon signed rank test, which does not
# rely on the scores being normally distributed.
wilcox.test(HSB$write, mu=50)
##
## Wilcoxon signed rank test with continuity correction
##
## data: HSB$write
## V = 118590, p-value = 2.121e-11
## alternative hypothesis: true location is not equal to 50
#binom.test(HSB$gender,mu=0.5)
#library(rattle)
# NOTE(review): attach() puts every HSB column (write, read, gender, ...) on
# the search path so the bare names below resolve. Columns named like base
# functions (e.g. `write`) become easy to confuse; prefer HSB$write in new code.
attach(HSB)
# Two-sample t-test of writing vs. reading scores. With default arguments this
# is the Welch test, which treats the two vectors as independent samples.
# NOTE(review): write and read are columns of the same 600-row data frame, so
# they look like paired measurements; a paired test may be intended — confirm.
t.test(write,read)
##
## Welch Two Sample t-test
##
## data: write and read
## t = 0.84362, df = 1196.3, p-value = 0.399
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6402743 1.6062741
## sample estimates:
## mean of x mean of y
## 52.38483 51.90183
# Split the rows by gender for the group comparisons further down.
HSBf=HSB[gender=="female",]
HSBm=HSB[gender=="male",]
# Single cell: row 2 of the third column (race), returned as a factor value.
HSB[2, "race"]
## [1] african-amer
## Levels: hispanic asian african-amer white
# Entire second row, returned as a one-row data frame.
HSB[2, ]
## id gender race ses sch prog locus concept mot
## 2 114 male african-amer middle public academic 0.24 -0.35 1
## career read write math sci ss
## 2 operative 30.5 35.9 36.9 33.6 40.6
# First six rows.
head(HSB, n = 6L)
## id gender race ses sch prog locus concept mot
## 1 55 female hispanic low public general -1.78 0.56 1.00
## 2 114 male african-amer middle public academic 0.24 -0.35 1.00
## 3 490 male white middle public vocation -1.28 0.34 0.33
## 4 44 female hispanic low public vocation 0.22 -0.76 1.00
## 5 26 female hispanic middle public academic 1.12 -0.74 0.67
## 6 510 male white middle public vocation -0.86 1.19 0.33
## career read write math sci ss
## 1 prof1 28.3 46.3 42.8 44.4 50.6
## 2 operative 30.5 35.9 36.9 33.6 40.6
## 3 prof1 31.0 35.9 46.1 39.0 45.6
## 4 service 31.0 41.1 49.2 33.6 35.6
## 5 service 31.0 41.1 36.0 36.9 45.6
## 6 operative 33.6 28.1 31.8 39.6 35.6
# Welch two-sample t-test of writing scores, females vs. males.
# The echoed result below (p = 2.264e-09) shows a significant difference, with
# the female mean higher (54.55 vs. 49.79).
t.test(HSBf$write,HSBm$write)
##
## Welch Two Sample t-test
##
## data: HSBf$write and HSBm$write
## t = 6.0807, df = 540.78, p-value = 2.264e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.227937 6.308770
## sample estimates:
## mean of x mean of y
## 54.55443 49.78608
# Same comparison for reading scores.
t.test(HSBf$read,HSBm$read)
##
## Welch Two Sample t-test
##
## data: HSBf$read and HSBm$read
## t = -1.019, df = 572.67, p-value = 0.3087
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.477264 0.784896
## sample estimates:
## mean of x mean of y
## 51.51682 52.36300
# Lower-tailed one-sample z-test. Null hypothesis: umean >= 10000.
xbar <- 9900    # observed sample mean
umean <- 10000  # mean assumed under the null hypothesis
sd <- 120       # standard deviation used for the standard error
n <- 30         # sample size
# Test statistic: distance of xbar from umean in standard-error units.
z <- (xbar - umean) / (sd / sqrt(n))
z
## [1] -4.564355
alpha <- 0.05
# Critical value for a lower-tailed test at level alpha; compare z against
# -z.alpha.
z.alpha <- qnorm(1 - alpha)
-z.alpha
## [1] -1.644854