# stats_class

data("trees")
trees

##    Girth Height Volume
## 1    8.3     70   10.3
## 2    8.6     65   10.3
## 3    8.8     63   10.2
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 18  13.3     86   27.4
## 19  13.7     71   25.7
## 20  13.8     64   24.9
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0
## 31  20.6     87   77.0

trees$Volume

##  [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3
## [15] 19.1 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3
## [29] 51.5 51.0 77.0

head(trees)

##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

mean(trees$Volume)

## [1] 30.17097

median(trees$Volume)

## [1] 24.2

table(trees$Volume)

## 
## 10.2 10.3 15.6 16.4 18.2 18.8 19.1 19.7 19.9   21 21.3 21.4 22.2 22.6 24.2 
##    1    2    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 24.9 25.7 27.4 31.7 33.8 34.5 36.3 38.3 42.6   51 51.5 55.4 55.7 58.3   77 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1

max(table(trees$Volume))

## [1] 2

sort(table(trees$Volume))

## 
## 10.2 15.6 16.4 18.2 18.8 19.1 19.7 19.9   21 21.3 21.4 22.2 22.6 24.2 24.9 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 25.7 27.4 31.7 33.8 34.5 36.3 38.3 42.6   51 51.5 55.4 55.7 58.3   77 10.3 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    2

# Return the most frequent value of `x`. When several values tie for the
# highest count, the one that appears first in `x` is returned.
Mode <- function(x) {
  distinct_values <- unique(x)
  # Map each element to the index of its distinct value, then count per index.
  occurrences <- tabulate(match(x, distinct_values))
  distinct_values[which.max(occurrences)]
}
Mode(trees$Volume)

## [1] 10.3

# Evaluate the cubic polynomial x^3 + 3x; works element-wise on vectors.
ajayfunction <- function(x) {
  cube <- x^3
  cube + 3 * x
}
ajayfunction(10)

## [1] 1030

length(trees$Volume)

## [1] 31

# Geometric mean of `x`, computed as exp(sum(log(.)) / n).
#
# x      numeric vector.
# na.rm  if TRUE (default) missing values are removed up front, so they count
#        neither in the log-sum nor in the divisor n (previously they were
#        still counted in n, which pulled the result towards 1);
#        if FALSE any NA in `x` makes the result NA.
#
# Non-positive entries are left out of the log-sum but still count in n,
# exactly as before.
gm_mean <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  exp(sum(log(x[x > 0])) / length(x))
}

gm_mean(trees$Volume)

## [1] 26.38331

# Weighted mean with every weight equal to 1, which reduces to the plain
# arithmetic mean of the Volume column.
x <- trees[["Volume"]]
w <- rep(1, times = length(x))

weighted.mean(x, w = w)

## [1] 30.17097

plot(density(trees$Volume))

range(trees$Volume)

## [1] 10.2 77.0

# NOTE(review): attach() puts the columns of `trees` on the search path; the
# bare `Volume` used in the statements that follow resolves through it.
attach(trees)

# Standard deviation of Volume (sd() uses the n - 1 denominator).
sd(Volume)

## [1] 16.43785

x <- 1:4
x

## [1] 1 2 3 4

# Geometric mean of x: the n-th root of the product of its elements.
# The exponent must be parenthesised because `^` binds tighter than `/`:
# prod(x)^1/4 is prod(x) / 4 (= 6 here), not the fourth root.
gm_x <- prod(x)^(1 / length(x))
gm_x

## [1] 2.213364

prod(Volume)

## [1] 1.151355e+44

prod(Volume)^(1/length(Volume))

## [1] 26.38331

quantile(Volume)

##   0%  25%  50%  75% 100% 
## 10.2 19.4 24.2 37.3 77.0

boxplot(Volume)

plot(density(Volume))

hist(Volume,breaks = 4)

quantile(Volume,seq(0,1,0.1))

##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
## 10.2 15.6 18.8 19.9 21.4 24.2 27.4 34.5 42.6 55.4 77.0

hist(Volume,breaks = 10)

summary(Volume)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.20   19.40   24.20   30.17   37.30   77.00

#??kurtosis

#install.packages("e1071")
library(e1071)
skewness(Volume)

## [1] 1.013274

kurtosis(Volume)

## [1] 0.2460393

rnorm(100,10,5)

##   [1] 18.8424045  5.4155681  5.3193081 11.9121195 18.5425150  6.6568848
##   [7]  1.6142246  1.2084215 11.6041001 10.3628070 14.2758711  2.9703578
##  [13] -3.1526082  9.3511963 12.2022894 14.6378875 15.2238515  0.7362240
##  [19]  1.9607277 21.8211432  8.3135191  7.0488950  2.2637027 14.5175621
##  [25]  6.0758972 -0.8179548  7.8068657  5.5803139  6.0446307  9.6754397
##  [31] 11.4877983  9.9609437  9.8149807  9.1680792 10.7191439 20.8768796
##  [37] -2.9360097  5.6745919 16.4981872  5.4391386  7.6356364  9.6324293
##  [43]  8.6520694 18.5488492 12.1837757  8.2865357 13.6721397 21.7520445
##  [49]  3.9871210  1.5706777  7.7689042  7.1975114 14.4595124 11.9492685
##  [55]  6.3135113 13.0682876  4.9177680  8.1536626  5.3463528 13.3107549
##  [61] 11.3796600 15.2743601 13.1337463  1.0364682  8.2324989  6.0025915
##  [67]  5.0212346  8.7506350  9.0498944 15.9413606  8.4534871 10.6244326
##  [73] 13.1010427 17.6375854 13.0025060  4.6311517 11.1564321  9.4277241
##  [79] 11.7767305  6.8871153  9.6228834  0.6910083 16.3358258 19.7441994
##  [85] 17.1290247 14.7113160  9.8845957 13.7652695  7.7375719 13.9973694
##  [91] 18.4388432  9.9029991  7.7050429  8.9610156 18.0190494  2.8330654
##  [97]  8.3105953  2.0803874 16.5235927  8.5892795

plot(rnorm(100,10,5))

plot(sort(rnorm(100,10,5)))

plot(density(rnorm(1000)))


# Simulate 20 fair coin tosses (sampling with replacement). `replace = TRUE`
# is spelled out because `T` is an ordinary variable that can be reassigned,
# unlike the reserved constant TRUE.
sample(c("Head", "Tail"), size = 20, replace = TRUE)

##  [1] "Head" "Head" "Head" "Head" "Tail" "Head" "Tail" "Tail" "Head" "Tail"
## [11] "Head" "Head" "Tail" "Head" "Head" "Tail" "Head" "Head" "Tail" "Tail"

rbinom(10,2,0.5)

##  [1] 0 1 2 1 0 2 2 2 1 1

table(rbinom(20,2,0.33))

## 
##  0  1  2 
##  5 11  4

#http://stats.idre.ucla.edu/stata/whatstat/what-statistical-analysis-should-i-usestatistical-analyses-using-stata/
#install.packages("candisc")
library(candisc)

## Loading required package: car

## Loading required package: heplots

## 
## Attaching package: 'candisc'

## The following object is masked from 'package:stats':
## 
##     cancor

#?HSB
data("HSB")
str(HSB)

## 'data.frame':    600 obs. of  15 variables:
##  $ id     : num  55 114 490 44 26 510 133 213 548 309 ...
##  $ gender : Factor w/ 2 levels "male","female": 2 1 1 2 2 1 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "hispanic","asian",..: 1 3 4 1 1 4 3 4 4 4 ...
##  $ ses    : Factor w/ 3 levels "low","middle",..: 1 2 2 1 2 2 1 1 2 3 ...
##  $ sch    : Factor w/ 2 levels "public","private": 1 1 1 1 1 1 1 1 2 1 ...
##  $ prog   : Factor w/ 3 levels "general","academic",..: 1 2 3 3 2 3 3 1 2 1 ...
##  $ locus  : num  -1.78 0.24 -1.28 0.22 1.12 ...
##  $ concept: num  0.56 -0.35 0.34 -0.76 -0.74 ...
##  $ mot    : num  1 1 0.33 1 0.67 ...
##  $ career : Factor w/ 17 levels "clerical","craftsman",..: 9 8 9 15 15 8 14 1 10 10 ...
##  $ read   : num  28.3 30.5 31 31 31 ...
##  $ write  : num  46.3 35.9 35.9 41.1 41.1 ...
##  $ math   : num  42.8 36.9 46.1 49.2 36 ...
##  $ sci    : num  44.4 33.6 39 33.6 36.9 ...
##  $ ss     : num  50.6 40.6 45.6 35.6 45.6 ...

summary(HSB)

##        id           gender              race         ses     
##  Min.   :  1.0   male  :273   hispanic    : 71   low   :139  
##  1st Qu.:150.8   female:327   asian       : 34   middle:299  
##  Median :300.5                african-amer: 58   high  :162  
##  Mean   :300.5                white       :437               
##  3rd Qu.:450.2                                               
##  Max.   :600.0                                               
##                                                              
##       sch            prog         locus             concept         
##  public :506   general :145   Min.   :-2.23000   Min.   :-2.620000  
##  private: 94   academic:308   1st Qu.:-0.37250   1st Qu.:-0.300000  
##                vocation:147   Median : 0.21000   Median : 0.030000  
##                               Mean   : 0.09653   Mean   : 0.004917  
##                               3rd Qu.: 0.51000   3rd Qu.: 0.440000  
##                               Max.   : 1.36000   Max.   : 1.190000  
##                                                                     
##       mot               career         read          write      
##  Min.   :0.0000   prof1    :161   Min.   :28.3   Min.   :25.50  
##  1st Qu.:0.3300   prof2    : 94   1st Qu.:44.2   1st Qu.:44.30  
##  Median :0.6700   clerical : 50   Median :52.1   Median :54.10  
##  Mean   :0.6608   craftsman: 39   Mean   :51.9   Mean   :52.38  
##  3rd Qu.:1.0000   technical: 36   3rd Qu.:60.1   3rd Qu.:59.90  
##  Max.   :1.0000   homemaker: 33   Max.   :76.0   Max.   :67.10  
##                   (Other)  :187                                 
##       math            sci              ss       
##  Min.   :31.80   Min.   :26.00   Min.   :25.70  
##  1st Qu.:44.50   1st Qu.:44.40   1st Qu.:45.60  
##  Median :51.30   Median :52.60   Median :50.60  
##  Mean   :51.85   Mean   :51.76   Mean   :52.05  
##  3rd Qu.:58.38   3rd Qu.:58.65   3rd Qu.:60.50  
##  Max.   :75.50   Max.   :74.20   Max.   :70.50  
##

# One-sample, two-sided t-test of HSB$write against a hypothesised mean of 50.
t.test(HSB$write,mu=50) # H0: mu = 50

## 
##  One Sample t-test
## 
## data:  HSB$write
## t = 6.0059, df = 599, p-value = 3.301e-09
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
##  51.60499 53.16467
## sample estimates:
## mean of x 
##  52.38483

wilcox.test(HSB$write, mu=50)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  HSB$write
## V = 118590, p-value = 2.121e-11
## alternative hypothesis: true location is not equal to 50

#binom.test(HSB$gender,mu=0.5)
#library(rattle)
# NOTE(review): attach() exposes the HSB columns as bare names (`write`,
# `read`, `gender`) on the search path; the statements below depend on it.
attach(HSB)

# Welch two-sample t-test comparing the means of write and read.
# NOTE(review): both columns come from the same 600 rows of HSB, so a paired
# test (paired = TRUE) may be what is intended -- confirm.
t.test(write,read)

## 
##  Welch Two Sample t-test
## 
## data:  write and read
## t = 0.84362, df = 1196.3, p-value = 0.399
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6402743  1.6062741
## sample estimates:
## mean of x mean of y 
##  52.38483  51.90183

# Split HSB into female and male subsets. The column is referenced as
# HSB$gender so the subsetting does not depend on attach(HSB) having been
# called, nor on any other object named `gender` on the search path.
HSBf <- HSB[HSB$gender == "female", ]
HSBm <- HSB[HSB$gender == "male", ]
HSB[2,3]

## [1] african-amer
## Levels: hispanic asian african-amer white

HSB[2,]

##    id gender         race    ses    sch     prog locus concept mot
## 2 114   male african-amer middle public academic  0.24   -0.35   1
##      career read write math  sci   ss
## 2 operative 30.5  35.9 36.9 33.6 40.6

head(HSB)

##    id gender         race    ses    sch     prog locus concept  mot
## 1  55 female     hispanic    low public  general -1.78    0.56 1.00
## 2 114   male african-amer middle public academic  0.24   -0.35 1.00
## 3 490   male        white middle public vocation -1.28    0.34 0.33
## 4  44 female     hispanic    low public vocation  0.22   -0.76 1.00
## 5  26 female     hispanic middle public academic  1.12   -0.74 0.67
## 6 510   male        white middle public vocation -0.86    1.19 0.33
##      career read write math  sci   ss
## 1     prof1 28.3  46.3 42.8 44.4 50.6
## 2 operative 30.5  35.9 36.9 33.6 40.6
## 3     prof1 31.0  35.9 46.1 39.0 45.6
## 4   service 31.0  41.1 49.2 33.6 35.6
## 5   service 31.0  41.1 36.0 36.9 45.6
## 6 operative 33.6  28.1 31.8 39.6 35.6

t.test(HSBf$write,HSBm$write)

## 
##  Welch Two Sample t-test
## 
## data:  HSBf$write and HSBm$write
## t = 6.0807, df = 540.78, p-value = 2.264e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  3.227937 6.308770
## sample estimates:
## mean of x mean of y 
##  54.55443  49.78608

t.test(HSBf$read,HSBm$read)

## 
##  Welch Two Sample t-test
## 
## data:  HSBf$read and HSBm$read
## t = -1.019, df = 572.67, p-value = 0.3087
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.477264  0.784896
## sample estimates:
## mean of x mean of y 
##  51.51682  52.36300

# Lower-tailed one-sample z-test.
# H0: population mean (umean) >= 10000.
n <- 30         # sample size
sd <- 120       # standard deviation used for the standard error
umean <- 10000  # mean under H0
xbar <- 9900    # observed sample mean

# Test statistic: distance of xbar from umean in standard-error units.
z <- (xbar - umean) / (sd / sqrt(n))
z

## [1] -4.564355

# Critical value at the 5% level; H0 is rejected when z falls below -z.alpha.
alpha <- 0.05
z.alpha <- qnorm(1 - alpha)
-z.alpha

## [1] -1.644854

## [1] -1.644854

# stats_class_2.R
#
# Dell
#
# Sat May 20 23:23:52 2017