# Repaso Final

# Exercise: build histograms and frequency distributions, and interpret them.
# Use the "conc" variable.
# Preview the first six rows of CO2 to see the available columns.
head(CO2, n = 6L)

##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
## 4   Qn1 Quebec nonchilled  350   37.2
## 5   Qn1 Quebec nonchilled  500   35.3
## 6   Qn1 Quebec nonchilled  675   39.2

# Use the following data: 100 draws from a normal distribution with mean 25
# and standard deviation 3.7. The fixed seed makes the draws reproducible.
set.seed(seed = 2015)
dfr <- rnorm(n = 100, mean = 25, sd = 3.7)
#Elabore una distribucion de frecuencias
library(fdth)

## 
## Attaching package: 'fdth'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var

# Frequency distribution table of dfr, with breaks chosen by the "Sturges"
# option of fdt().
# NOTE(review): the variable name `dist` shadows stats::dist(); it is kept
# because the later plot(dist, type = "cfp") call refers to it.
dist <- fdt(dfr, breaks = "Sturges")
dist

##   Class limits  f   rf rf(%)  cf cf(%)
##  [16.13,18.46)  4 0.04     4   4     4
##  [18.46,20.79) 10 0.10    10  14    14
##  [20.79,23.11) 19 0.19    19  33    33
##  [23.11,25.44) 22 0.22    22  55    55
##  [25.44,27.77) 25 0.25    25  80    80
##  [27.77,30.09) 11 0.11    11  91    91
##  [30.09,32.42)  7 0.07     7  98    98
##  [32.42,34.75)  2 0.02     2 100   100

# Base-graphics histogram of the simulated sample.
hist(x = dfr)

# See the fdth help pages for the different plotting options.

# Exercise: what kind of figure does the following call draw?
plot(x = dist, type = "cfp")

#Hacer estadisticas descriptivas
library(PASWR)

## Warning: package 'PASWR' was built under R version 3.2.2

## Loading required package: e1071
## Loading required package: MASS
## Loading required package: lattice

# Exploratory summary of dfr via EDA() (presumably PASWR's, loaded just
# above). The recorded output below lists n, quartiles, mean, spread, shape
# measures and a Shapiro-Wilk p-value.
EDA(dfr)

## [1] "dfr"

## Size (n)  Missing  Minimum   1st Qu     Mean   Median   TrMean   3rd Qu 
##  100.000    0.000   16.296   21.993   24.812   24.918   24.783   27.394 
##     Max.   Stdev.     Var.  SE Mean   I.Q.R.    Range Kurtosis Skewness 
##   34.404    3.735   13.952    0.374    5.401   18.108   -0.529    0.085 
## SW p-val 
##    0.791

#Revise cada una de ellas e interprete.  Explore otras opciones para encontrar las mediciones de forma independiente, p. ej. moda, mediana, sd, etc.

# Probabilities
# Demonstrate the different uses and answers of each command (mass,
# cumulative); do the same for the Poisson distribution.

# For each call, note the kind of question it answers, given how it is written.
# P(X = 3) for X ~ Binomial(size = 10, prob = 0.25)
dbinom(x = 3, size = 10, prob = 0.25)

## [1] 0.2502823

# P(X <= 3)
pbinom(q = 3, size = 10, prob = 0.25)

## [1] 0.7758751

# P(X > 3), as the complement of the cumulative probability
1 - pbinom(q = 3, size = 10, prob = 0.25)

## [1] 0.2241249

# P(3 < X <= 6), as a difference of cumulative probabilities
pbinom(q = 6, size = 10, prob = 0.25) - pbinom(q = 3, size = 10, prob = 0.25)

## [1] 0.2206192

# Probability mass function of Binomial(size = 23, prob = 0.354).
# The counts 0:23 are passed as x coordinates so the axis shows the number of
# successes; plotting the probabilities alone would place them at index 1:24.
plot(
  0:23, dbinom(0:23, size = 23, prob = 0.354),
  xlab = "x", ylab = "P(X = x)"
)

# Probability of each possible number of successes for Binomial(10, 0.75).
y <- dbinom(0:10, size = 10, prob = 0.75)
cbind(x = 0:10, datos = y)

##        x        datos
##  [1,]  0 9.536743e-07
##  [2,]  1 2.861023e-05
##  [3,]  2 3.862381e-04
##  [4,]  3 3.089905e-03
##  [5,]  4 1.622200e-02
##  [6,]  5 5.839920e-02
##  [7,]  6 1.459980e-01
##  [8,]  7 2.502823e-01
##  [9,]  8 2.815676e-01
## [10,]  9 1.877117e-01
## [11,] 10 5.631351e-02

# Same probabilities drawn as a line, plotted against the counts 0:10 rather
# than the vector index 1:11.
plot(0:10, y, type = "l", xlab = "x", ylab = "P(X = x)")

# Areas under the normal curve
# Exercise: sketch in your notebook the area that each call below computes.
# Upper tail beyond z = 2.65
1 - pnorm(q = 2.65)

## [1] 0.004024589

# Area between z = 0 and z = 1.35
pnorm(q = 1.35) - pnorm(q = 0)

## [1] 0.411492

# Lower tail up to z = 1
pnorm(q = 1)

## [1] 0.8413447

library(TeachingDemos)

## Warning: package 'TeachingDemos' was built under R version 3.2.2

## 
## Attaching package: 'TeachingDemos'
## 
## The following object is masked from 'package:PASWR':
## 
##     z.test

# z-test of a single observation (60) against mu = 63 with sd = 4.4.
# The recorded loading messages show TeachingDemos masking PASWR's z.test, so
# this call presumably resolves to the TeachingDemos version.
z.test(x = 60, mu = 63, sd = 4.4)

## 
##  One Sample z-test
## 
## data:  60
## z = -0.68182, n = 1.0, Std. Dev. = 4.4, Std. Dev. of the sample
## mean = 4.4, p-value = 0.4954
## alternative hypothesis: true mean is not equal to 63
## 95 percent confidence interval:
##  51.37616 68.62384
## sample estimates:
## mean of 60 
##         60

# One-sided ("greater") z-test of a single observation (70) against 63 with
# standard deviation 4.4. Exercise: try the different tails (one and two).
z.test(70, 63, 4.4, alternative = "greater")

## 
##  One Sample z-test
## 
## data:  70
## z = 1.5909, n = 1.0, Std. Dev. = 4.4, Std. Dev. of the sample mean
## = 4.4, p-value = 0.05582
## alternative hypothesis: true mean is greater than 63
## 95 percent confidence interval:
##  62.76264      Inf
## sample estimates:
## mean of 70 
##         70

# One-population hypothesis test and normality assumptions
# One-sample t-test of dfr against a hypothesised mean of 26.
t.test(x = dfr, mu = 26)

## 
##  One Sample t-test
## 
## data:  dfr
## t = -3.1817, df = 99, p-value = 0.001956
## alternative hypothesis: true mean is not equal to 26
## 95 percent confidence interval:
##  24.07041 25.55271
## sample estimates:
## mean of x 
##  24.81156

# Shapiro-Wilk normality test on the simulated sample.
shapiro.test(x = dfr)

## 
##  Shapiro-Wilk normality test
## 
## data:  dfr
## W = 0.99159, p-value = 0.7908

# Wilcoxon signed-rank test on dfr.
# NOTE(review): no `mu` is supplied, so the recorded output tests location
# against 0 ("true location is not equal to 0"). Presumably mu = 26 was meant,
# to mirror the t-test above — confirm and regenerate the output.
wilcox.test(dfr)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  dfr
## V = 5050, p-value < 2.2e-16
## alternative hypothesis: true location is not equal to 0

# Draw the box plot and the histogram of dfr on a 2 x 2 plotting grid.
# par() returns the previous settings; they are restored afterwards so the
# grid does not leak into every later plot in the script.
old_par <- par(mfrow = c(2, 2))
boxplot(dfr)
hist(dfr)
par(old_par)

# Generate a second group of samples: 100 draws from a normal distribution
# with mean 27 and standard deviation 7. No seed is set here, so the values
# depend on the state of the random number generator at this point.
dfr2 <- rnorm(n = 100, mean = 27, sd = 7)
#Elabore un cuadro de estadisticas de resumenes. Incluya
#Promedio, sd, max, min, mediana, cuantiles, IQR, normalidad, Kolmogorov-test.

# Two populations
# Paired t-test: compares dfr and dfr2 element by element.
# NOTE(review): dfr and dfr2 are generated independently above, so the pairing
# looks like a demonstration only — confirm that is intended.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
t.test(dfr, dfr2, paired = TRUE)

## 
##  Paired t-test
## 
## data:  dfr and dfr2
## t = -2.1899, df = 99, p-value = 0.03088
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.3026835 -0.1627351
## sample estimates:
## mean of the differences 
##               -1.732709

# Independent samples (the recorded output is a Welch two-sample t-test).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
t.test(dfr, dfr2, paired = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  dfr and dfr2
## t = -2.291, df = 156.85, p-value = 0.02329
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.2265771 -0.2388416
## sample estimates:
## mean of x mean of y 
##  24.81156  26.54427

# Homogeneity of variances: F test comparing the variances of the two samples.
var.test(x = dfr, y = dfr2)

## 
##  F test to compare two variances
## 
## data:  dfr and dfr2
## F = 0.32259, num df = 99, denom df = 99, p-value = 4.38e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2170549 0.4794506
## sample estimates:
## ratio of variances 
##          0.3225944

# Assuming unequal variances (Welch two-sample t-test in the recorded output).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
t.test(dfr, dfr2, var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  dfr and dfr2
## t = -2.291, df = 156.85, p-value = 0.02329
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.2265771 -0.2388416
## sample estimates:
## mean of x mean of y 
##  24.81156  26.54427

# Assuming equal variances (pooled two-sample t-test, df = 198 in the output).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
t.test(dfr, dfr2, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  dfr and dfr2
## t = -2.291, df = 198, p-value = 0.02302
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.2241674 -0.2412512
## sample estimates:
## mean of x mean of y 
##  24.81156  26.54427

# See the t.test help page for the remaining options.
# Side-by-side box plots of the two samples.
boxplot(dfr, dfr2)

# Two-sample Kolmogorov-Smirnov test comparing the two samples.
ks.test(x = dfr, y = dfr2)

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  dfr and dfr2
## D = 0.25, p-value = 0.003861
## alternative hypothesis: two-sided

# Chi-squared
# Observed frequencies, tested with chisq.test() defaults ("given
# probabilities" in the recorded output; expected counts are all equal).
frec <- c(15, 19, 22)
chisq.test(x = frec)

## 
##  Chi-squared test for given probabilities
## 
## data:  frec
## X-squared = 1.3214, df = 2, p-value = 0.5165

# 0.95 quantile of the chi-squared distribution with 2 degrees of freedom.
qchisq(p = 0.95, df = 2)

## [1] 5.991465

# Expected counts used by the test.
chisq.test(x = frec)$expected

## [1] 18.66667 18.66667 18.66667

# Two numeric columns, one per habitat, combined into a data frame.
habitat1 <- c(3, 6, 8)
habitat2 <- c(3, 12, 5)
habt <- data.frame(habitat1, habitat2)
habt

##   habitat1 habitat2
## 1        3        3
## 2        6       12
## 3        8        5

# Label the rows: males, females, unsexed.
row.names(habt) <- c("machos", "hembras", "no_sexados")
habt

##            habitat1 habitat2
## machos            3        3
## hembras           6       12
## no_sexados        8        5

# Pearson chi-squared test on the habt table.
# The recorded run warns that the chi-squared approximation may be incorrect
# (see the warning in the output below).
chisq.test(habt)

## Warning in chisq.test(habt): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  habt
## X-squared = 2.4653, df = 2, p-value = 0.2915

# Fisher's exact test on the same table with a simulated p-value; the result
# therefore depends on the state of the random number generator at this point.
fisher.test(habt, simulate.p.value = TRUE)

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based
##  on 2000 replicates)
## 
## data:  habt
## p-value = 0.2724
## alternative hypothesis: two.sided

# Mosaic plot of the habt table.
mosaicplot(habt, color = TRUE, main = "Plot de mosaico")

# Each cell as a proportion of the grand total.
prop.table(habt)

##              habitat1   habitat2
## machos     0.08108108 0.08108108
## hembras    0.16216216 0.32432432
## no_sexados 0.21621622 0.13513514

# Goodness-of-fit test of the observed counts against the probabilities
# 1/4, 2/4 and 1/4.
chisq.test(c(28, 49, 27), p = c(1 / 4, 2 / 4, 1 / 4))

## 
##  Chi-squared test for given probabilities
## 
## data:  c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833

# Same test, stored in `pro` so its components can be inspected afterwards.
pro <- chisq.test(c(28, 49, 27), p = c(1 / 4, 2 / 4, 1 / 4))
pro

## 
##  Chi-squared test for given probabilities
## 
## data:  c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833

# Expected counts under the given probabilities.
pro$expected

## [1] 26 52 26

# Correlation
# Preview the first six rows of the Orange dataset.
head(Orange, n = 6L)

##   Tree  age circumference
## 1    1  118            30
## 2    1  484            58
## 3    1  664            87
## 4    1 1004           115
## 5    1 1231           120
## 6    1 1372           142

# Two-sided Pearson correlation test between tree age and circumference.
cor.test(
  Orange$age, Orange$circumference,
  alternative = "two.sided", method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  Orange$age and Orange$circumference
## t = 12.9, df = 33, p-value = 1.932e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8342364 0.9557955
## sample estimates:
##       cor 
## 0.9135189

# Two-sided Spearman rank correlation test on the same pair of variables.
# The recorded run warns that an exact p-value cannot be computed with ties.
cor.test(
  Orange$age, Orange$circumference,
  alternative = "two.sided", method = "spearman"
)

## Warning in cor.test.default(Orange$age, Orange$circumference, alternative
## = "two.sided", : Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  Orange$age and Orange$circumference
## S = 668.09, p-value = 6.712e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9064294

# Correlation matrix of age and circumference, computed on complete rows.
cor(Orange[, c("age", "circumference")], use = "complete.obs")

##                     age circumference
## age           1.0000000     0.9135189
## circumference 0.9135189     1.0000000

# Scatter of circumference against age.
plot(Orange$age, Orange$circumference)

# Simple linear regression of circumference on age, followed by its summary.
RegModel.1 <- lm(formula = circumference ~ age, data = Orange)
summary(RegModel.1)

## 
## Call:
## lm(formula = circumference ~ age, data = Orange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.310 -14.946  -0.076  19.697  45.111 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.399650   8.622660   2.018   0.0518 .  
## age          0.106770   0.008277  12.900 1.93e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.74 on 33 degrees of freedom
## Multiple R-squared:  0.8345, Adjusted R-squared:  0.8295 
## F-statistic: 166.4 on 1 and 33 DF,  p-value: 1.931e-14

library(car)

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:PASWR':
## 
##     Wool

# Scatterplot of circumference against age with a regression line, a smoother
# and marginal box plots requested.
# NOTE(review): these argument names (reg.line, spread, span) look like the
# older car interface — confirm against the installed car version.
scatterplot(
  circumference ~ age,
  reg.line = lm, smooth = TRUE, spread = TRUE, boxplots = "xy", span = 0.5,
  data = Orange
)
# Graphical form: the same formula, passing only reg.line.
scatterplot(circumference ~ age, reg.line = lm, data = Orange)

# Diagnostic plots of RegModel.1 laid out on a 2 x 2 grid.
# Exercise: explain how each of them is interpreted.
# par() returns the previous settings; they are restored afterwards so the
# grid does not carry over to later plots.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.1)
par(old_par)

# Preview the first six rows of the chickwts dataset.
head(chickwts, n = 6L)

##   weight      feed
## 1    179 horsebean
## 2    160 horsebean
## 3    136 horsebean
## 4    227 horsebean
## 5    217 horsebean
## 6    168 horsebean

# One-way analysis of variance of chick weight by feed type, and its table.
AnovaModel.3 <- aov(formula = weight ~ feed, data = chickwts)
summary(AnovaModel.3)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## feed         5 231129   46226   15.37 5.94e-10 ***
## Residuals   65 195556    3009                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Mean weight for each level of the factor (the treatments).
with(chickwts, tapply(weight, feed, mean))

##    casein horsebean   linseed  meatmeal   soybean sunflower 
##  323.5833  160.2000  218.7500  276.9091  246.4286  328.9167

# Exercise: obtain other descriptive statistics.
# Tukey multiple comparisons of the feed means from the fitted aov model
# (95% family-wise confidence level in the recorded output).
TukeyHSD(AnovaModel.3)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight ~ feed, data = chickwts)
## 
## $feed
##                            diff         lwr       upr     p adj
## horsebean-casein    -163.383333 -232.346876 -94.41979 0.0000000
## linseed-casein      -104.833333 -170.587491 -39.07918 0.0002100
## meatmeal-casein      -46.674242 -113.906207  20.55772 0.3324584
## soybean-casein       -77.154762 -140.517054 -13.79247 0.0083653
## sunflower-casein       5.333333  -60.420825  71.08749 0.9998902
## linseed-horsebean     58.550000  -10.413543 127.51354 0.1413329
## meatmeal-horsebean   116.709091   46.335105 187.08308 0.0001062
## soybean-horsebean     86.228571   19.541684 152.91546 0.0042167
## sunflower-horsebean  168.716667   99.753124 237.68021 0.0000000
## meatmeal-linseed      58.159091   -9.072873 125.39106 0.1276965
## soybean-linseed       27.678571  -35.683721  91.04086 0.7932853
## sunflower-linseed    110.166667   44.412509 175.92082 0.0000884
## soybean-meatmeal     -30.480519  -95.375109  34.41407 0.7391356
## sunflower-meatmeal    52.007576  -15.224388 119.23954 0.2206962
## sunflower-soybean     82.488095   19.125803 145.85039 0.0038845

# Plot the Tukey comparisons; TukeyHSD() is recomputed here rather than
# reusing a stored result.
plot(TukeyHSD(AnovaModel.3))

library(MASS)
# Preview the first six rows of the Aids2 dataset.
head(Aids2, n = 6L)

##   state sex  diag death status T.categ age
## 1   NSW   M 10905 11081      D      hs  35
## 2   NSW   M 11029 11096      D      hs  53
## 3   NSW   M  9551  9983      D      hs  42
## 4   NSW   M  9577  9654      D    haem  44
## 5   NSW   M 10015 10290      D      hs  39
## 6   NSW   M  9971 10344      D      hs  36

# Linear model of death on state, sex and T.categ with all their two- and
# three-way interactions. The recorded summary reports 15 coefficients that
# are not defined because of singularities.
AnovaModel.4 <- lm(formula = death ~ state * sex * T.categ, data = Aids2)
summary(AnovaModel.4)

## 
## Call:
## lm(formula = death ~ state * sex * T.categ, data = Aids2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2498.1  -310.9   191.9   470.5  1625.7 
## 
## Coefficients: (15 not defined because of singularities)
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   10985.6945   727.7050  15.096  < 2e-16 ***
## stateOther                    -1688.5528  1149.1216  -1.469 0.141829    
## stateQLD                        518.3055   390.9883   1.326 0.185071    
## stateVIC                       -425.7543   549.2831  -0.775 0.438341    
## sexM                            -18.5633   727.5368  -0.026 0.979646    
## T.categhsid                     -55.6713    88.1953  -0.631 0.527944    
## T.categid                       351.7670   747.3487   0.471 0.637901    
## T.categhet                      143.8055   769.6330   0.187 0.851792    
## T.categhaem                     -14.9979   113.1408  -0.133 0.894551    
## T.categblood                   -641.6945   718.7249  -0.893 0.372029    
## T.categmother                   252.3055   951.9649   0.265 0.791001    
## T.categother                    515.5055   777.7474   0.663 0.507501    
## stateOther:sexM                1791.0000  1148.2113   1.560 0.118916    
## stateQLD:sexM                  -451.9773   388.0605  -1.165 0.244237    
## stateVIC:sexM                   509.1865   548.4199   0.928 0.353249    
## stateOther:T.categhsid         -371.6572   322.1732  -1.154 0.248765    
## stateQLD:T.categhsid            342.2118   252.2426   1.357 0.174993    
## stateVIC:T.categhsid             61.8351   206.7004   0.299 0.764845    
## stateOther:T.categid           1781.0913  1193.6468   1.492 0.135775    
## stateQLD:T.categid             -662.7670   747.3487  -0.887 0.375250    
## stateVIC:T.categid              592.2928   841.0530   0.704 0.481350    
## stateOther:T.categhet          1963.2528  1207.7241   1.626 0.104152    
## stateQLD:T.categhet            -504.8055   635.6049  -0.794 0.427139    
## stateVIC:T.categhet             444.6829   646.7644   0.688 0.491793    
## stateOther:T.categhaem          109.5862   278.2585   0.394 0.693738    
## stateQLD:T.categhaem              0.7885   330.1630   0.002 0.998095    
## stateVIC:T.categhaem           -111.2322   276.1957  -0.403 0.687178    
## stateOther:T.categblood        1566.5528  1233.6170   1.270 0.204231    
## stateQLD:T.categblood          -796.0555   213.8040  -3.723 0.000201 ***
## stateVIC:T.categblood           834.2543   709.2547   1.176 0.239599    
## stateOther:T.categmother       -102.4472   753.0705  -0.136 0.891800    
## stateQLD:T.categmother         -252.3055   951.9649  -0.265 0.791001    
## stateVIC:T.categmother          514.7543  1027.1695   0.501 0.616313    
## stateOther:T.categother        -187.5857   243.6333  -0.770 0.441394    
## stateQLD:T.categother           217.6583   326.5458   0.667 0.505116    
## stateVIC:T.categother           131.0543   195.0232   0.672 0.501643    
## sexM:T.categhsid                      NA         NA      NA       NA    
## sexM:T.categid                  -64.2983   763.8046  -0.084 0.932918    
## sexM:T.categhet                 285.2299   789.6078   0.361 0.717955    
## sexM:T.categhaem                      NA         NA      NA       NA    
## sexM:T.categblood               282.5633   712.1329   0.397 0.691557    
## sexM:T.categmother              284.5633  1046.1041   0.272 0.785625    
## sexM:T.categother              -495.1232   784.1090  -0.631 0.527800    
## stateOther:sexM:T.categhsid           NA         NA      NA       NA    
## stateQLD:sexM:T.categhsid             NA         NA      NA       NA    
## stateVIC:sexM:T.categhsid             NA         NA      NA       NA    
## stateOther:sexM:T.categid     -1998.4242  1225.4083  -1.631 0.103040    
## stateQLD:sexM:T.categid         789.1721   840.7867   0.939 0.348011    
## stateVIC:sexM:T.categid        -561.3249   925.7946  -0.606 0.544354    
## stateOther:sexM:T.categhet    -1957.8667  1270.2194  -1.541 0.123342    
## stateQLD:sexM:T.categhet         -3.0227   747.4382  -0.004 0.996774    
## stateVIC:sexM:T.categhet       -647.2817   757.8309  -0.854 0.393109    
## stateOther:sexM:T.categhaem           NA         NA      NA       NA    
## stateQLD:sexM:T.categhaem             NA         NA      NA       NA    
## stateVIC:sexM:T.categhaem             NA         NA      NA       NA    
## stateOther:sexM:T.categblood  -1010.0000  1286.2612  -0.785 0.432390    
## stateQLD:sexM:T.categblood            NA         NA      NA       NA    
## stateVIC:sexM:T.categblood    -1045.1865   836.4350  -1.250 0.211560    
## stateOther:sexM:T.categmother         NA         NA      NA       NA    
## stateQLD:sexM:T.categmother           NA         NA      NA       NA    
## stateVIC:sexM:T.categmother           NA         NA      NA       NA    
## stateOther:sexM:T.categother          NA         NA      NA       NA    
## stateQLD:sexM:T.categother            NA         NA      NA       NA    
## stateVIC:sexM:T.categother            NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 613.7 on 2794 degrees of freedom
## Multiple R-squared:  0.0531, Adjusted R-squared:  0.03683 
## F-statistic: 3.264 on 48 and 2794 DF,  p-value: 5.422e-13

library(Rcmdr)

## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: sandwich
## La interfaz R-Commander sólo funciona en sesiones interactivas

# Mean of death by state with standard-error bars (single factor).
plotMeans(Aids2$death, Aids2$state, error.bars = "se")

# Same plot with sex added as a second factor (multiple factors).
plotMeans(Aids2$death, Aids2$state, Aids2$sex, error.bars = "se")

# Repaso Final

# Oscar Ramirez Alan

# 3 de noviembre de 2015