# Build histograms and frequency distributions, and interpret them.
# Exercise: use the "conc" variable of the CO2 dataset.
# Preview the first six rows of CO2.
head(CO2, n = 6L)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
# Use the following simulated data: 100 draws from a normal distribution
# with mean 25 and standard deviation 3.7. The seed is fixed so that the
# sample can be reproduced.
set.seed(seed = 2015)
dfr <- rnorm(n = 100, mean = 25, sd = 3.7)
# Build a frequency distribution of dfr (fdth package).
library("fdth")
##
## Attaching package: 'fdth'
##
## The following objects are masked from 'package:stats':
##
## sd, var
# Class intervals are chosen with Sturges' rule.
# NOTE(review): the object name `dist` is the same as the function
# stats::dist; it is kept here because the rest of this section uses it.
dist <- fdt(x = dfr, breaks = "Sturges")
print(dist)
## Class limits f rf rf(%) cf cf(%)
## [16.13,18.46) 4 0.04 4 4 4
## [18.46,20.79) 10 0.10 10 14 14
## [20.79,23.11) 19 0.19 19 33 33
## [23.11,25.44) 22 0.22 22 55 55
## [25.44,27.77) 25 0.25 25 80 80
## [27.77,30.09) 11 0.11 11 91 91
## [30.09,32.42) 7 0.07 7 98 98
## [32.42,34.75) 2 0.02 2 100 100
# Histogram of the same sample (base graphics).
hist(x = dfr)

# Check the fdth help pages to see the different plotting options.
# What kind of figure is the following one?
plot(dist, type = "cfp")

# Descriptive statistics for dfr using EDA() from the PASWR package.
library("PASWR")
## Warning: package 'PASWR' was built under R version 3.2.2
## Loading required package: e1071
## Loading required package: MASS
## Loading required package: lattice
EDA(x = dfr)
## [1] "dfr"

## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 100.000 0.000 16.296 21.993 24.812 24.918 24.783 27.394
## Max. Stdev. Var. SE Mean I.Q.R. Range Kurtosis Skewness
## 34.404 3.735 13.952 0.374 5.401 18.108 -0.529 0.085
## SW p-val
## 0.791
# Review each of the statistics reported above and interpret it. Also explore
# other ways of obtaining the individual measures (e.g. mode, median, sd).

# Probabilities ----
# Show the different uses and results of each command (mass and cumulative
# functions); do the same for the Poisson distribution.
# For each call, note the kind of question it answers, given how the code
# is written.

# P(X = 3) for X ~ Binomial(size = 10, prob = 0.25)
dbinom(3, size = 10, prob = 0.25)
## [1] 0.2502823
# P(X <= 3)
pbinom(3, size = 10, prob = 0.25)
## [1] 0.7758751
# P(X > 3)
1 - pbinom(3, size = 10, prob = 0.25)
## [1] 0.2241249
# P(3 < X <= 6)
pbinom(6, size = 10, prob = 0.25) - pbinom(3, size = 10, prob = 0.25)
## [1] 0.2206192

# Mass function of Binomial(size = 23, prob = 0.354). The support 0:23 is
# passed explicitly as the x coordinate: plotting the probability vector on
# its own would place the points at their index (1..24), shifting every
# value one unit to the right of the count it belongs to.
plot(
  0:23, dbinom(0:23, size = 23, prob = 0.354),
  xlab = "x", ylab = "P(X = x)"
)

# Mass function of Binomial(size = 10, prob = 0.75), tabulated and plotted.
y <- dbinom(0:10, size = 10, prob = 0.75)
cbind(x = 0:10, datos = y)
## x datos
## [1,] 0 9.536743e-07
## [2,] 1 2.861023e-05
## [3,] 2 3.862381e-04
## [4,] 3 3.089905e-03
## [5,] 4 1.622200e-02
## [6,] 5 5.839920e-02
## [7,] 6 1.459980e-01
## [8,] 7 2.502823e-01
## [9,] 8 2.815676e-01
## [10,] 9 1.877117e-01
## [11,] 10 5.631351e-02
# Same fix as above: x runs over 0:10, not over the index 1:11.
plot(0:10, y, type = "l", xlab = "x", ylab = "P(X = x)")

# Areas under the standard normal curve ----
# Sketch in your notebook the area under the curve given by each expression.
# Upper tail: P(Z > 2.65)
1 - pnorm(q = 2.65)
## [1] 0.004024589
# Central band: P(0 < Z < 1.35)
diff(pnorm(q = c(0, 1.35)))
## [1] 0.411492
# Lower tail: P(Z < 1)
pnorm(q = 1)
## [1] 0.8413447
# z.test() used below comes from TeachingDemos; attaching it masks the
# z.test() of PASWR (see the recorded message).
library("TeachingDemos")
## Warning: package 'TeachingDemos' was built under R version 3.2.2
##
## Attaching package: 'TeachingDemos'
##
## The following object is masked from 'package:PASWR':
##
## z.test
# Two-sided z-test of a single observation (60) against mu = 63, sd = 4.4.
z.test(x = 60, mu = 63, sd = 4.4)
##
## One Sample z-test
##
## data: 60
## z = -0.68182, n = 1.0, Std. Dev. = 4.4, Std. Dev. of the sample
## mean = 4.4, p-value = 0.4954
## alternative hypothesis: true mean is not equal to 63
## 95 percent confidence interval:
## 51.37616 68.62384
## sample estimates:
## mean of 60
## 60
# One-sided version; try the different tails (one- and two-sided).
z.test(x = 70, mu = 63, stdev = 4.4, alternative = "greater")
##
## One Sample z-test
##
## data: 70
## z = 1.5909, n = 1.0, Std. Dev. = 4.4, Std. Dev. of the sample mean
## = 4.4, p-value = 0.05582
## alternative hypothesis: true mean is greater than 63
## 95 percent confidence interval:
## 62.76264 Inf
## sample estimates:
## mean of 70
## 70
# One-population hypothesis test and the normality assumption ----
# Two-sided one-sample t-test of H0: mean = 26.
t.test(x = dfr, mu = 26)
##
## One Sample t-test
##
## data: dfr
## t = -3.1817, df = 99, p-value = 0.001956
## alternative hypothesis: true mean is not equal to 26
## 95 percent confidence interval:
## 24.07041 25.55271
## sample estimates:
## mean of x
## 24.81156
# Shapiro-Wilk normality test on the same sample.
shapiro.test(x = dfr)
##
## Shapiro-Wilk normality test
##
## data: dfr
## W = 0.99159, p-value = 0.7908
# Non-parametric counterpart of the one-sample t-test: Wilcoxon signed rank
# test of the same null location (26). Without `mu`, wilcox.test() tests
# location = 0, which is uninformative for this all-positive sample (the
# previously recorded result, V = 5050 = 100 * 101 / 2 with
# p-value < 2.2e-16, was the maximum possible statistic).
wilcox.test(dfr, mu = 26)
## (output not recorded for mu = 26; re-run the script to obtain V and the
## p-value)
# Box plot and histogram of dfr on a 2 x 2 plotting grid. par() returns the
# previous settings; they are restored afterwards so that the grid layout
# does not leak into the plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
boxplot(dfr)
hist(dfr)
par(old_par)

# Generate a second sample: 100 draws from a normal distribution with
# mean 27 and standard deviation 7.
dfr2 <- rnorm(n = 100, mean = 27, sd = 7)
# Build a table of summary statistics for both samples. Include: mean, sd,
# max, min, median, quantiles, IQR, normality and the Kolmogorov test.

# Two populations ----
# Logical arguments below are spelled TRUE/FALSE: T and F are ordinary
# variables that can be reassigned, TRUE and FALSE cannot.

# Paired test
t.test(dfr, dfr2, paired = TRUE)
##
## Paired t-test
##
## data: dfr and dfr2
## t = -2.1899, df = 99, p-value = 0.03088
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.3026835 -0.1627351
## sample estimates:
## mean of the differences
## -1.732709
# Independent samples
t.test(dfr, dfr2, paired = FALSE)
##
## Welch Two Sample t-test
##
## data: dfr and dfr2
## t = -2.291, df = 156.85, p-value = 0.02329
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.2265771 -0.2388416
## sample estimates:
## mean of x mean of y
## 24.81156 26.54427
# Homogeneity of variances
var.test(dfr, dfr2)
##
## F test to compare two variances
##
## data: dfr and dfr2
## F = 0.32259, num df = 99, denom df = 99, p-value = 4.38e-08
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2170549 0.4794506
## sample estimates:
## ratio of variances
## 0.3225944
# Assuming unequal variances
t.test(dfr, dfr2, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: dfr and dfr2
## t = -2.291, df = 156.85, p-value = 0.02329
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.2265771 -0.2388416
## sample estimates:
## mean of x mean of y
## 24.81156 26.54427
# Assuming equal variances
t.test(dfr, dfr2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: dfr and dfr2
## t = -2.291, df = 198, p-value = 0.02302
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.2241674 -0.2412512
## sample estimates:
## mean of x mean of y
## 24.81156 26.54427
# See the help page of t.test for the remaining options.
# Side-by-side box plots of the two samples.
boxplot(dfr, dfr2)

# Two-sample Kolmogorov-Smirnov test
ks.test(dfr, dfr2)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: dfr and dfr2
## D = 0.25, p-value = 0.003861
## alternative hypothesis: two-sided
# Chi-squared tests ----
# Goodness of fit of three observed counts; no probabilities are supplied,
# so the test uses chisq.test()'s default expected proportions.
frec <- c(15, 19, 22)
chisq.test(x = frec)
##
## Chi-squared test for given probabilities
##
## data: frec
## X-squared = 1.3214, df = 2, p-value = 0.5165
# 0.95 quantile of the chi-squared distribution with 2 degrees of freedom,
# to compare with the statistic above.
qchisq(p = 0.95, df = 2)
## [1] 5.991465
# Expected counts used by the test.
chisq.test(x = frec)$expected
## [1] 18.66667 18.66667 18.66667
# Counts observed in two habitats, one value per sex class.
habitat1 <- c(3, 6, 8)
habitat2 <- c(3, 12, 5)
habt <- data.frame(habitat1 = habitat1, habitat2 = habitat2)
print(habt)
## habitat1 habitat2
## 1 3 3
## 2 6 12
## 3 8 5
# Label the rows: males, females, unsexed.
row.names(habt) <- c("machos", "hembras", "no_sexados")
print(habt)
## habitat1 habitat2
## machos 3 3
## hembras 6 12
## no_sexados 8 5
# Chi-squared test on the 3 x 2 table (R warns that the approximation may
# be incorrect for these counts).
chisq.test(habt)
## Warning in chisq.test(habt): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: habt
## X-squared = 2.4653, df = 2, p-value = 0.2915
# Fisher's exact test with a simulated (Monte Carlo) p-value.
fisher.test(habt, simulate.p.value = TRUE)
##
## Fisher's Exact Test for Count Data with simulated p-value (based
## on 2000 replicates)
##
## data: habt
## p-value = 0.2724
## alternative hypothesis: two.sided
mosaicplot(habt, color = TRUE, main = "Plot de mosaico")

# Each cell as a proportion of the grand total.
habt / sum(habt)
## habitat1 habitat2
## machos 0.08108108 0.08108108
## hembras 0.16216216 0.32432432
## no_sexados 0.21621622 0.13513514
# Goodness of fit of three counts against the proportions 1/4 : 2/4 : 1/4.
chisq.test(x = c(28, 49, 27), p = c(1 / 4, 2 / 4, 1 / 4))
##
## Chi-squared test for given probabilities
##
## data: c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833
# Same test, stored so that its components can be inspected.
pro <- chisq.test(x = c(28, 49, 27), p = c(1 / 4, 2 / 4, 1 / 4))
print(pro)
##
## Chi-squared test for given probabilities
##
## data: c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833
# Expected counts under the given proportions.
pro$expected
## [1] 26 52 26
# Correlation ----
# Age versus trunk circumference in the Orange dataset.
head(Orange, n = 6L)
## Tree age circumference
## 1 1 118 30
## 2 1 484 58
## 3 1 664 87
## 4 1 1004 115
## 5 1 1231 120
## 6 1 1372 142
# Pearson correlation test (two-sided).
cor.test(
  Orange$age, Orange$circumference,
  alternative = "two.sided", method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Orange$age and Orange$circumference
## t = 12.9, df = 33, p-value = 1.932e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8342364 0.9557955
## sample estimates:
## cor
## 0.9135189
# Spearman rank correlation test; ties prevent an exact p-value (see the
# recorded warning).
cor.test(
  Orange$age, Orange$circumference,
  alternative = "two.sided", method = "spearman"
)
## Warning in cor.test.default(Orange$age, Orange$circumference, alternative
## = "two.sided", : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: Orange$age and Orange$circumference
## S = 668.09, p-value = 6.712e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9064294
# Correlation matrix of the two numeric columns.
cor(Orange[, c("age", "circumference")], use = "complete.obs")
## age circumference
## age 1.0000000 0.9135189
## circumference 0.9135189 1.0000000
# Scatter plot of circumference against age.
plot(Orange$age, Orange$circumference)

# Simple linear regression of circumference on age (Orange dataset),
# followed by its coefficient table and fit statistics.
RegModel.1 <- lm(formula = circumference ~ age, data = Orange)
summary(object = RegModel.1)
##
## Call:
## lm(formula = circumference ~ age, data = Orange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.310 -14.946 -0.076 19.697 45.111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.399650 8.622660 2.018 0.0518 .
## age 0.106770 0.008277 12.900 1.93e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.74 on 33 degrees of freedom
## Multiple R-squared: 0.8345, Adjusted R-squared: 0.8295
## F-statistic: 166.4 on 1 and 33 DF, p-value: 1.931e-14
# scatterplot() comes from the car package.
library("car")
##
## Attaching package: 'car'
##
## The following object is masked from 'package:PASWR':
##
## Wool
# Enhanced scatter plot of circumference against age.
# NOTE(review): the argument names reg.line / spread / span follow the car
# version this script was written against; confirm them against the
# installed car release before relying on this call.
scatterplot(
  circumference ~ age,
  data = Orange,
  reg.line = lm,
  smooth = TRUE,
  spread = TRUE,
  span = 0.5,
  boxplots = "xy"
)
# Graphical form with only the regression-line option set.
scatterplot(circumference ~ age, data = Orange, reg.line = lm)

# Diagnostic plots of the regression model on a 2 x 2 grid.
# Exercise: explain how each of the plots is interpreted.
# par() returns the previous settings; restoring them afterwards keeps the
# grid layout from leaking into the plots drawn later in the script.
old_par <- par(mfrow = c(2, 2))
plot(RegModel.1)
par(old_par)

# One-way ANOVA of chick weight by feed type (chickwts dataset) ----
head(chickwts, n = 6L)
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
## 6 168 horsebean
AnovaModel.3 <- aov(formula = weight ~ feed, data = chickwts)
summary(object = AnovaModel.3)
## Df Sum Sq Mean Sq F value Pr(>F)
## feed 5 231129 46226 15.37 5.94e-10 ***
## Residuals 65 195556 3009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean weight for each level of the factor (treatment).
with(chickwts, tapply(weight, feed, mean))
## casein horsebean linseed meatmeal soybean sunflower
## 323.5833 160.2000 218.7500 276.9091 246.4286 328.9167
# Exercise: obtain other descriptive statistics.
# Tukey HSD pairwise comparisons, computed once, then printed and plotted.
feed_tukey <- TukeyHSD(AnovaModel.3)
print(feed_tukey)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ feed, data = chickwts)
##
## $feed
## diff lwr upr p adj
## horsebean-casein -163.383333 -232.346876 -94.41979 0.0000000
## linseed-casein -104.833333 -170.587491 -39.07918 0.0002100
## meatmeal-casein -46.674242 -113.906207 20.55772 0.3324584
## soybean-casein -77.154762 -140.517054 -13.79247 0.0083653
## sunflower-casein 5.333333 -60.420825 71.08749 0.9998902
## linseed-horsebean 58.550000 -10.413543 127.51354 0.1413329
## meatmeal-horsebean 116.709091 46.335105 187.08308 0.0001062
## soybean-horsebean 86.228571 19.541684 152.91546 0.0042167
## sunflower-horsebean 168.716667 99.753124 237.68021 0.0000000
## meatmeal-linseed 58.159091 -9.072873 125.39106 0.1276965
## soybean-linseed 27.678571 -35.683721 91.04086 0.7932853
## sunflower-linseed 110.166667 44.412509 175.92082 0.0000884
## soybean-meatmeal -30.480519 -95.375109 34.41407 0.7391356
## sunflower-meatmeal 52.007576 -15.224388 119.23954 0.2206962
## sunflower-soybean 82.488095 19.125803 145.85039 0.0038845
plot(feed_tukey)

# Factorial linear model on the Aids2 dataset from MASS.
library("MASS")
head(Aids2, n = 6L)
## state sex diag death status T.categ age
## 1 NSW M 10905 11081 D hs 35
## 2 NSW M 11029 11096 D hs 53
## 3 NSW M 9551 9983 D hs 42
## 4 NSW M 9577 9654 D haem 44
## 5 NSW M 10015 10290 D hs 39
## 6 NSW M 9971 10344 D hs 36
# Full three-way crossing of state, sex and T.categ. The recorded summary
# reports 15 coefficients as not defined because of singularities.
AnovaModel.4 <- lm(formula = death ~ state * sex * T.categ, data = Aids2)
summary(object = AnovaModel.4)
##
## Call:
## lm(formula = death ~ state * sex * T.categ, data = Aids2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2498.1 -310.9 191.9 470.5 1625.7
##
## Coefficients: (15 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10985.6945 727.7050 15.096 < 2e-16 ***
## stateOther -1688.5528 1149.1216 -1.469 0.141829
## stateQLD 518.3055 390.9883 1.326 0.185071
## stateVIC -425.7543 549.2831 -0.775 0.438341
## sexM -18.5633 727.5368 -0.026 0.979646
## T.categhsid -55.6713 88.1953 -0.631 0.527944
## T.categid 351.7670 747.3487 0.471 0.637901
## T.categhet 143.8055 769.6330 0.187 0.851792
## T.categhaem -14.9979 113.1408 -0.133 0.894551
## T.categblood -641.6945 718.7249 -0.893 0.372029
## T.categmother 252.3055 951.9649 0.265 0.791001
## T.categother 515.5055 777.7474 0.663 0.507501
## stateOther:sexM 1791.0000 1148.2113 1.560 0.118916
## stateQLD:sexM -451.9773 388.0605 -1.165 0.244237
## stateVIC:sexM 509.1865 548.4199 0.928 0.353249
## stateOther:T.categhsid -371.6572 322.1732 -1.154 0.248765
## stateQLD:T.categhsid 342.2118 252.2426 1.357 0.174993
## stateVIC:T.categhsid 61.8351 206.7004 0.299 0.764845
## stateOther:T.categid 1781.0913 1193.6468 1.492 0.135775
## stateQLD:T.categid -662.7670 747.3487 -0.887 0.375250
## stateVIC:T.categid 592.2928 841.0530 0.704 0.481350
## stateOther:T.categhet 1963.2528 1207.7241 1.626 0.104152
## stateQLD:T.categhet -504.8055 635.6049 -0.794 0.427139
## stateVIC:T.categhet 444.6829 646.7644 0.688 0.491793
## stateOther:T.categhaem 109.5862 278.2585 0.394 0.693738
## stateQLD:T.categhaem 0.7885 330.1630 0.002 0.998095
## stateVIC:T.categhaem -111.2322 276.1957 -0.403 0.687178
## stateOther:T.categblood 1566.5528 1233.6170 1.270 0.204231
## stateQLD:T.categblood -796.0555 213.8040 -3.723 0.000201 ***
## stateVIC:T.categblood 834.2543 709.2547 1.176 0.239599
## stateOther:T.categmother -102.4472 753.0705 -0.136 0.891800
## stateQLD:T.categmother -252.3055 951.9649 -0.265 0.791001
## stateVIC:T.categmother 514.7543 1027.1695 0.501 0.616313
## stateOther:T.categother -187.5857 243.6333 -0.770 0.441394
## stateQLD:T.categother 217.6583 326.5458 0.667 0.505116
## stateVIC:T.categother 131.0543 195.0232 0.672 0.501643
## sexM:T.categhsid NA NA NA NA
## sexM:T.categid -64.2983 763.8046 -0.084 0.932918
## sexM:T.categhet 285.2299 789.6078 0.361 0.717955
## sexM:T.categhaem NA NA NA NA
## sexM:T.categblood 282.5633 712.1329 0.397 0.691557
## sexM:T.categmother 284.5633 1046.1041 0.272 0.785625
## sexM:T.categother -495.1232 784.1090 -0.631 0.527800
## stateOther:sexM:T.categhsid NA NA NA NA
## stateQLD:sexM:T.categhsid NA NA NA NA
## stateVIC:sexM:T.categhsid NA NA NA NA
## stateOther:sexM:T.categid -1998.4242 1225.4083 -1.631 0.103040
## stateQLD:sexM:T.categid 789.1721 840.7867 0.939 0.348011
## stateVIC:sexM:T.categid -561.3249 925.7946 -0.606 0.544354
## stateOther:sexM:T.categhet -1957.8667 1270.2194 -1.541 0.123342
## stateQLD:sexM:T.categhet -3.0227 747.4382 -0.004 0.996774
## stateVIC:sexM:T.categhet -647.2817 757.8309 -0.854 0.393109
## stateOther:sexM:T.categhaem NA NA NA NA
## stateQLD:sexM:T.categhaem NA NA NA NA
## stateVIC:sexM:T.categhaem NA NA NA NA
## stateOther:sexM:T.categblood -1010.0000 1286.2612 -0.785 0.432390
## stateQLD:sexM:T.categblood NA NA NA NA
## stateVIC:sexM:T.categblood -1045.1865 836.4350 -1.250 0.211560
## stateOther:sexM:T.categmother NA NA NA NA
## stateQLD:sexM:T.categmother NA NA NA NA
## stateVIC:sexM:T.categmother NA NA NA NA
## stateOther:sexM:T.categother NA NA NA NA
## stateQLD:sexM:T.categother NA NA NA NA
## stateVIC:sexM:T.categother NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 613.7 on 2794 degrees of freedom
## Multiple R-squared: 0.0531, Adjusted R-squared: 0.03683
## F-statistic: 3.264 on 48 and 2794 DF, p-value: 5.422e-13
# plotMeans() is provided by RcmdrMisc, which Rcmdr merely loads as a
# dependency. Attaching RcmdrMisc directly avoids starting the R Commander
# interface, which attaching Rcmdr does in interactive sessions.
# (The load messages recorded for library(Rcmdr) no longer apply and were
# removed.)
library(RcmdrMisc)
# Mean of death for each state with standard-error bars (one factor).
plotMeans(Aids2$death, Aids2$state, error.bars = "se")

# Same plot with the means split by sex (two factors).
plotMeans(Aids2$death, Aids2$state, Aids2$sex, error.bars = "se")
