library(UsingR)
# Number of objects exported by the attached UsingR package.
length(ls(name = "package:UsingR"))
## [1] 150
# Load the three example datasets in a single call, then draw one histogram
# per dataset.
data(bumpers, firstchi, math)
hist(bumpers)
hist(firstchi)
hist(math)
## Means
# For each dataset: a boxplot with the mean marked as a gold dot (a single
# box is drawn at x = 1), then the summary statistics and the standard
# deviation.
boxplot(bumpers)
points(x = 1, y = mean(bumpers), col = "gold", pch = 19)
summary(bumpers)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 618 1478 2129 2122 2774 3298
sd(bumpers)
## [1] 798.4574

boxplot(firstchi)
points(x = 1, y = mean(firstchi), col = "gold", pch = 19)
summary(firstchi)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.00 20.00 23.00 23.98 26.00 42.00
sd(firstchi)
## [1] 6.254258

boxplot(math)
points(x = 1, y = mean(math), col = "gold", pch = 19)
summary(math)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 38.00 49.00 54.00 54.90 61.75 75.00
sd(math)
## [1] 9.746264
# Distribution of `brightness`: histogram on the density scale, kernel density
# estimate (printed, plotted alone, and overlaid on the histogram), then a
# boxplot and the values it flags as outliers.
data(brightness)
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned,
# which would silently switch the histogram back to counts.
hist(brightness, freq = FALSE)
density(brightness)
##
## Call:
## density.default(x = brightness)
##
## Data: brightness (966 obs.); Bandwidth 'bw' = 0.2425
##
## x y
## Min. : 1.342 Min. :0.0000156
## 1st Qu.: 4.296 1st Qu.:0.0025868
## Median : 7.250 Median :0.0226027
## Mean : 7.250 Mean :0.0845551
## 3rd Qu.:10.204 3rd Qu.:0.1267748
## Max. :13.158 Max. :0.3961890
plot(density(brightness))
hist(brightness, freq = FALSE)
lines(density(brightness))
## Box plots
boxplot(brightness)
boxplot.stats(brightness)$out
## [1] 12.31 11.71 5.53 11.28 4.78 5.13 4.37 5.04 12.43 12.04 4.55 11.55
## [13] 12.14 11.63 4.99 11.67 4.61 11.99 12.04 5.55 12.17 11.55 11.79 12.19
## [25] 2.07 11.65 11.73 2.28 5.42 3.88 5.54 5.29 5.01 11.55 4.89 11.80
## [37] 5.41 5.24
# Outliers according to the boxplot rule, computed once and reused below.
atipicos <- boxplot.stats(brightness)$out
sort(atipicos)
## [1] 2.07 2.28 3.88 4.37 4.55 4.61 4.78 4.89 4.99 5.01 5.04 5.13
## [13] 5.24 5.29 5.41 5.42 5.53 5.54 5.55 11.28 11.55 11.55 11.55 11.63
## [25] 11.65 11.67 11.71 11.73 11.79 11.80 11.99 12.04 12.04 12.14 12.17 12.19
## [37] 12.31 12.43
# Second-smallest outlier.
seg.menor.outlier <- sort(atipicos)[2]
seg.menor.outlier
## [1] 2.28
# Logical mask of the observations whose value is one of the outliers, and
# the corresponding positions.
es_atipico <- brightness %in% atipicos
out_indices <- which(es_atipico)
out_indices
## [1] 6 17 107 111 122 145 154 183 191 263 300 307 320 353 355 390 441 454 463
## [20] 475 522 548 560 569 676 730 736 744 759 763 811 812 839 896 908 909 928 948
# Drop the outliers with the logical mask. Negative indexing
# (`brightness[-out_indices]`) would return an EMPTY vector when there are no
# outliers, because `x[-integer(0)]` selects nothing.
brightness.sin <- brightness[!es_atipico]
brightness.sin
## [1] 9.10 9.27 6.61 8.06 8.55 9.64 9.05 8.59 8.59 7.34 8.43 8.80
## [13] 7.25 8.60 8.15 11.03 6.53 8.51 7.55 8.69 7.57 9.05 6.28 9.13
## [25] 9.32 8.83 9.14 8.26 7.63 9.09 8.10 6.43 9.07 7.68 10.44 8.65
## [37] 7.46 8.70 10.61 8.20 6.18 7.91 9.59 8.57 10.78 7.31 9.53 6.49
## [49] 8.94 8.56 10.96 10.57 7.40 8.12 8.27 7.05 9.09 8.34 8.86 8.27
## [61] 6.36 8.08 11.00 8.55 7.83 8.79 8.33 10.42 8.26 8.97 6.90 9.93
## [73] 7.42 9.03 8.41 8.06 8.69 8.40 8.57 9.50 8.85 9.61 10.62 8.05
## [85] 7.80 5.71 7.87 7.64 7.66 8.68 8.12 10.10 8.67 10.46 9.87 9.48
## [97] 7.04 8.44 9.88 7.05 8.29 9.34 7.73 6.22 8.53 7.23 8.61 10.76
## [109] 8.93 7.95 7.46 8.60 8.55 9.20 6.82 8.29 6.83 7.21 5.58 8.70
## [121] 8.06 10.86 6.50 9.32 9.14 8.13 10.62 6.62 9.96 8.64 6.60 6.25
## [133] 7.83 10.03 9.04 8.47 7.33 8.66 10.35 8.96 8.49 11.26 8.15 7.04
## [145] 10.02 8.90 7.78 9.93 8.60 8.51 7.09 6.93 8.68 8.98 9.84 8.98
## [157] 7.98 10.16 8.86 8.58 9.56 9.24 9.63 5.80 9.05 8.45 8.86 7.84
## [169] 8.86 8.93 7.97 6.90 8.47 6.77 8.55 8.48 8.53 6.33 8.99 8.64
## [181] 9.55 8.74 8.16 9.46 5.70 7.62 8.95 8.97 8.94 7.24 10.32 8.24
## [193] 8.62 9.18 8.53 8.54 8.56 9.41 5.87 7.20 9.05 9.52 10.24 7.70
## [205] 8.17 7.29 9.26 7.94 8.42 8.56 7.52 7.74 8.85 9.01 7.17 9.04
## [217] 10.30 9.86 7.64 8.27 8.44 9.58 8.43 8.49 9.64 9.17 8.09 9.00
## [229] 6.25 8.56 10.81 8.76 7.76 7.82 7.90 8.52 9.73 9.19 8.10 8.75
## [241] 8.14 8.65 10.30 6.46 6.73 7.96 9.53 8.87 6.59 8.65 9.64 9.15
## [253] 9.04 8.42 8.09 9.06 8.09 8.18 8.77 7.36 9.16 8.82 11.14 6.24
## [265] 9.44 7.49 6.96 7.94 8.69 8.15 8.45 7.92 7.45 9.01 8.55 9.23
## [277] 9.16 7.90 8.68 7.78 8.21 8.11 8.29 7.89 9.67 8.24 6.80 8.18
## [289] 8.44 7.45 6.31 8.15 8.27 7.66 8.59 7.09 8.54 9.58 8.44 8.59
## [301] 8.01 8.29 9.62 7.26 7.91 9.45 8.19 8.93 7.65 8.53 7.38 8.56
## [313] 8.76 9.56 7.09 9.83 5.90 10.80 8.41 9.05 8.79 8.88 7.59 9.60
## [325] 10.66 8.55 8.11 9.44 9.60 5.78 10.66 6.38 8.80 7.79 8.60 7.77
## [337] 10.37 9.80 10.42 9.22 8.43 7.33 8.93 9.09 9.26 8.73 9.18 8.12
## [349] 9.26 8.94 6.11 9.13 7.90 9.34 7.13 10.82 7.46 8.72 7.02 9.08
## [361] 8.37 5.59 7.37 5.68 8.56 8.72 9.06 8.82 8.18 9.39 9.10 8.46
## [373] 9.15 8.28 8.18 7.93 9.21 6.09 8.31 7.83 8.72 6.61 6.25 7.82
## [385] 8.66 8.15 8.97 8.15 7.47 8.63 8.13 8.23 8.41 6.47 9.83 8.64
## [397] 7.73 8.64 8.94 8.84 6.32 5.80 8.97 7.53 7.41 7.80 8.14 6.71
## [409] 8.73 9.37 8.69 9.95 7.10 8.09 6.88 9.48 9.04 9.30 8.49 8.30
## [421] 7.95 7.08 6.93 8.38 8.56 8.78 7.42 8.26 7.71 6.91 9.16 8.99
## [433] 8.63 9.90 7.59 7.39 7.78 7.47 6.97 8.82 9.13 7.86 7.13 9.45
## [445] 8.78 7.23 9.73 7.36 7.36 8.47 9.37 6.99 8.20 8.36 8.22 9.91
## [457] 9.67 8.60 10.07 10.15 7.75 9.21 9.66 8.47 9.37 9.44 9.99 10.38
## [469] 7.51 8.91 7.45 9.57 8.99 8.58 6.90 7.55 7.93 9.71 9.57 8.55
## [481] 6.62 7.89 7.51 7.36 8.66 8.51 6.65 9.67 7.80 8.21 7.90 8.94
## [493] 9.82 8.69 8.57 8.89 5.98 7.92 7.60 8.22 5.70 8.75 6.93 7.97
## [505] 8.06 10.13 7.31 8.35 5.57 9.85 9.16 9.03 10.07 9.76 9.35 10.95
## [517] 8.87 6.68 9.69 8.05 10.30 6.07 8.51 7.71 8.56 8.26 8.62 10.92
## [529] 10.51 9.83 9.84 9.74 8.21 8.72 8.03 9.00 6.19 8.22 7.93 10.18
## [541] 8.98 9.13 6.91 8.79 8.23 10.24 8.83 7.62 8.96 10.41 8.97 9.61
## [553] 8.29 8.30 8.26 7.44 9.52 8.20 8.68 8.65 10.52 8.41 9.18 8.42
## [565] 8.86 7.92 10.97 8.85 9.31 10.28 7.56 7.88 7.99 8.23 8.52 9.14
## [577] 6.20 7.64 8.95 7.48 7.06 7.33 8.98 8.24 8.53 8.40 7.48 8.46
## [589] 9.29 8.57 8.70 8.50 8.37 6.87 7.50 7.39 8.19 7.56 8.37 7.39
## [601] 6.73 8.66 8.25 8.47 8.01 6.83 9.06 8.79 7.44 6.43 5.93 8.85
## [613] 9.86 8.55 7.66 7.82 9.08 10.10 8.21 8.85 7.79 7.58 7.85 7.18
## [625] 7.54 9.72 7.12 9.77 8.84 5.67 8.15 9.61 8.19 7.27 8.51 8.36
## [637] 10.00 8.74 6.18 10.26 10.16 8.31 8.58 7.04 8.81 5.99 8.22 9.86
## [649] 8.00 9.40 9.10 8.11 8.89 9.43 7.59 8.72 9.86 9.23 9.50 10.73
## [661] 7.59 7.41 9.26 7.78 7.76 8.94 8.95 6.41 6.11 7.76 7.38 6.21
## [673] 7.05 7.44 8.50 7.84 11.01 7.88 9.10 8.65 8.41 7.81 7.43 8.76
## [685] 7.58 9.55 6.82 10.24 6.24 7.31 10.52 9.27 7.13 9.14 8.48 8.57
## [697] 7.21 9.05 7.72 8.03 6.47 5.57 6.32 7.78 8.58 10.37 9.23 9.20
## [709] 6.93 9.32 7.11 9.79 8.21 8.42 7.05 9.26 8.77 9.25 9.30 10.63
## [721] 9.90 9.89 9.33 7.78 7.02 11.26 8.89 9.60 7.07 6.01 9.11 8.24
## [733] 8.97 8.59 7.17 7.94 7.27 9.59 7.94 8.52 7.59 9.17 8.08 9.80
## [745] 8.92 9.91 9.42 8.84 10.15 8.37 9.33 9.35 7.40 8.35 9.53 9.59
## [757] 10.05 8.57 8.48 8.43 8.45 8.84 11.18 8.64 8.42 6.34 7.93 8.36
## [769] 8.32 7.77 6.84 8.78 7.19 8.50 8.82 9.04 7.93 7.66 10.07 9.03
## [781] 8.13 7.51 9.08 7.10 7.88 9.40 9.06 8.38 10.65 7.77 8.50 8.61
## [793] 10.05 8.71 9.37 6.97 8.56 9.34 9.47 8.11 8.91 7.83 8.95 7.20
## [805] 9.37 5.84 9.81 9.27 9.50 9.32 8.92 8.38 7.74 8.60 9.49 8.35
## [817] 7.11 9.87 8.98 7.75 8.24 6.74 6.83 7.70 6.70 8.67 9.94 8.73
## [829] 9.63 6.66 8.29 8.47 8.16 8.97 7.51 8.97 8.55 5.84 7.85 8.68
## [841] 8.05 8.27 7.68 9.40 7.77 6.89 7.55 8.27 8.16 8.07 7.91 7.71
## [853] 10.16 8.41 8.88 9.64 7.93 7.78 8.90 8.55 9.15 10.86 9.08 7.44
## [865] 10.35 6.68 8.85 8.90 8.24 6.74 10.75 8.44 7.69 8.88 7.70 8.60
## [877] 8.44 9.50 9.03 7.15 7.95 8.23 9.81 8.48 9.33 8.97 8.08 7.47
## [889] 8.34 7.75 8.34 7.56 6.93 10.03 8.69 9.04 8.32 7.85 7.21 8.98
## [901] 7.09 8.85 9.21 8.61 7.91 7.47 8.65 8.53 9.92 8.09 7.06 8.45
## [913] 8.73 7.45 9.02 7.51 7.32 8.17 9.45 9.72 9.34 8.75 9.32 7.91
## [925] 7.49 6.53 6.18 8.69
# Boxplot of the brightness values that remain after removing the outliers.
boxplot(brightness.sin)
# 2.3 Paquete MASS
library(UsingR)
library(ggplot2)
# NOTE(review): `tipos_variables` is initialised but never filled or read in
# the visible part of the script; kept in case later code expects it.
tipos_variables <- c()
data(UScereal)
class(UScereal[, 1]) # data type of the first column only
## [1] "factor"
# One fill colour per manufacturer level, shared by the bars and the legend.
colores <- c(
  "aquamarine2", "aquamarine3", "cadetblue2",
  "cadetblue3", "cadetblue4", "cornflowerblue"
)
# Grouped bars: manufacturer counts within each shelf. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
barplot(
  table(UScereal$mfr, UScereal$shelf),
  beside = TRUE,
  col = colores,
  main = "Relación entre manufacturer y shelf",
  xlab = "Estante de exhibición",
  ylab = "Fabricante (representado por su primera inicial)"
)
legend("topright", levels(UScereal$mfr), fill = colores)
# Scatter plot of fat against vitamins. Columns are referenced by bare name
# inside aes() so ggplot2 resolves them in `data`; writing `UScereal$fat`
# there raises the "Use of `UScereal$fat` is discouraged" warning.
ggplot(data = UScereal, aes(x = fat, y = vitamins)) + geom_point()
### fat vs. shelf:
plot(x = UScereal$shelf, y = UScereal$fat)
### carbo vs. sugars: correlation coefficient, then the scatter plot
with(UScereal, cor(carbo, sugars))
## [1] -0.04082599
plot(x = UScereal$carbo, y = UScereal$sugars)
### fibre vs. manufacturer:
plot(x = UScereal$mfr, y = UScereal$fibre)
### sodium vs. sugars:
cor(UScereal$sodium, UScereal$sugars)
## [1] 0.2112437
# Bare column names inside aes(): ggplot2 looks them up in `data`, and using
# `UScereal$sodium` there raises a "Use of ... is discouraged" warning.
ggplot(data = UScereal, aes(x = sodium, y = sugars)) + geom_point()
# 2.4 Datos mammals
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Body vs. brain in the mammals data: scatter-plot matrix, correlation and
# plain scatter plot, first on the raw scale and then on the log scale.
data(mammals)
pairs(mammals$body ~ mammals$brain)
with(mammals, cor(body, brain))
## [1] 0.9341638
plot(x = mammals$body, y = mammals$brain)
## log transform
pairs(log(mammals$body) ~ log(mammals$brain))
with(mammals, cor(log(body), log(brain)))
## [1] 0.9595748
plot(x = log(mammals$body), y = log(mammals$brain))
# 2.5 Datos emissions
library(UsingR)
# Load the emissions data frame and print it; the printed columns are GDP,
# perCapita and CO2, with one row per country.
data(emissions)
emissions
## GDP perCapita CO2
## UnitedStates 8083000 29647 6750
## Japan 3080000 24409 1320
## Germany 1740000 21197 1740
## France 1320000 22381 550
## UnitedKingdom 1242000 21010 675
## Italy 1240000 21856 540
## Russia 692000 4727 2000
## Canada 658000 21221 700
## Spain 642400 16401 370
## Australia 394000 20976 480
## Netherlands 343900 21755 240
## Poland 280700 7270 400
## Belgium 236300 23208 145
## Sweden 176200 19773 75
## Austria 174100 21390 80
## Switzerland 172400 23696 54
## Portugal 149500 15074 75
## Greece 137400 12833 125
## Ukraine 124900 2507 420
## Denmark 122500 22868 75
## Norway 120500 27149 56
## Romania 114200 5136 160
## CzechRepublic 111900 10885 150
## Finland 102100 19793 76
## Hungary 73200 7186 85
## Ireland 59900 16488 63
# Pairwise correlations and scatter-plot matrix of GDP, perCapita and CO2.
cor(emissions)
## GDP perCapita CO2
## GDP 1.0000000 0.4325303 0.9501753
## perCapita 0.4325303 1.0000000 0.2757962
## CO2 0.9501753 0.2757962 1.0000000
pairs(emissions)
# Regression model
# The formula names the columns directly and lets `data` resolve them.
# Writing `emissions$CO2 ~ emissions$GDP + ...` bypasses `data`, so the fitted
# model would always read the global `emissions` object and predict() would
# ignore whatever is passed as `newdata`.
regresion_lineal <- lm(CO2 ~ GDP + perCapita, data = emissions)
summary(regresion_lineal)
##
## Call:
## lm(formula = CO2 ~ GDP + perCapita, data = emissions)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1037.3 -167.4 10.8 153.2 1052.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.100e+02 2.044e+02 2.495 0.0202 *
## GDP 8.406e-04 5.198e-05 16.172 4.68e-14 ***
## perCapita -3.039e-02 1.155e-02 -2.631 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 382.8 on 23 degrees of freedom
## Multiple R-squared: 0.9253, Adjusted R-squared: 0.9188
## F-statistic: 142.5 on 2 and 23 DF, p-value: 1.102e-13
# Observed CO2 against the model's fitted values, with the identity line as
# reference (points on the line are predicted exactly).
# abline(<lm>) only draws the regression line of a single-predictor model:
# with two predictors it warns "only using the first two of 3 regression
# coefficients" and draws intercept + GDP slope against an x axis
# (GDP + perCapita) that the model never used.
plot(fitted(regresion_lineal), emissions$CO2)
abline(a = 0, b = 1, col = "red")
# Observed CO2 values, for comparison with the predictions below.
emissions[["CO2"]]
## [1] 6750 1320 1740 550 675 540 2000 700 370 480 240 400 145 75 80
## [16] 54 75 125 420 75 56 160 150 76 85 63
# Model predictions for every row of the original data frame.
CO2_predict <- predict(object = regresion_lineal, newdata = emissions)
plot(x = emissions$GDP + emissions$perCapita, y = CO2_predict)
CO2_predict
## UnitedStates Japan Germany France UnitedKingdom
## 6403.720110 2357.274571 1328.457202 939.412260 915.510264
## Italy Russia Canada Spain Australia
## 888.117914 948.030553 418.174003 551.546727 203.695487
## Netherlands Poland Belgium Sweden Austria
## 137.905405 524.997147 3.295727 57.168681 6.260516
## Switzerland Portugal Greece Ukraine Denmark
## -65.251059 177.533136 235.468677 538.782254 -82.034073
## Norway Romania CzechRepublic Finland Hungary
## -213.820805 449.888660 273.235200 -5.729292 353.120804
## Ireland
## 59.239930
# Agreement between observed and predicted CO2.
cor(x = emissions$CO2, y = CO2_predict)
## [1] 0.9619321
# One box per column, then the CO2 values flagged as outliers by the boxplot
# rule.
boxplot(emissions)
boxplot.stats(emissions$CO2)[["out"]]
## [1] 6750 1320 1740 2000
# Row positions whose CO2 value is one of those outliers.
out_indices_emisiones <- which(
  emissions$CO2 %in% boxplot.stats(emissions$CO2)[["out"]]
)
out_indices_emisiones
## [1] 1 2 3 7
## [1] 1 2 3 7
# Rows kept once the CO2 outliers are excluded. setdiff() replaces negative
# indexing because `x[-integer(0)]` returns an empty vector, which would drop
# every row if no outlier had been found.
filas_sin_out <- setdiff(seq_len(nrow(emissions)), out_indices_emisiones)
CO2.sin.out <- emissions$CO2[filas_sin_out]
CO2.sin.out
## [1] 550 675 540 700 370 480 240 400 145 75 80 54 75 125 420 75 56 160 150
## [20] 76 85 63
GDP.sin.out <- emissions$GDP[filas_sin_out]
perCapita.sin.out <- emissions$perCapita[filas_sin_out]
# Refit the model without the outliers.
# NOTE(review): the three *.sin.out vectors are not columns of `emissions`;
# lm() finds them in the calling environment, so `data = emissions` has no
# effect here. It is kept so the recorded Call line stays valid.
regresion_lineal1 <- lm(CO2.sin.out ~ GDP.sin.out + perCapita.sin.out, data = emissions)
summary(regresion_lineal1)
##
## Call:
## lm(formula = CO2.sin.out ~ GDP.sin.out + perCapita.sin.out, data = emissions)
##
## Residuals:
## Min 1Q Median 3Q Max
## -130.88 -63.84 -32.27 16.79 334.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.233e+02 7.202e+01 3.101 0.00588 **
## GDP.sin.out 4.912e-04 6.989e-05 7.028 1.09e-06 ***
## perCapita.sin.out -8.525e-03 4.114e-03 -2.072 0.05209 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.4 on 19 degrees of freedom
## Multiple R-squared: 0.7225, Adjusted R-squared: 0.6933
## F-statistic: 24.73 on 2 and 19 DF, p-value: 5.142e-06
# Observed CO2 (outliers removed) against the fitted values of the refitted
# model, with the identity line as reference.
# abline(<lm>) is only meaningful for a single-predictor model: with two
# predictors it warns "only using the first two of 3 regression coefficients"
# and the line it draws does not correspond to the x axis being plotted.
plot(fitted(regresion_lineal1), CO2.sin.out)
abline(a = 0, b = 1, col = "blue")
# Observed CO2 values without outliers, for comparison with the predictions.
CO2.sin.out
## [1] 550 675 540 700 370 480 240 400 145 75 80 54 75 125 420 75 56 160 150
## [20] 76 85 63
# Fitted values of the outlier-free model. The model was fitted on free
# vectors (not on columns of `emissions`), so passing `newdata = emissions`
# cannot supply new predictor values: predict() falls back to the 22 fitted
# rows and warns "'newdata' had 26 rows but variables found have 22 rows".
# Calling predict() without `newdata` returns the same values with no warning.
CO2_predict_sin_out <- predict(regresion_lineal1)
plot(GDP.sin.out + perCapita.sin.out, CO2_predict_sin_out)
CO2_predict_sin_out
## 1 2 3 4 5 6 7 8
## 680.88253 654.25862 646.06437 365.61631 399.04321 238.03584 206.78744 299.22605
## 9 10 11 12 13 14 15 16
## 141.55107 141.31414 126.49820 106.00521 168.25755 181.41829 263.30493 88.55427
## 17 18 19 20 21 22
## 51.07759 235.63791 185.49960 104.74791 198.02424 112.19471
cor(CO2.sin.out, CO2_predict_sin_out)
## [1] 0.8499957
# CO2 distribution before and after removing the outliers.
boxplot(emissions$CO2)
boxplot(CO2.sin.out)
# 2.6 MASS anorexia
library(MASS)
data("anorexia")
anorexia[3, 1:3]
## Treat Prewt Postwt
## 3 Cont 91.8 86.4
# Row positions of the patients whose weight went up (Postwt above Prewt).
pos_casos_exito <- which(anorexia$Prewt < anorexia$Postwt)
casos_exito <- anorexia[pos_casos_exito, ]
# Treatment with the largest number of successful cases, formatted as text.
exitos_por_tratamiento <- table(casos_exito$Treat)
mejor_tratamiento <- paste(
  names(which.max(exitos_por_tratamiento)),
  ": ",
  max(exitos_por_tratamiento),
  " casos de éxito"
)
table(casos_exito$Treat)
##
## CBT Cont FT
## 18 11 13
mejor_tratamiento
## [1] "CBT : 18 casos de éxito"
# Row positions of the patients whose weight went down.
pos_casos_fracaso <- which(anorexia$Prewt > anorexia$Postwt)
casos_fracaso <- anorexia[pos_casos_fracaso, ]
ganaron_peso <- length(pos_casos_exito)
ganaron_peso
## [1] 42
perdieron_peso <- nrow(casos_fracaso)
# Two bars: patients who gained weight vs. patients who lost weight.
barplot(
  c(ganaron_peso, perdieron_peso),
  col = c("purple", "black"),
  main = "Pacientes que ganaron vs los que perdieron peso",
  ylab = "Cantidad de pacientes"
)
legend(
  "topright",
  legend = paste(
    c("Ganaron peso: ", "Perdieron peso: "),
    c(ganaron_peso, perdieron_peso)
  ),
  fill = c("purple", "black")
)
# 2.7 MASS melanoma
library(MASS)
# Load the Melanoma data frame and print all of its rows (columns shown:
# time, status, sex, age, year, thickness, ulcer).
data("Melanoma")
Melanoma
## time status sex age year thickness ulcer
## 1 10 3 1 76 1972 6.76 1
## 2 30 3 1 56 1968 0.65 0
## 3 35 2 1 41 1977 1.34 0
## 4 99 3 0 71 1968 2.90 0
## 5 185 1 1 52 1965 12.08 1
## 6 204 1 1 28 1971 4.84 1
## 7 210 1 1 77 1972 5.16 1
## 8 232 3 0 60 1974 3.22 1
## 9 232 1 1 49 1968 12.88 1
## 10 279 1 0 68 1971 7.41 1
## 11 295 1 0 53 1969 4.19 1
## 12 355 3 0 64 1972 0.16 1
## 13 386 1 0 68 1965 3.87 1
## 14 426 1 1 63 1970 4.84 1
## 15 469 1 0 14 1969 2.42 1
## 16 493 3 1 72 1971 12.56 1
## 17 529 1 1 46 1971 5.80 1
## 18 621 1 1 72 1972 7.06 1
## 19 629 1 1 95 1968 5.48 1
## 20 659 1 1 54 1972 7.73 1
## 21 667 1 0 89 1968 13.85 1
## 22 718 1 1 25 1967 2.34 1
## 23 752 1 1 37 1973 4.19 1
## 24 779 1 1 43 1967 4.04 1
## 25 793 1 1 68 1970 4.84 1
## 26 817 1 0 67 1966 0.32 0
## 27 826 3 0 86 1965 8.54 1
## 28 833 1 0 56 1971 2.58 1
## 29 858 1 0 16 1967 3.56 0
## 30 869 1 0 42 1965 3.54 0
## 31 872 1 0 65 1968 0.97 0
## 32 967 1 1 52 1970 4.83 1
## 33 977 1 1 58 1967 1.62 1
## 34 982 1 0 60 1970 6.44 1
## 35 1041 1 1 68 1967 14.66 0
## 36 1055 1 0 75 1967 2.58 1
## 37 1062 1 1 19 1966 3.87 1
## 38 1075 1 1 66 1971 3.54 1
## 39 1156 1 0 56 1970 1.34 1
## 40 1228 1 1 46 1973 2.24 1
## 41 1252 1 0 58 1971 3.87 1
## 42 1271 1 0 74 1971 3.54 1
## 43 1312 1 0 65 1970 17.42 1
## 44 1427 3 1 64 1972 1.29 0
## 45 1435 1 1 27 1969 3.22 0
## 46 1499 2 1 73 1973 1.29 0
## 47 1506 1 1 56 1970 4.51 1
## 48 1508 2 1 63 1973 8.38 1
## 49 1510 2 0 69 1973 1.94 0
## 50 1512 2 0 77 1973 0.16 0
## 51 1516 1 1 80 1968 2.58 1
## 52 1525 3 0 76 1970 1.29 1
## 53 1542 2 0 65 1973 0.16 0
## 54 1548 1 0 61 1972 1.62 0
## 55 1557 2 0 26 1973 1.29 0
## 56 1560 1 0 57 1973 2.10 0
## 57 1563 2 0 45 1973 0.32 0
## 58 1584 1 1 31 1970 0.81 0
## 59 1605 2 0 36 1973 1.13 0
## 60 1621 1 0 46 1972 5.16 1
## 61 1627 2 0 43 1973 1.62 0
## 62 1634 2 0 68 1973 1.37 0
## 63 1641 2 1 57 1973 0.24 0
## 64 1641 2 0 57 1973 0.81 0
## 65 1648 2 0 55 1973 1.29 0
## 66 1652 2 0 58 1973 1.29 0
## 67 1654 2 1 20 1973 0.97 0
## 68 1654 2 0 67 1973 1.13 0
## 69 1667 1 0 44 1971 5.80 1
## 70 1678 2 0 59 1973 1.29 0
## 71 1685 2 0 32 1973 0.48 0
## 72 1690 1 1 83 1971 1.62 0
## 73 1710 2 0 55 1973 2.26 0
## 74 1710 2 1 15 1973 0.58 0
## 75 1726 1 0 58 1970 0.97 1
## 76 1745 2 0 47 1973 2.58 1
## 77 1762 2 0 54 1973 0.81 0
## 78 1779 2 1 55 1973 3.54 1
## 79 1787 2 1 38 1973 0.97 0
## 80 1787 2 0 41 1973 1.78 1
## 81 1793 2 0 56 1973 1.94 0
## 82 1804 2 0 48 1973 1.29 0
## 83 1812 2 1 44 1973 3.22 1
## 84 1836 2 0 70 1972 1.53 0
## 85 1839 2 0 40 1972 1.29 0
## 86 1839 2 1 53 1972 1.62 1
## 87 1854 2 0 65 1972 1.62 1
## 88 1856 2 1 54 1972 0.32 0
## 89 1860 3 1 71 1969 4.84 1
## 90 1864 2 0 49 1972 1.29 0
## 91 1899 2 0 55 1972 0.97 0
## 92 1914 2 0 69 1972 3.06 0
## 93 1919 2 1 83 1972 3.54 0
## 94 1920 2 1 60 1972 1.62 1
## 95 1927 2 1 40 1972 2.58 1
## 96 1933 1 0 77 1972 1.94 0
## 97 1942 2 0 35 1972 0.81 0
## 98 1955 2 0 46 1972 7.73 1
## 99 1956 2 0 34 1972 0.97 0
## 100 1958 2 0 69 1972 12.88 0
## 101 1963 2 0 60 1972 2.58 0
## 102 1970 2 1 84 1972 4.09 1
## 103 2005 2 0 66 1972 0.64 0
## 104 2007 2 1 56 1972 0.97 0
## 105 2011 2 0 75 1972 3.22 1
## 106 2024 2 0 36 1972 1.62 0
## 107 2028 2 1 52 1972 3.87 1
## 108 2038 2 0 58 1972 0.32 1
## 109 2056 2 0 39 1972 0.32 0
## 110 2059 2 1 68 1972 3.22 1
## 111 2061 1 1 71 1968 2.26 0
## 112 2062 1 0 52 1965 3.06 0
## 113 2075 2 1 55 1972 2.58 1
## 114 2085 3 0 66 1970 0.65 0
## 115 2102 2 1 35 1972 1.13 0
## 116 2103 1 1 44 1966 0.81 0
## 117 2104 2 0 72 1972 0.97 0
## 118 2108 1 0 58 1969 1.76 1
## 119 2112 2 0 54 1972 1.94 1
## 120 2150 2 0 33 1972 0.65 0
## 121 2156 2 0 45 1972 0.97 0
## 122 2165 2 1 62 1972 5.64 0
## 123 2209 2 0 72 1971 9.66 0
## 124 2227 2 0 51 1971 0.10 0
## 125 2227 2 1 77 1971 5.48 1
## 126 2256 1 0 43 1971 2.26 1
## 127 2264 2 0 65 1971 4.83 1
## 128 2339 2 0 63 1971 0.97 0
## 129 2361 2 1 60 1971 0.97 0
## 130 2387 2 0 50 1971 5.16 1
## 131 2388 1 1 40 1966 0.81 0
## 132 2403 2 0 67 1971 2.90 1
## 133 2426 2 0 69 1971 3.87 0
## 134 2426 2 0 74 1971 1.94 1
## 135 2431 2 0 49 1971 0.16 0
## 136 2460 2 0 47 1971 0.64 0
## 137 2467 1 0 42 1965 2.26 1
## 138 2492 2 0 54 1971 1.45 0
## 139 2493 2 1 72 1971 4.82 1
## 140 2521 2 0 45 1971 1.29 1
## 141 2542 2 1 67 1971 7.89 1
## 142 2559 2 0 48 1970 0.81 1
## 143 2565 1 1 34 1970 3.54 1
## 144 2570 2 0 44 1970 1.29 0
## 145 2660 2 0 31 1970 0.64 0
## 146 2666 2 0 42 1970 3.22 1
## 147 2676 2 0 24 1970 1.45 1
## 148 2738 2 0 58 1970 0.48 0
## 149 2782 1 1 78 1969 1.94 0
## 150 2787 2 1 62 1970 0.16 0
## 151 2984 2 1 70 1969 0.16 0
## 152 3032 2 0 35 1969 1.29 0
## 153 3040 2 0 61 1969 1.94 0
## 154 3042 1 0 54 1967 3.54 1
## 155 3067 2 0 29 1969 0.81 0
## 156 3079 2 1 64 1969 0.65 0
## 157 3101 2 1 47 1969 7.09 0
## 158 3144 2 1 62 1969 0.16 0
## 159 3152 2 0 32 1969 1.62 0
## 160 3154 3 1 49 1969 1.62 0
## 161 3180 2 0 25 1969 1.29 0
## 162 3182 3 1 49 1966 6.12 0
## 163 3185 2 0 64 1969 0.48 0
## 164 3199 2 0 36 1969 0.64 0
## 165 3228 2 0 58 1969 3.22 1
## 166 3229 2 0 37 1969 1.94 0
## 167 3278 2 1 54 1969 2.58 0
## 168 3297 2 0 61 1968 2.58 1
## 169 3328 2 1 31 1968 0.81 0
## 170 3330 2 1 61 1968 0.81 1
## 171 3338 1 0 60 1967 3.22 1
## 172 3383 2 0 43 1968 0.32 0
## 173 3384 2 0 68 1968 3.22 1
## 174 3385 2 0 4 1968 2.74 0
## 175 3388 2 1 60 1968 4.84 1
## 176 3402 2 1 50 1968 1.62 0
## 177 3441 2 0 20 1968 0.65 0
## 178 3458 3 0 54 1967 1.45 0
## 179 3459 2 0 29 1968 0.65 0
## 180 3459 2 1 56 1968 1.29 1
## 181 3476 2 0 60 1968 1.62 0
## 182 3523 2 0 46 1968 3.54 0
## 183 3667 2 0 42 1967 3.22 0
## 184 3695 2 0 34 1967 0.65 0
## 185 3695 2 0 56 1967 1.03 0
## 186 3776 2 1 12 1967 7.09 1
## 187 3776 2 0 21 1967 1.29 1
## 188 3830 2 1 46 1967 0.65 0
## 189 3856 2 0 49 1967 1.78 0
## 190 3872 2 0 35 1967 12.24 1
## 191 3909 2 1 42 1967 8.06 1
## 192 3968 2 0 47 1967 0.81 0
## 193 4001 2 0 69 1967 2.10 0
## 194 4103 2 0 52 1966 3.87 0
## 195 4119 2 1 52 1966 0.65 0
## 196 4124 2 0 30 1966 1.94 1
## 197 4207 2 1 22 1966 0.65 0
## 198 4310 2 1 55 1966 2.10 0
## 199 4390 2 0 26 1965 1.94 1
## 200 4479 2 0 19 1965 1.13 1
## 201 4492 2 1 29 1965 7.06 1
## 202 4668 2 0 40 1965 6.12 0
## 203 4688 2 0 42 1965 0.48 0
## 204 4926 2 0 50 1964 2.26 0
## 205 5565 2 0 41 1962 2.90 0
# Deaths = patients with status 1 plus patients with status 3 (labelled
# "Melanoma" and "Otras causas" in the legend below).
num_fallecidos <- sum(Melanoma$status == 1) + sum(Melanoma$status == 3)
num_fallecidos
## [1] 71
colores_status <- c("yellow", "purple", "black")
# Pie chart of the three status codes; each slice is labelled with its
# patient count.
pie(
  table(Melanoma$status),
  labels = c(
    sum(Melanoma$status == 1),
    sum(Melanoma$status == 2),
    sum(Melanoma$status == 3)
  ),
  col = colores_status,
  main = "Presencia y ausencia de Melanoma en una base de datos de 205 personas"
)
legend(
  "topright",
  paste(1:3, c(": Melanoma", ": Vivo", ": Otras causas")),
  fill = colores_status
)
## Relationship between tumour size and death
Melanoma1 <- Melanoma
s1 <- which(Melanoma1$status == 1) # positions with status 1
s2 <- which(Melanoma1$status == 2) # positions of the patients who are alive
s3 <- which(Melanoma1$status == 3) # positions of deaths from other causes
# Collapse the three codes into a binary indicator: 1 = dead (codes 1 and 3),
# 0 = alive (code 2). The positions were captured above, before any recoding.
Melanoma1$status[s3] <- 1
Melanoma1$status[s2] <- 0
c(
  "Correlación entre el tamaño del tumor y estado del paciente:",
  round(cor(Melanoma1$thickness, Melanoma1$status), digits = 3)
)
## [1] "Correlación entre el tamaño del tumor y estado del paciente:"
## [2] "0.314"
# Text labels for the binary status, used as the grouping variable.
rem <- ifelse(Melanoma1$status == 0, "Vivos", "Muertos")
boxplot(
  Melanoma1$thickness ~ rem,
  main = "Relación entre tamaño de tumor y muerte",
  xlab = "Estado del paciente",
  ylab = "Tamaño del tumor (mm)"
)
# 2.8 UsingR Babyboom
library(UsingR)
data(babyboom)
# Births per gender.
table(babyboom$gender)
##
## girl boy
## 18 26
print("Niños por debajo de 3000gr: ")
## [1] "Niños por debajo de 3000gr: "
# Boys weighing less than 3000.
babyboom[babyboom$wt < 3000 & babyboom$gender == "boy", ]
## clock.time gender wt running.time
## 8 422 boy 2846 262
## 18 1133 boy 2902 693
## 19 1209 boy 2635 729
## 40 2104 boy 2121 1264
# Gender split among all babies weighing less than 3000.
table(babyboom$gender[babyboom$wt < 3000])
##
## girl boy
## 5 4
# Gender and weight of those babies, as a two-column data frame.
relacion_sexo_peso <- data.frame(
  gender = babyboom$gender[babyboom$wt < 3000],
  wt = babyboom$wt[babyboom$wt < 3000]
)
relacion_sexo_peso
## gender wt
## 1 girl 2208
## 2 girl 1745
## 3 boy 2846
## 4 girl 2576
## 5 boy 2902
## 6 boy 2635
## 7 girl 2184
## 8 girl 2383
## 9 boy 2121
# Median weight per gender. The values are medians, so the plot title and the
# legend say "Mediana"; they previously said "Promedio" (mean), which did not
# match what is computed. The `prom_` variable names are kept unchanged so
# any later code that reads them keeps working.
prom_ninos <- median(babyboom$wt[babyboom$gender == "boy"])
prom_ninos
## [1] 3404
prom_ninas <- median(babyboom$wt[babyboom$gender == "girl"])
prom_ninas
## [1] 3381
# Boxplot of all weights with the per-gender medians drawn on top; a single
# value passed to points() is placed at x = 1, where the box is drawn.
boxplot(
  babyboom$wt,
  ylab = "Peso (gr)",
  main = "Mediana de pesos total (niños y niñas)"
)
points(prom_ninos, col = "blue", pch = 19)
points(prom_ninas, col = "pink", pch = 19)
legend(
  x = "topleft",
  legend = c(paste("Niños: ", prom_ninos), paste("Niñas: ", prom_ninas)),
  fill = c("blue", "pink"),
  title = "Mediana: "
)
# 2.9 MASS Aids2
library(MASS)
data(Aids2)
# Cases by state and transmission category.
table(Aids2$state, Aids2$T.categ)
##
## hs hsid id het haem blood mother other
## NSW 1539 50 28 18 30 70 3 42
## Other 204 4 12 8 6 5 2 8
## QLD 186 7 4 5 4 15 1 4
## VIC 536 11 4 10 6 4 1 16
table(Aids2$status)
##
## A D
## 1082 1761
# Rows with status "D".
fallecidos <- Aids2[Aids2$status == "D", ]
print(paste("Cantidad de fallecidos: ", nrow(fallecidos)))
## [1] "Cantidad de fallecidos: 1761"
# Cases by sex and transmission category, as a table and as grouped bars.
table(Aids2$sex, Aids2$T.categ)
##
## hs hsid id het haem blood mother other
## F 1 0 20 20 0 37 4 7
## M 2464 72 28 21 46 57 3 63
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
barplot(
  table(Aids2$sex, Aids2$T.categ),
  beside = TRUE,
  col = c("pink", "blue")
)
legend("topright", levels(Aids2$sex), fill = c("pink", "blue"))
## F (women) vs. transmission category:
relacion_sexo_trans_F <- Aids2[Aids2$sex == "F", ]
barplot(
  table(relacion_sexo_trans_F$sex == "F", relacion_sexo_trans_F$T.categ),
  ylab = "Cantidad de pacientes",
  main = "Relación entre F (mujeres) y tipos de transmisión"
)
## M (men) vs. transmission category:
relacion_sexo_trans_M <- Aids2[Aids2$sex == "M", ]
barplot(
  table(relacion_sexo_trans_M$sex == "M", relacion_sexo_trans_M$T.categ),
  ylab = "Cantidad de pacientes",
  main = "Relación entre M (hombres) y tipos de transmisión"
)
## Chart
# One colour per transmission category, shared by the pie and its legend.
colores_aids <- c(
  "aquamarine", "blue", "blueviolet", "brown",
  "darkgreen", "darkolivegreen1", "darkorange", "gold"
)
# Each slice is labelled with the number of cases in that category.
pie(
  table(Aids2$T.categ),
  labels = table(Aids2$T.categ),
  col = colores_aids,
  main = "Cantidad y tipos de transmisión"
)
legend("topright", legend = levels(Aids2$T.categ), fill = colores_aids)
table(Aids2$T.categ)
##
## hs hsid id het haem blood mother other
## 2465 72 48 41 46 94 7 70
##
## hs hsid id het haem blood mother other
## 2465 72 48 41 46 94 7 70
# Load the crime data frame, keep a working copy in dat5, and print it
# (columns y1983 and y1993, one row per state).
data(crime)
dat5 = crime
crime
## y1983 y1993
## Alabama 416.0 871.7
## Alaska 613.8 660.5
## Arizona 494.2 670.8
## Arkansas 297.7 576.5
## California 772.6 1119.7
## Colorado 476.4 578.8
## Connecticut 375.0 495.3
## Delaware 453.1 621.2
## DC 1985.4 2832.8
## Florida 826.7 1207.2
## Georgia 456.7 733.2
## Hawaii 252.1 258.4
## Idaho 238.7 281.4
## Illinois 553.0 977.3
## Indiana 283.8 508.3
## Iowa 181.1 278.0
## Kansas 326.6 510.8
## Kentucky 322.2 535.5
## Louisiana 640.9 984.6
## Maine 159.6 130.9
## Maryland 807.1 1000.1
## Massachusetts 576.8 779.0
## Michigan 716.7 770.1
## Minnesota 190.9 338.0
## Mississippi 280.4 411.7
## Missour 477.2 740.4
## Montana 212.6 169.9
## Nebraska 217.7 348.6
## Nevada 655.2 696.8
## New Hampshire 125.1 125.7
## New Jersey 553.1 625.8
## New Mexico 686.8 934.9
## New York 914.1 1122.1
## North Carolina 409.6 681.0
## North Dakota 53.7 83.3
## Ohio 397.9 525.9
## Oklahoma 423.4 622.8
## Oregon 487.8 510.2
## Pennsylvania 342.8 427.0
## Rhode Island 355.2 394.5
## South Carolina 616.8 944.5
## South Dakota 120.0 194.5
## Tennessee 402.0 746.2
## Texas 512.2 806.3
## Utah 256.0 290.5
## Vermont 132.6 109.5
## Virginia 292.5 374.9
## Washington 371.8 534.5
## West Virginia 171.8 211.5
## Wisconsin 190.9 275.7
## Wyoming 237.2 319.5
# Mean rate for each year, compared side by side in a two-bar chart.
prom_1983 <- mean(dat5[["y1983"]])
prom_1993 <- mean(dat5[["y1993"]])
color <- c("purple", "orange")
barplot(
  c(prom_1983, prom_1993),
  names.arg = c("1983", "1993"),
  col = color,
  main = "2.10 Apartado A",
  xlab = "Tasa total",
  ylab = "Cantidad "
)
## Total rate
# Row(s) holding the maximum of each year; only that year's column is shown.
max_1983 <- dat5[dat5$y1983 == max(dat5$y1983), ]
max_1993 <- dat5[dat5$y1993 == max(dat5$y1993), ]
max_1983["y1983"]
## y1983
## DC 1985.4
max_1993["y1993"]
## y1993
## DC 2832.8
# Row(s) with the largest combined value across both years.
sum_year <- dat5$y1993 + dat5$y1983
max_total <- max(sum_year)
index <- which(sum_year == max_total)
dat5[index, ]
## y1983 y1993
## DC 1985.4 2832.8
## y1983 y1993
## DC 1985.4 2832.8