2.1 Paquete UsingR

library(UsingR)

Número de datos

length(ls("package:UsingR"))
## [1] 150

Gráficas datasets

# Load three datasets (presumably shipped with UsingR, attached above) and
# draw a histogram of each one with the default binning.
data(bumpers)
hist(bumpers)

data(firstchi)
hist(firstchi)

data(math)
hist(math)

## Medias

boxplot(bumpers)
# A single value passed to points() is drawn at x = 1, which is where the
# lone box sits, so this marks the mean on top of the boxplot.
points(mean(bumpers),pch = 19, col = "gold")

summary(bumpers)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     618    1478    2129    2122    2774    3298
sd(bumpers)
## [1] 798.4574
boxplot(firstchi)
# A single value passed to points() is drawn at x = 1, which is where the
# lone box sits, so this marks the mean on top of the boxplot.
points(mean(firstchi),pch = 19, col = "gold")

summary(firstchi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.00   20.00   23.00   23.98   26.00   42.00
sd(firstchi)
## [1] 6.254258
boxplot(math)
# A single value passed to points() is drawn at x = 1, which is where the
# lone box sits, so this marks the mean on top of the boxplot.
points(mean(math),pch = 19, col = "gold")

summary(math)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   38.00   49.00   54.00   54.90   61.75   75.00
sd(math)
## [1] 9.746264

2.2 Datos brightness

data(brightness)
# freq = FALSE plots densities instead of counts. Spell out FALSE: `F` is an
# ordinary variable that can be reassigned.
hist(brightness, freq = FALSE)

density(brightness)
## 
## Call:
##  density.default(x = brightness)
## 
## Data: brightness (966 obs.); Bandwidth 'bw' = 0.2425
## 
##        x                y            
##  Min.   : 1.342   Min.   :0.0000156  
##  1st Qu.: 4.296   1st Qu.:0.0025868  
##  Median : 7.250   Median :0.0226027  
##  Mean   : 7.250   Mean   :0.0845551  
##  3rd Qu.:10.204   3rd Qu.:0.1267748  
##  Max.   :13.158   Max.   :0.3961890
plot(density(brightness))

# Density-scaled histogram so the kernel density curve shares its y axis.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
hist(brightness, freq = FALSE)
lines(density(brightness))

## Diagramas de caja

boxplot(brightness)

boxplot.stats(brightness)$out
##  [1] 12.31 11.71  5.53 11.28  4.78  5.13  4.37  5.04 12.43 12.04  4.55 11.55
## [13] 12.14 11.63  4.99 11.67  4.61 11.99 12.04  5.55 12.17 11.55 11.79 12.19
## [25]  2.07 11.65 11.73  2.28  5.42  3.88  5.54  5.29  5.01 11.55  4.89 11.80
## [37]  5.41  5.24
sort(boxplot.stats(brightness)$out)
##  [1]  2.07  2.28  3.88  4.37  4.55  4.61  4.78  4.89  4.99  5.01  5.04  5.13
## [13]  5.24  5.29  5.41  5.42  5.53  5.54  5.55 11.28 11.55 11.55 11.55 11.63
## [25] 11.65 11.67 11.71 11.73 11.79 11.80 11.99 12.04 12.04 12.14 12.17 12.19
## [37] 12.31 12.43
# Second-smallest outlier: order the flagged values ascending, take element 2.
outliers_ordenados <- sort(boxplot.stats(brightness)$out)
seg.menor.outlier <- outliers_ordenados[2]
seg.menor.outlier
## [1] 2.28

Datos no atípicos

out_indices <- which(brightness %in% c(boxplot.stats(brightness)$out))
out_indices
##  [1]   6  17 107 111 122 145 154 183 191 263 300 307 320 353 355 390 441 454 463
## [20] 475 522 548 560 569 676 730 736 744 759 763 811 812 839 896 908 909 928 948
# Drop the outlier positions. A negative index built from an empty vector
# selects nothing (x[-integer(0)] is empty), so keep the complement of
# out_indices explicitly; this also works when no outliers are flagged.
brightness.sin <- brightness[setdiff(seq_along(brightness), out_indices)]
brightness.sin
##   [1]  9.10  9.27  6.61  8.06  8.55  9.64  9.05  8.59  8.59  7.34  8.43  8.80
##  [13]  7.25  8.60  8.15 11.03  6.53  8.51  7.55  8.69  7.57  9.05  6.28  9.13
##  [25]  9.32  8.83  9.14  8.26  7.63  9.09  8.10  6.43  9.07  7.68 10.44  8.65
##  [37]  7.46  8.70 10.61  8.20  6.18  7.91  9.59  8.57 10.78  7.31  9.53  6.49
##  [49]  8.94  8.56 10.96 10.57  7.40  8.12  8.27  7.05  9.09  8.34  8.86  8.27
##  [61]  6.36  8.08 11.00  8.55  7.83  8.79  8.33 10.42  8.26  8.97  6.90  9.93
##  [73]  7.42  9.03  8.41  8.06  8.69  8.40  8.57  9.50  8.85  9.61 10.62  8.05
##  [85]  7.80  5.71  7.87  7.64  7.66  8.68  8.12 10.10  8.67 10.46  9.87  9.48
##  [97]  7.04  8.44  9.88  7.05  8.29  9.34  7.73  6.22  8.53  7.23  8.61 10.76
## [109]  8.93  7.95  7.46  8.60  8.55  9.20  6.82  8.29  6.83  7.21  5.58  8.70
## [121]  8.06 10.86  6.50  9.32  9.14  8.13 10.62  6.62  9.96  8.64  6.60  6.25
## [133]  7.83 10.03  9.04  8.47  7.33  8.66 10.35  8.96  8.49 11.26  8.15  7.04
## [145] 10.02  8.90  7.78  9.93  8.60  8.51  7.09  6.93  8.68  8.98  9.84  8.98
## [157]  7.98 10.16  8.86  8.58  9.56  9.24  9.63  5.80  9.05  8.45  8.86  7.84
## [169]  8.86  8.93  7.97  6.90  8.47  6.77  8.55  8.48  8.53  6.33  8.99  8.64
## [181]  9.55  8.74  8.16  9.46  5.70  7.62  8.95  8.97  8.94  7.24 10.32  8.24
## [193]  8.62  9.18  8.53  8.54  8.56  9.41  5.87  7.20  9.05  9.52 10.24  7.70
## [205]  8.17  7.29  9.26  7.94  8.42  8.56  7.52  7.74  8.85  9.01  7.17  9.04
## [217] 10.30  9.86  7.64  8.27  8.44  9.58  8.43  8.49  9.64  9.17  8.09  9.00
## [229]  6.25  8.56 10.81  8.76  7.76  7.82  7.90  8.52  9.73  9.19  8.10  8.75
## [241]  8.14  8.65 10.30  6.46  6.73  7.96  9.53  8.87  6.59  8.65  9.64  9.15
## [253]  9.04  8.42  8.09  9.06  8.09  8.18  8.77  7.36  9.16  8.82 11.14  6.24
## [265]  9.44  7.49  6.96  7.94  8.69  8.15  8.45  7.92  7.45  9.01  8.55  9.23
## [277]  9.16  7.90  8.68  7.78  8.21  8.11  8.29  7.89  9.67  8.24  6.80  8.18
## [289]  8.44  7.45  6.31  8.15  8.27  7.66  8.59  7.09  8.54  9.58  8.44  8.59
## [301]  8.01  8.29  9.62  7.26  7.91  9.45  8.19  8.93  7.65  8.53  7.38  8.56
## [313]  8.76  9.56  7.09  9.83  5.90 10.80  8.41  9.05  8.79  8.88  7.59  9.60
## [325] 10.66  8.55  8.11  9.44  9.60  5.78 10.66  6.38  8.80  7.79  8.60  7.77
## [337] 10.37  9.80 10.42  9.22  8.43  7.33  8.93  9.09  9.26  8.73  9.18  8.12
## [349]  9.26  8.94  6.11  9.13  7.90  9.34  7.13 10.82  7.46  8.72  7.02  9.08
## [361]  8.37  5.59  7.37  5.68  8.56  8.72  9.06  8.82  8.18  9.39  9.10  8.46
## [373]  9.15  8.28  8.18  7.93  9.21  6.09  8.31  7.83  8.72  6.61  6.25  7.82
## [385]  8.66  8.15  8.97  8.15  7.47  8.63  8.13  8.23  8.41  6.47  9.83  8.64
## [397]  7.73  8.64  8.94  8.84  6.32  5.80  8.97  7.53  7.41  7.80  8.14  6.71
## [409]  8.73  9.37  8.69  9.95  7.10  8.09  6.88  9.48  9.04  9.30  8.49  8.30
## [421]  7.95  7.08  6.93  8.38  8.56  8.78  7.42  8.26  7.71  6.91  9.16  8.99
## [433]  8.63  9.90  7.59  7.39  7.78  7.47  6.97  8.82  9.13  7.86  7.13  9.45
## [445]  8.78  7.23  9.73  7.36  7.36  8.47  9.37  6.99  8.20  8.36  8.22  9.91
## [457]  9.67  8.60 10.07 10.15  7.75  9.21  9.66  8.47  9.37  9.44  9.99 10.38
## [469]  7.51  8.91  7.45  9.57  8.99  8.58  6.90  7.55  7.93  9.71  9.57  8.55
## [481]  6.62  7.89  7.51  7.36  8.66  8.51  6.65  9.67  7.80  8.21  7.90  8.94
## [493]  9.82  8.69  8.57  8.89  5.98  7.92  7.60  8.22  5.70  8.75  6.93  7.97
## [505]  8.06 10.13  7.31  8.35  5.57  9.85  9.16  9.03 10.07  9.76  9.35 10.95
## [517]  8.87  6.68  9.69  8.05 10.30  6.07  8.51  7.71  8.56  8.26  8.62 10.92
## [529] 10.51  9.83  9.84  9.74  8.21  8.72  8.03  9.00  6.19  8.22  7.93 10.18
## [541]  8.98  9.13  6.91  8.79  8.23 10.24  8.83  7.62  8.96 10.41  8.97  9.61
## [553]  8.29  8.30  8.26  7.44  9.52  8.20  8.68  8.65 10.52  8.41  9.18  8.42
## [565]  8.86  7.92 10.97  8.85  9.31 10.28  7.56  7.88  7.99  8.23  8.52  9.14
## [577]  6.20  7.64  8.95  7.48  7.06  7.33  8.98  8.24  8.53  8.40  7.48  8.46
## [589]  9.29  8.57  8.70  8.50  8.37  6.87  7.50  7.39  8.19  7.56  8.37  7.39
## [601]  6.73  8.66  8.25  8.47  8.01  6.83  9.06  8.79  7.44  6.43  5.93  8.85
## [613]  9.86  8.55  7.66  7.82  9.08 10.10  8.21  8.85  7.79  7.58  7.85  7.18
## [625]  7.54  9.72  7.12  9.77  8.84  5.67  8.15  9.61  8.19  7.27  8.51  8.36
## [637] 10.00  8.74  6.18 10.26 10.16  8.31  8.58  7.04  8.81  5.99  8.22  9.86
## [649]  8.00  9.40  9.10  8.11  8.89  9.43  7.59  8.72  9.86  9.23  9.50 10.73
## [661]  7.59  7.41  9.26  7.78  7.76  8.94  8.95  6.41  6.11  7.76  7.38  6.21
## [673]  7.05  7.44  8.50  7.84 11.01  7.88  9.10  8.65  8.41  7.81  7.43  8.76
## [685]  7.58  9.55  6.82 10.24  6.24  7.31 10.52  9.27  7.13  9.14  8.48  8.57
## [697]  7.21  9.05  7.72  8.03  6.47  5.57  6.32  7.78  8.58 10.37  9.23  9.20
## [709]  6.93  9.32  7.11  9.79  8.21  8.42  7.05  9.26  8.77  9.25  9.30 10.63
## [721]  9.90  9.89  9.33  7.78  7.02 11.26  8.89  9.60  7.07  6.01  9.11  8.24
## [733]  8.97  8.59  7.17  7.94  7.27  9.59  7.94  8.52  7.59  9.17  8.08  9.80
## [745]  8.92  9.91  9.42  8.84 10.15  8.37  9.33  9.35  7.40  8.35  9.53  9.59
## [757] 10.05  8.57  8.48  8.43  8.45  8.84 11.18  8.64  8.42  6.34  7.93  8.36
## [769]  8.32  7.77  6.84  8.78  7.19  8.50  8.82  9.04  7.93  7.66 10.07  9.03
## [781]  8.13  7.51  9.08  7.10  7.88  9.40  9.06  8.38 10.65  7.77  8.50  8.61
## [793] 10.05  8.71  9.37  6.97  8.56  9.34  9.47  8.11  8.91  7.83  8.95  7.20
## [805]  9.37  5.84  9.81  9.27  9.50  9.32  8.92  8.38  7.74  8.60  9.49  8.35
## [817]  7.11  9.87  8.98  7.75  8.24  6.74  6.83  7.70  6.70  8.67  9.94  8.73
## [829]  9.63  6.66  8.29  8.47  8.16  8.97  7.51  8.97  8.55  5.84  7.85  8.68
## [841]  8.05  8.27  7.68  9.40  7.77  6.89  7.55  8.27  8.16  8.07  7.91  7.71
## [853] 10.16  8.41  8.88  9.64  7.93  7.78  8.90  8.55  9.15 10.86  9.08  7.44
## [865] 10.35  6.68  8.85  8.90  8.24  6.74 10.75  8.44  7.69  8.88  7.70  8.60
## [877]  8.44  9.50  9.03  7.15  7.95  8.23  9.81  8.48  9.33  8.97  8.08  7.47
## [889]  8.34  7.75  8.34  7.56  6.93 10.03  8.69  9.04  8.32  7.85  7.21  8.98
## [901]  7.09  8.85  9.21  8.61  7.91  7.47  8.65  8.53  9.92  8.09  7.06  8.45
## [913]  8.73  7.45  9.02  7.51  7.32  8.17  9.45  9.72  9.34  8.75  9.32  7.91
## [925]  7.49  6.53  6.18  8.69
boxplot(brightness.sin)

# 2.3 Paquete MASS

library(UsingR)
library(ggplot2)

# Load the cereal data used by the association plots of this section.
# (An empty `tipos_variables` vector used to be created here; nothing read it.)
data(UScereal)

Tipo de dato

class(UScereal[,1]) # Type of the first column only; sapply(UScereal, class) would list every column
## [1] "factor"

Asociaciones entre sus variables

Entre manufacturer y shelf:

# One fill colour per manufacturer (six colours; assumes mfr has six levels).
colores <- c("aquamarine2", "aquamarine3", "cadetblue2", "cadetblue3",
             "cadetblue4", "cornflowerblue")

# Grouped bars: one group per shelf, one bar per manufacturer inside a group.
# The y axis is the number of cereals, so it is labelled as a count; the
# manufacturer key belongs to the legend.
barplot(table(UScereal$mfr, UScereal$shelf), beside = TRUE, col = colores,
        main = "Relación entre manufacturer y shelf",
        xlab = "Estante de exhibición", ylab = "Número de cereales")
legend("topright", levels(UScereal$mfr), fill = colores,
       title = "Fabricante (inicial)")

Entre fat y vitamins:

# Refer to columns by bare name inside aes(): ggplot2 looks them up in `data`.
# Writing `UScereal$fat` bypasses `data` and raises a "discouraged" warning.
ggplot(data = UScereal, aes(x = fat, y = vitamins)) + geom_point()

### Entre fat y shelf:

plot(UScereal$shelf,UScereal$fat)

Entre carbohydrates y sugars:

cor(UScereal$carbo,UScereal$sugars)
## [1] -0.04082599
plot(UScereal$carbo,UScereal$sugars)

### Entre fibre y manufacturer:

plot(UScereal$mfr,UScereal$fibre)

### Entre sodium y sugars:

cor(UScereal$sodium,UScereal$sugars)
## [1] 0.2112437
# Refer to columns by bare name inside aes(): ggplot2 looks them up in `data`.
# Writing `UScereal$sodium` bypasses `data` and raises a "discouraged" warning.
ggplot(data = UScereal, aes(x = sodium, y = sugars)) + geom_point()

# 2.4 Datos mammals

library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
data(mammals)

Correlación lineal

pairs(mammals$body ~ mammals$brain)

cor(mammals$body,mammals$brain)
## [1] 0.9341638

Datos mediante plot

plot(mammals$body,mammals$brain)

## Función log

pairs(log(mammals$body)~log(mammals$brain))

cor(log(mammals$body),log(mammals$brain))
## [1] 0.9595748
plot(log(mammals$body),log(mammals$brain))

# 2.5 Datos emissions

library(UsingR)
data(emissions)
emissions
##                   GDP perCapita  CO2
## UnitedStates  8083000     29647 6750
## Japan         3080000     24409 1320
## Germany       1740000     21197 1740
## France        1320000     22381  550
## UnitedKingdom 1242000     21010  675
## Italy         1240000     21856  540
## Russia         692000      4727 2000
## Canada         658000     21221  700
## Spain          642400     16401  370
## Australia      394000     20976  480
## Netherlands    343900     21755  240
## Poland         280700      7270  400
## Belgium        236300     23208  145
## Sweden         176200     19773   75
## Austria        174100     21390   80
## Switzerland    172400     23696   54
## Portugal       149500     15074   75
## Greece         137400     12833  125
## Ukraine        124900      2507  420
## Denmark        122500     22868   75
## Norway         120500     27149   56
## Romania        114200      5136  160
## CzechRepublic  111900     10885  150
## Finland        102100     19793   76
## Hungary         73200      7186   85
## Ireland         59900     16488   63

Relación entre variables

cor(emissions)
##                 GDP perCapita       CO2
## GDP       1.0000000 0.4325303 0.9501753
## perCapita 0.4325303 1.0000000 0.2757962
## CO2       0.9501753 0.2757962 1.0000000
pairs(emissions)

# Modelo de regresión

# Multiple linear regression of CO2 on GDP and per-capita GDP.
# Terms are written as column names resolved through `data`; with
# `emissions$GDP`-style terms predict() cannot map `newdata` onto the model
# and silently returns the training fit instead.
regresion_lineal <- lm(CO2 ~ GDP + perCapita, data = emissions)
summary(regresion_lineal)
## 
## Call:
## lm(formula = CO2 ~ GDP + perCapita, data = emissions)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1037.3  -167.4    10.8   153.2  1052.0 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.100e+02  2.044e+02   2.495   0.0202 *  
## GDP          8.406e-04  5.198e-05  16.172 4.68e-14 ***
## perCapita   -3.039e-02  1.155e-02  -2.631   0.0149 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 382.8 on 23 degrees of freedom
## Multiple R-squared:  0.9253, Adjusted R-squared:  0.9188 
## F-statistic: 142.5 on 2 and 23 DF,  p-value: 1.102e-13
# abline() can only draw a one-predictor fit; given this two-predictor model
# it warned and drew a line from the intercept and GDP slope alone. Compare
# observed against fitted CO2 instead: points on the identity line are
# perfectly predicted.
plot(fitted(regresion_lineal), emissions$CO2,
     xlab = "CO2 ajustado", ylab = "CO2 observado")
abline(0, 1, col = "red")

emissions$CO2
##  [1] 6750 1320 1740  550  675  540 2000  700  370  480  240  400  145   75   80
## [16]   54   75  125  420   75   56  160  150   76   85   63
CO2_predict <- predict(regresion_lineal,emissions)
plot(emissions$GDP+emissions$perCapita,CO2_predict)

CO2_predict
##  UnitedStates         Japan       Germany        France UnitedKingdom 
##   6403.720110   2357.274571   1328.457202    939.412260    915.510264 
##         Italy        Russia        Canada         Spain     Australia 
##    888.117914    948.030553    418.174003    551.546727    203.695487 
##   Netherlands        Poland       Belgium        Sweden       Austria 
##    137.905405    524.997147      3.295727     57.168681      6.260516 
##   Switzerland      Portugal        Greece       Ukraine       Denmark 
##    -65.251059    177.533136    235.468677    538.782254    -82.034073 
##        Norway       Romania CzechRepublic       Finland       Hungary 
##   -213.820805    449.888660    273.235200     -5.729292    353.120804 
##       Ireland 
##     59.239930
cor(emissions$CO2,CO2_predict)
## [1] 0.9619321

Modelo nuevo

boxplot(emissions)

boxplot.stats(emissions$CO2)$out
## [1] 6750 1320 1740 2000
out_indices_emisiones <- which(emissions$CO2 %in% c(boxplot.stats(emissions$CO2)$out))
out_indices_emisiones
## [1] 1 2 3 7
# Keep the rows that were not flagged as CO2 outliers. Negative indexing with
# an empty index vector would return nothing, so use a logical mask that is
# also correct when no outliers are found.
conservar <- !(seq_len(nrow(emissions)) %in% out_indices_emisiones)
CO2.sin.out <- emissions$CO2[conservar]
CO2.sin.out
##  [1] 550 675 540 700 370 480 240 400 145  75  80  54  75 125 420  75  56 160 150
## [20]  76  85  63
GDP.sin.out <- emissions$GDP[conservar]
perCapita.sin.out <- emissions$perCapita[conservar]

Nueva regresión

regresion_lineal1 <- lm(CO2.sin.out ~ GDP.sin.out + perCapita.sin.out, data = emissions)
summary(regresion_lineal1)
## 
## Call:
## lm(formula = CO2.sin.out ~ GDP.sin.out + perCapita.sin.out, data = emissions)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -130.88  -63.84  -32.27   16.79  334.38 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.233e+02  7.202e+01   3.101  0.00588 ** 
## GDP.sin.out        4.912e-04  6.989e-05   7.028 1.09e-06 ***
## perCapita.sin.out -8.525e-03  4.114e-03  -2.072  0.05209 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 121.4 on 19 degrees of freedom
## Multiple R-squared:  0.7225, Adjusted R-squared:  0.6933 
## F-statistic: 24.73 on 2 and 19 DF,  p-value: 5.142e-06
# abline() can only draw a one-predictor fit; given this two-predictor model
# it warned and drew a line from the intercept and first slope alone. Compare
# observed against fitted CO2 instead: points on the identity line are
# perfectly predicted.
plot(fitted(regresion_lineal1), CO2.sin.out,
     xlab = "CO2 ajustado", ylab = "CO2 observado")
abline(0, 1, col = "blue")

CO2.sin.out
##  [1] 550 675 540 700 370 480 240 400 145  75  80  54  75 125 420  75  56 160 150
## [20]  76  85  63
# The model was fitted on free-standing vectors, not on columns of
# `emissions`, so passing `emissions` as newdata could not be matched and only
# produced a row-count warning. The fitted values of the training data are
# what is needed here.
CO2_predict_sin_out <- predict(regresion_lineal1)
plot(GDP.sin.out + perCapita.sin.out,CO2_predict_sin_out)

CO2_predict_sin_out
##         1         2         3         4         5         6         7         8 
## 680.88253 654.25862 646.06437 365.61631 399.04321 238.03584 206.78744 299.22605 
##         9        10        11        12        13        14        15        16 
## 141.55107 141.31414 126.49820 106.00521 168.25755 181.41829 263.30493  88.55427 
##        17        18        19        20        21        22 
##  51.07759 235.63791 185.49960 104.74791 198.02424 112.19471
cor(CO2.sin.out,CO2_predict_sin_out)
## [1] 0.8499957

Boxplots de CO2 con outliers y sin outliers respectivamente:

boxplot(emissions$CO2)

boxplot(CO2.sin.out)

# 2.6 MASS anorexia

library(MASS)
data("anorexia")
anorexia[3,1:3]
##   Treat Prewt Postwt
## 3  Cont  91.8   86.4

Tratamiento más efectivo

# Row positions of the patients whose weight increased.
pos_casos_exito <- which(anorexia$Postwt > anorexia$Prewt)
casos_exito <- anorexia[pos_casos_exito, ]
# Treatment groups differ in size, so rank treatments by success rate
# (successes / patients treated) rather than by the raw success count.
exitos <- table(casos_exito$Treat)
tratados <- table(anorexia$Treat)
mejor <- names(which.max(exitos / tratados))
mejor_tratamiento <- sprintf(
  "%s: %d de %d casos de éxito (%.1f%%)",
  mejor, as.integer(exitos[mejor]), as.integer(tratados[mejor]),
  100 * as.integer(exitos[mejor]) / as.integer(tratados[mejor])
)
table(casos_exito$Treat)
## 
##  CBT Cont   FT 
##   18   11   13
mejor_tratamiento
## [1] "FT: 13 de 17 casos de éxito (76.5%)"

Pacientes que ganaron y perdieron peso

# Patients whose weight went down; unchanged weights fall in neither group.
peso_bajo <- anorexia$Postwt < anorexia$Prewt
pos_casos_fracaso <- which(peso_bajo)
casos_fracaso <- anorexia[pos_casos_fracaso, ]

# Head counts for both groups.
ganaron_peso <- length(pos_casos_exito)
ganaron_peso
## [1] 42
perdieron_peso <- nrow(casos_fracaso)

Gráfico

# Two-bar comparison of patients who gained versus lost weight; the legend
# repeats the counts next to each colour.
colores_peso <- c("purple", "black")
conteos_peso <- c(ganaron_peso, perdieron_peso)
etiquetas_peso <- c(paste("Ganaron peso: ", ganaron_peso),
                    paste("Perdieron peso: ", perdieron_peso))
barplot(conteos_peso,
        main = "Pacientes que ganaron vs los que perdieron peso",
        ylab = "Cantidad de pacientes", col = colores_peso)
legend("topright", legend = etiquetas_peso, fill = colores_peso)

# 2.7 MASS melanoma

library(MASS)
data("Melanoma")
Melanoma
##     time status sex age year thickness ulcer
## 1     10      3   1  76 1972      6.76     1
## 2     30      3   1  56 1968      0.65     0
## 3     35      2   1  41 1977      1.34     0
## 4     99      3   0  71 1968      2.90     0
## 5    185      1   1  52 1965     12.08     1
## 6    204      1   1  28 1971      4.84     1
## 7    210      1   1  77 1972      5.16     1
## 8    232      3   0  60 1974      3.22     1
## 9    232      1   1  49 1968     12.88     1
## 10   279      1   0  68 1971      7.41     1
## 11   295      1   0  53 1969      4.19     1
## 12   355      3   0  64 1972      0.16     1
## 13   386      1   0  68 1965      3.87     1
## 14   426      1   1  63 1970      4.84     1
## 15   469      1   0  14 1969      2.42     1
## 16   493      3   1  72 1971     12.56     1
## 17   529      1   1  46 1971      5.80     1
## 18   621      1   1  72 1972      7.06     1
## 19   629      1   1  95 1968      5.48     1
## 20   659      1   1  54 1972      7.73     1
## 21   667      1   0  89 1968     13.85     1
## 22   718      1   1  25 1967      2.34     1
## 23   752      1   1  37 1973      4.19     1
## 24   779      1   1  43 1967      4.04     1
## 25   793      1   1  68 1970      4.84     1
## 26   817      1   0  67 1966      0.32     0
## 27   826      3   0  86 1965      8.54     1
## 28   833      1   0  56 1971      2.58     1
## 29   858      1   0  16 1967      3.56     0
## 30   869      1   0  42 1965      3.54     0
## 31   872      1   0  65 1968      0.97     0
## 32   967      1   1  52 1970      4.83     1
## 33   977      1   1  58 1967      1.62     1
## 34   982      1   0  60 1970      6.44     1
## 35  1041      1   1  68 1967     14.66     0
## 36  1055      1   0  75 1967      2.58     1
## 37  1062      1   1  19 1966      3.87     1
## 38  1075      1   1  66 1971      3.54     1
## 39  1156      1   0  56 1970      1.34     1
## 40  1228      1   1  46 1973      2.24     1
## 41  1252      1   0  58 1971      3.87     1
## 42  1271      1   0  74 1971      3.54     1
## 43  1312      1   0  65 1970     17.42     1
## 44  1427      3   1  64 1972      1.29     0
## 45  1435      1   1  27 1969      3.22     0
## 46  1499      2   1  73 1973      1.29     0
## 47  1506      1   1  56 1970      4.51     1
## 48  1508      2   1  63 1973      8.38     1
## 49  1510      2   0  69 1973      1.94     0
## 50  1512      2   0  77 1973      0.16     0
## 51  1516      1   1  80 1968      2.58     1
## 52  1525      3   0  76 1970      1.29     1
## 53  1542      2   0  65 1973      0.16     0
## 54  1548      1   0  61 1972      1.62     0
## 55  1557      2   0  26 1973      1.29     0
## 56  1560      1   0  57 1973      2.10     0
## 57  1563      2   0  45 1973      0.32     0
## 58  1584      1   1  31 1970      0.81     0
## 59  1605      2   0  36 1973      1.13     0
## 60  1621      1   0  46 1972      5.16     1
## 61  1627      2   0  43 1973      1.62     0
## 62  1634      2   0  68 1973      1.37     0
## 63  1641      2   1  57 1973      0.24     0
## 64  1641      2   0  57 1973      0.81     0
## 65  1648      2   0  55 1973      1.29     0
## 66  1652      2   0  58 1973      1.29     0
## 67  1654      2   1  20 1973      0.97     0
## 68  1654      2   0  67 1973      1.13     0
## 69  1667      1   0  44 1971      5.80     1
## 70  1678      2   0  59 1973      1.29     0
## 71  1685      2   0  32 1973      0.48     0
## 72  1690      1   1  83 1971      1.62     0
## 73  1710      2   0  55 1973      2.26     0
## 74  1710      2   1  15 1973      0.58     0
## 75  1726      1   0  58 1970      0.97     1
## 76  1745      2   0  47 1973      2.58     1
## 77  1762      2   0  54 1973      0.81     0
## 78  1779      2   1  55 1973      3.54     1
## 79  1787      2   1  38 1973      0.97     0
## 80  1787      2   0  41 1973      1.78     1
## 81  1793      2   0  56 1973      1.94     0
## 82  1804      2   0  48 1973      1.29     0
## 83  1812      2   1  44 1973      3.22     1
## 84  1836      2   0  70 1972      1.53     0
## 85  1839      2   0  40 1972      1.29     0
## 86  1839      2   1  53 1972      1.62     1
## 87  1854      2   0  65 1972      1.62     1
## 88  1856      2   1  54 1972      0.32     0
## 89  1860      3   1  71 1969      4.84     1
## 90  1864      2   0  49 1972      1.29     0
## 91  1899      2   0  55 1972      0.97     0
## 92  1914      2   0  69 1972      3.06     0
## 93  1919      2   1  83 1972      3.54     0
## 94  1920      2   1  60 1972      1.62     1
## 95  1927      2   1  40 1972      2.58     1
## 96  1933      1   0  77 1972      1.94     0
## 97  1942      2   0  35 1972      0.81     0
## 98  1955      2   0  46 1972      7.73     1
## 99  1956      2   0  34 1972      0.97     0
## 100 1958      2   0  69 1972     12.88     0
## 101 1963      2   0  60 1972      2.58     0
## 102 1970      2   1  84 1972      4.09     1
## 103 2005      2   0  66 1972      0.64     0
## 104 2007      2   1  56 1972      0.97     0
## 105 2011      2   0  75 1972      3.22     1
## 106 2024      2   0  36 1972      1.62     0
## 107 2028      2   1  52 1972      3.87     1
## 108 2038      2   0  58 1972      0.32     1
## 109 2056      2   0  39 1972      0.32     0
## 110 2059      2   1  68 1972      3.22     1
## 111 2061      1   1  71 1968      2.26     0
## 112 2062      1   0  52 1965      3.06     0
## 113 2075      2   1  55 1972      2.58     1
## 114 2085      3   0  66 1970      0.65     0
## 115 2102      2   1  35 1972      1.13     0
## 116 2103      1   1  44 1966      0.81     0
## 117 2104      2   0  72 1972      0.97     0
## 118 2108      1   0  58 1969      1.76     1
## 119 2112      2   0  54 1972      1.94     1
## 120 2150      2   0  33 1972      0.65     0
## 121 2156      2   0  45 1972      0.97     0
## 122 2165      2   1  62 1972      5.64     0
## 123 2209      2   0  72 1971      9.66     0
## 124 2227      2   0  51 1971      0.10     0
## 125 2227      2   1  77 1971      5.48     1
## 126 2256      1   0  43 1971      2.26     1
## 127 2264      2   0  65 1971      4.83     1
## 128 2339      2   0  63 1971      0.97     0
## 129 2361      2   1  60 1971      0.97     0
## 130 2387      2   0  50 1971      5.16     1
## 131 2388      1   1  40 1966      0.81     0
## 132 2403      2   0  67 1971      2.90     1
## 133 2426      2   0  69 1971      3.87     0
## 134 2426      2   0  74 1971      1.94     1
## 135 2431      2   0  49 1971      0.16     0
## 136 2460      2   0  47 1971      0.64     0
## 137 2467      1   0  42 1965      2.26     1
## 138 2492      2   0  54 1971      1.45     0
## 139 2493      2   1  72 1971      4.82     1
## 140 2521      2   0  45 1971      1.29     1
## 141 2542      2   1  67 1971      7.89     1
## 142 2559      2   0  48 1970      0.81     1
## 143 2565      1   1  34 1970      3.54     1
## 144 2570      2   0  44 1970      1.29     0
## 145 2660      2   0  31 1970      0.64     0
## 146 2666      2   0  42 1970      3.22     1
## 147 2676      2   0  24 1970      1.45     1
## 148 2738      2   0  58 1970      0.48     0
## 149 2782      1   1  78 1969      1.94     0
## 150 2787      2   1  62 1970      0.16     0
## 151 2984      2   1  70 1969      0.16     0
## 152 3032      2   0  35 1969      1.29     0
## 153 3040      2   0  61 1969      1.94     0
## 154 3042      1   0  54 1967      3.54     1
## 155 3067      2   0  29 1969      0.81     0
## 156 3079      2   1  64 1969      0.65     0
## 157 3101      2   1  47 1969      7.09     0
## 158 3144      2   1  62 1969      0.16     0
## 159 3152      2   0  32 1969      1.62     0
## 160 3154      3   1  49 1969      1.62     0
## 161 3180      2   0  25 1969      1.29     0
## 162 3182      3   1  49 1966      6.12     0
## 163 3185      2   0  64 1969      0.48     0
## 164 3199      2   0  36 1969      0.64     0
## 165 3228      2   0  58 1969      3.22     1
## 166 3229      2   0  37 1969      1.94     0
## 167 3278      2   1  54 1969      2.58     0
## 168 3297      2   0  61 1968      2.58     1
## 169 3328      2   1  31 1968      0.81     0
## 170 3330      2   1  61 1968      0.81     1
## 171 3338      1   0  60 1967      3.22     1
## 172 3383      2   0  43 1968      0.32     0
## 173 3384      2   0  68 1968      3.22     1
## 174 3385      2   0   4 1968      2.74     0
## 175 3388      2   1  60 1968      4.84     1
## 176 3402      2   1  50 1968      1.62     0
## 177 3441      2   0  20 1968      0.65     0
## 178 3458      3   0  54 1967      1.45     0
## 179 3459      2   0  29 1968      0.65     0
## 180 3459      2   1  56 1968      1.29     1
## 181 3476      2   0  60 1968      1.62     0
## 182 3523      2   0  46 1968      3.54     0
## 183 3667      2   0  42 1967      3.22     0
## 184 3695      2   0  34 1967      0.65     0
## 185 3695      2   0  56 1967      1.03     0
## 186 3776      2   1  12 1967      7.09     1
## 187 3776      2   0  21 1967      1.29     1
## 188 3830      2   1  46 1967      0.65     0
## 189 3856      2   0  49 1967      1.78     0
## 190 3872      2   0  35 1967     12.24     1
## 191 3909      2   1  42 1967      8.06     1
## 192 3968      2   0  47 1967      0.81     0
## 193 4001      2   0  69 1967      2.10     0
## 194 4103      2   0  52 1966      3.87     0
## 195 4119      2   1  52 1966      0.65     0
## 196 4124      2   0  30 1966      1.94     1
## 197 4207      2   1  22 1966      0.65     0
## 198 4310      2   1  55 1966      2.10     0
## 199 4390      2   0  26 1965      1.94     1
## 200 4479      2   0  19 1965      1.13     1
## 201 4492      2   1  29 1965      7.06     1
## 202 4668      2   0  40 1965      6.12     0
## 203 4688      2   0  42 1965      0.48     0
## 204 4926      2   0  50 1964      2.26     0
## 205 5565      2   0  41 1962      2.90     0

Número de fallecidos por melanoma y otras causas

# Deaths are status 1 and status 3; count the rows holding either code.
num_fallecidos <- sum(Melanoma$status == 1) + sum(Melanoma$status == 3)
num_fallecidos
## [1] 71
# Pie chart of patient status; each slice is labelled with its patient count.
colores_status <- c("yellow", "purple", "black")
conteo_status <- c(sum(Melanoma$status == 1),
                   sum(Melanoma$status == 2),
                   sum(Melanoma$status == 3))
pie(table(Melanoma$status), labels = conteo_status,
    main = "Presencia y ausencia de Melanoma en una base de datos de 205 personas",
    col = colores_status)
etiquetas_status <- c(paste(1, ": Melanoma"), paste(2, ": Vivo"),
                      paste(3, ": Otras causas"))
legend("topright", etiquetas_status, fill = colores_status)

## Relación entre tamaño de tumor y muerte

# Work on a copy so the original status codes stay available.
Melanoma1 <- Melanoma

# Collapse status to a binary outcome in one vectorised step:
# 2 (alive) -> 0; 1 (died of melanoma) and 3 (died of other causes) -> 1.
# This replaces two chained replace() calls whose result depended on their
# order, plus an index vector (s1) that was never used.
Melanoma1$status <- as.numeric(Melanoma$status != 2)

c(paste("Correlación entre el tamaño del tumor y estado del paciente:"),round(cor(Melanoma1$thickness,Melanoma1$status),3))
## [1] "Correlación entre el tamaño del tumor y estado del paciente:"
## [2] "0.314"

Gráfica

# Text label per patient from the binary status (0 = alive, anything else is
# 1 = dead after the recode above), then compare tumour thickness by group.
rem <- ifelse(Melanoma1$status == 0, "Vivos", "Muertos")
boxplot(Melanoma1$thickness ~ rem,
        main = "Relación entre tamaño de tumor y muerte",
        xlab = "Estado del paciente",
        ylab = "Tamaño del tumor (mm)")

# 2.8 UsingR Babyboom

library(UsingR)
data(babyboom)

Número de niños y niñas

table(babyboom$gender)  
## 
## girl  boy 
##   18   26

Niños nacieron por debajo de 3000gr

print("Niños por debajo de 3000gr: ") 
## [1] "Niños por debajo de 3000gr: "
babyboom[babyboom$gender=='boy' & babyboom$wt<3000,]
##    clock.time gender   wt running.time
## 8         422    boy 2846          262
## 18       1133    boy 2902          693
## 19       1209    boy 2635          729
## 40       2104    boy 2121         1264

Relación entre peso por debajo de 3000gr y sexo

table(babyboom$gender[babyboom$wt<3000]) 
## 
## girl  boy 
##    5    4
# Babies under 3000 g: keep only sex and weight, with rows renumbered from 1.
bajo_peso <- babyboom$wt < 3000
relacion_sexo_peso <- data.frame(gender = babyboom$gender[bajo_peso],
                                 wt = babyboom$wt[bajo_peso])
relacion_sexo_peso
##   gender   wt
## 1   girl 2208
## 2   girl 1745
## 3    boy 2846
## 4   girl 2576
## 5    boy 2902
## 6    boy 2635
## 7   girl 2184
## 8   girl 2383
## 9    boy 2121

Promedio de pesos total, de niños y de niñas de forma adecuada

# Typical birth weight per sex.
# NOTE(review): the variables are named "prom" (average) but hold the median
# of each sex's weights; presumably a deliberate robust choice — confirm.
prom_ninos = median(babyboom$wt[babyboom$gender=='boy']);prom_ninos
## [1] 3404
prom_ninas = median(babyboom$wt[babyboom$gender=='girl']);prom_ninas
## [1] 3381

Gráfica:

boxplot(babyboom$wt,ylab = "Peso (gr)",main = "Promedio de pesos total (niños y niñas)")
points(prom_ninos, col = "blue", pch = 19)
points(prom_ninas, col = "pink", pch = 19)
legend(x = "topleft", legend = c(paste("Niños: ",prom_ninos),paste("Niñas: ",prom_ninas)), fill = c("blue", "pink"),title = "Promedio: ")

# 2.9 MASS Aids2

library(MASS)
data(Aids2)

Número de contagios por estado

table(Aids2$state,Aids2$T.categ)
##        
##           hs hsid   id  het haem blood mother other
##   NSW   1539   50   28   18   30    70      3    42
##   Other  204    4   12    8    6     5      2     8
##   QLD    186    7    4    5    4    15      1     4
##   VIC    536   11    4   10    6     4      1    16

Número de fallecidos

table(Aids2$status)
## 
##    A    D 
## 1082 1761
# Rows with status 'D'; the number of such rows is the death count.
es_fallecido <- Aids2$status == 'D'
fallecidos <- Aids2[es_fallecido, ]
print(paste("Cantidad de fallecidos: ", nrow(fallecidos)))
## [1] "Cantidad de fallecidos:  1761"

Relación entre sexo y tipo de transmisión

table(Aids2$sex,Aids2$T.categ)
##    
##       hs hsid   id  het haem blood mother other
##   F    1    0   20   20    0    37      4     7
##   M 2464   72   28   21   46    57      3    63

Gráfica de sexo y tipo de transmisión para ambos sexos:

# Grouped bars: one group per transmission category, one bar per sex.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
colores_sexo <- c("pink", "blue")
barplot(table(Aids2$sex, Aids2$T.categ), beside = TRUE, col = colores_sexo)
legend("topright", levels(Aids2$sex), fill = colores_sexo)

## Relación entre F (mujeres) y tipos de transmisión:

# Keep only the rows whose sex is 'F'.
relacion_sexo_trans_F = Aids2[Aids2$sex=='F',]
# sex=='F' holds for every remaining row, so the table has a single row:
# the count of female patients per transmission category.
barplot(table(relacion_sexo_trans_F$sex=='F',relacion_sexo_trans_F$T.categ),
        main = "Relación entre F (mujeres) y tipos de transmisión",ylab = "Cantidad de pacientes")

## Relación entre M (hombres) y tipos de transmisión:

# Keep only the rows whose sex is 'M'.
relacion_sexo_trans_M = Aids2[Aids2$sex=='M',]
# sex=='M' holds for every remaining row, so the table has a single row:
# the count of male patients per transmission category.
barplot(table(relacion_sexo_trans_M$sex=='M',relacion_sexo_trans_M$T.categ),
        main = "Relación entre M (hombres) y tipos de transmisión",ylab = "Cantidad de pacientes")        

## Gráfica

# Pie of cases per transmission category; slices are labelled with counts and
# the legend maps each colour to its category.
colores_aids <- c("aquamarine", "blue", "blueviolet", "brown", "darkgreen",
                  "darkolivegreen1", "darkorange", "gold")
conteo_transmision <- table(Aids2$T.categ)
pie(conteo_transmision, col = colores_aids, labels = conteo_transmision,
    main = "Cantidad y tipos de transmisión")
legend("topright", legend = levels(Aids2$T.categ), fill = colores_aids)

table(Aids2$T.categ)
## 
##     hs   hsid     id    het   haem  blood mother  other 
##   2465     72     48     41     46     94      7     70

# 2.10 UsingR crime

data(crime)
dat5 = crime
crime
##                 y1983  y1993
## Alabama         416.0  871.7
## Alaska          613.8  660.5
## Arizona         494.2  670.8
## Arkansas        297.7  576.5
## California      772.6 1119.7
## Colorado        476.4  578.8
## Connecticut     375.0  495.3
## Delaware        453.1  621.2
## DC             1985.4 2832.8
## Florida         826.7 1207.2
## Georgia         456.7  733.2
## Hawaii          252.1  258.4
## Idaho           238.7  281.4
## Illinois        553.0  977.3
## Indiana         283.8  508.3
## Iowa            181.1  278.0
## Kansas          326.6  510.8
## Kentucky        322.2  535.5
## Louisiana       640.9  984.6
## Maine           159.6  130.9
## Maryland        807.1 1000.1
## Massachusetts   576.8  779.0
## Michigan        716.7  770.1
## Minnesota       190.9  338.0
## Mississippi     280.4  411.7
## Missour         477.2  740.4
## Montana         212.6  169.9
## Nebraska        217.7  348.6
## Nevada          655.2  696.8
## New Hampshire   125.1  125.7
## New Jersey      553.1  625.8
## New Mexico      686.8  934.9
## New York        914.1 1122.1
## North Carolina  409.6  681.0
## North Dakota     53.7   83.3
## Ohio            397.9  525.9
## Oklahoma        423.4  622.8
## Oregon          487.8  510.2
## Pennsylvania    342.8  427.0
## Rhode Island    355.2  394.5
## South Carolina  616.8  944.5
## South Dakota    120.0  194.5
## Tennessee       402.0  746.2
## Texas           512.2  806.3
## Utah            256.0  290.5
## Vermont         132.6  109.5
## Virginia        292.5  374.9
## Washington      371.8  534.5
## West Virginia   171.8  211.5
## Wisconsin       190.9  275.7
## Wyoming         237.2  319.5
# Mean of each year's column across all rows of the crime table.
prom_1983 <- mean(dat5[["y1983"]])
prom_1993 <- mean(dat5[["y1993"]])

# Side-by-side bars comparing the two yearly means.
color <- c("purple", "orange")
promedios_crime <- c(prom_1983, prom_1993)
barplot(promedios_crime, names.arg = c("1983", "1993"), col = color,
        main = "2.10 Apartado A",
        xlab = "Tasa total", ylab = "Cantidad ")

## Tasa total

max_1983 = dat5[dat5$y1983 == max(dat5$y1983),]

max_1993 = dat5[dat5$y1993 == max(dat5$y1993),]
max_1983[1]
##     y1983
## DC 1985.4

Estado que presenta más crímenes

max_1993[2] 
##     y1993
## DC 2832.8

En ambos años

sum_year = (dat5$y1983 + dat5$y1993)
max_total = max(sum_year)
index = which(sum_year == max_total)
dat5[index,] 
##     y1983  y1993
## DC 1985.4 2832.8