CTIC - UNIVERSIDAD NACIONAL DE INGENIERIA

Programación en R para DataScience

Perfil LinkedIn

Google Drive

Presentación del Codigo - Pregunta 1
> #Importamos los archivos bank y bank-names
> #Creando Cabeceras
nombres <-c("age","job","marital","education","default","balance","housing","loan","contac","day","month","duration","campaign","pdays","previous","poutcome","Sub")
> #Importando
bdbank <- read.csv(file="bank.csv",header = FALSE,sep=";",col.names = nombres)
> #Datos - Observamos que, a pesar de indicar header = FALSE, la fila de cabeceras aparece entre los datos: con header = FALSE, read.csv trata la primera línea del archivo como un registro más
head(bdbank)
  age         job marital education default balance housing loan   contac day month duration campaign pdays previous poutcome Sub
1 age         job marital education default balance housing loan  contact day month duration campaign pdays previous poutcome   y
2  30  unemployed married   primary      no    1787      no   no cellular  19   oct       79        1    -1        0  unknown  no
3  33    services married secondary      no    4789     yes  yes cellular  11   may      220        1   339        4  failure  no
4  35  management  single  tertiary      no    1350     yes   no cellular  16   apr      185        1   330        1  failure  no
5  30  management married  tertiary      no    1476     yes  yes  unknown   3   jun      199        4    -1        0  unknown  no
6  59 blue-collar married secondary      no       0     yes   no  unknown   5   may      226        1    -1        0  unknown  no
> # Convert the imported table to a data frame (read.csv already returns one, so this step only makes the type explicit)
> df_bank <- as.data.frame(bdbank)
> head(df_bank)
> #Eliminamos la Primera Fila
> df_bank<-df_bank[!(df_bank$age=="age"),]
> head(df_bank)
  age         job marital education default balance housing loan   contac day month duration campaign pdays previous poutcome Sub
2  30  unemployed married   primary      no    1787      no   no cellular  19   oct       79        1    -1        0  unknown  no
3  33    services married secondary      no    4789     yes  yes cellular  11   may      220        1   339        4  failure  no
4  35  management  single  tertiary      no    1350     yes   no cellular  16   apr      185        1   330        1  failure  no
5  30  management married  tertiary      no    1476     yes  yes  unknown   3   jun      199        4    -1        0  unknown  no
6  59 blue-collar married secondary      no       0     yes   no  unknown   5   may      226        1    -1        0  unknown  no
7  35  management  single  tertiary      no     747      no   no cellular  23   feb      141        2   176        3  failure  no
> #Realizar un Analisis Exploratorio
> summary(df_bank)
     age                job              marital           education           default            balance            housing         
 Length:4521        Length:4521        Length:4521        Length:4521        Length:4521        Length:4521        Length:4521       
 Class :character   Class :character   Class :character   Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
     loan              contac              day               month             duration           campaign            pdays          
 Length:4521        Length:4521        Length:4521        Length:4521        Length:4521        Length:4521        Length:4521       
 Class :character   Class :character   Class :character   Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
   previous           poutcome             Sub           
 Length:4521        Length:4521        Length:4521       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
> lapply(df_bank,class)
$age
[1] "character"

$job
[1] "character"

$marital
[1] "character"

$education
[1] "character"

$default
[1] "character"

$balance
[1] "character"

$housing
[1] "character"

$loan
[1] "character"

$contac
[1] "character"

$day
[1] "character"

$month
[1] "character"

$duration
[1] "character"

$campaign
[1] "character"

$pdays
[1] "character"

$previous
[1] "character"

$poutcome
[1] "character"

$Sub
[1] "character"
Presentación del Codigo - Pregunta 2
> #Creando Cabeceras
nombres <-c("buying","maint","doors","persons","lug_boot","safety","ND")

> #Importando
bdcar <- read.table(file="car.data",header = FALSE,sep=",",col.names = nombres)

> #Datos
> head(bdcar)
  buying maint doors persons lug_boot safety    ND
1  vhigh vhigh     2       2    small    low unacc
2  vhigh vhigh     2       2    small    med unacc
3  vhigh vhigh     2       2    small   high unacc
4  vhigh vhigh     2       2      med    low unacc
5  vhigh vhigh     2       2      med    med unacc
6  vhigh vhigh     2       2      med   high unacc

> #Creando DataFrame
> df_auto<-as.data.frame(bdcar)
> head(df_auto)
  buying maint doors persons lug_boot safety    ND
1  vhigh vhigh     2       2    small    low unacc
2  vhigh vhigh     2       2    small    med unacc
3  vhigh vhigh     2       2    small   high unacc
4  vhigh vhigh     2       2      med    low unacc
5  vhigh vhigh     2       2      med    med unacc
6  vhigh vhigh     2       2      med   high unacc

> # Extract the rows whose safety is "high" and whose lug_boot is "big".
> # subset() dispatches to the data-frame method and evaluates the condition
> # inside df_auto, so the columns can be named directly.
> sub_df_auto <- subset(df_auto, safety == "high" & lug_boot == "big")
> head(sub_df_auto)
   buying maint doors persons lug_boot safety    ND
9   vhigh vhigh     2       2      big   high unacc
18  vhigh vhigh     2       4      big   high unacc
27  vhigh vhigh     2    more      big   high unacc
36  vhigh vhigh     3       2      big   high unacc
45  vhigh vhigh     3       4      big   high unacc
54  vhigh vhigh     3    more      big   high unacc
Presentación del Codigo - Pregunta 3
> #Importando
bdwine <- read.csv(file="winequality-red.csv",header = TRUE,sep=";")

> #Datos
> head(bdwine)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
1           7.4             0.70        0.00            1.9     0.076                  11                   34  0.9978 3.51      0.56     9.4       5
2           7.8             0.88        0.00            2.6     0.098                  25                   67  0.9968 3.20      0.68     9.8       5
3           7.8             0.76        0.04            2.3     0.092                  15                   54  0.9970 3.26      0.65     9.8       5
4          11.2             0.28        0.56            1.9     0.075                  17                   60  0.9980 3.16      0.58     9.8       6
5           7.4             0.70        0.00            1.9     0.076                  11                   34  0.9978 3.51      0.56     9.4       5
6           7.4             0.66        0.00            1.8     0.075                  13                   40  0.9978 3.51      0.56     9.4       5

> #Convirtiendo en DataFrame
> df_vinorojo<-as.data.frame(bdwine)
> head(df_vinorojo)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
1           7.4             0.70        0.00            1.9     0.076                  11                   34  0.9978 3.51      0.56     9.4       5
2           7.8             0.88        0.00            2.6     0.098                  25                   67  0.9968 3.20      0.68     9.8       5
3           7.8             0.76        0.04            2.3     0.092                  15                   54  0.9970 3.26      0.65     9.8       5
4          11.2             0.28        0.56            1.9     0.075                  17                   60  0.9980 3.16      0.58     9.8       6
5           7.4             0.70        0.00            1.9     0.076                  11                   34  0.9978 3.51      0.56     9.4       5
6           7.4             0.66        0.00            1.8     0.075                  13                   40  0.9978 3.51      0.56     9.4       5

> # Keep the red-wine rows with density above 0.9975 and quality above 5.
> # The condition is evaluated inside df_vinorojo, so columns are named directly.
> df_vinorojo_filtro <- subset(df_vinorojo, density > 0.9975 & quality > 5)
> head(df_vinorojo_filtro)
    fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
4            11.2            0.280        0.56            1.9     0.075                  17                   60  0.9980 3.16      0.58     9.8       6
34            6.9            0.605        0.12           10.7     0.073                  40                   83  0.9993 3.45      0.52     9.4       6
36            7.8            0.645        0.00            5.5     0.086                   5                   18  0.9986 3.40      0.55     9.6       6
109           8.0            0.330        0.53            2.5     0.091                  18                   80  0.9976 3.37      0.80     9.6       6
114          10.1            0.310        0.44            2.3     0.080                  22                   46  0.9988 3.32      0.67     9.7       6
117           8.3            0.540        0.28            1.9     0.077                  11                   40  0.9978 3.39      0.61    10.0       6
Presentación del Codigo - Pregunta 4
> #Importando
> bdwine <- read.csv(file="winequality-white.csv",header = TRUE,sep=";")

> #Datos
> head(bdwine)
 fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
1           7.0             0.27        0.36           20.7     0.045                  45                  170  1.0010 3.00      0.45     8.8       6
2           6.3             0.30        0.34            1.6     0.049                  14                  132  0.9940 3.30      0.49     9.5       6
3           8.1             0.28        0.40            6.9     0.050                  30                   97  0.9951 3.26      0.44    10.1       6
4           7.2             0.23        0.32            8.5     0.058                  47                  186  0.9956 3.19      0.40     9.9       6
5           7.2             0.23        0.32            8.5     0.058                  47                  186  0.9956 3.19      0.40     9.9       6
6           8.1             0.28        0.40            6.9     0.050                  30                   97  0.9951 3.26      0.44    10.1       6

> #Convirtiendo en DataFrame
> df_vinoblanco<-as.data.frame(bdwine)
> head(df_vinoblanco)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
1           7.0             0.27        0.36           20.7     0.045                  45                  170  1.0010 3.00      0.45     8.8       6
2           6.3             0.30        0.34            1.6     0.049                  14                  132  0.9940 3.30      0.49     9.5       6
3           8.1             0.28        0.40            6.9     0.050                  30                   97  0.9951 3.26      0.44    10.1       6
4           7.2             0.23        0.32            8.5     0.058                  47                  186  0.9956 3.19      0.40     9.9       6
5           7.2             0.23        0.32            8.5     0.058                  47                  186  0.9956 3.19      0.40     9.9       6
6           8.1             0.28        0.40            6.9     0.050                  30                   97  0.9951 3.26      0.44    10.1       6

> # Keep the white-wine rows with density above 0.9975 and quality above 5.
> # The condition is evaluated inside df_vinoblanco, so columns are named directly.
> df_vinoblanco_filtro <- subset(df_vinoblanco, density > 0.9975 & quality > 5)
> head(df_vinoblanco_filtro)
    fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol quality
1             7.0             0.27        0.36           20.7     0.045                  45                  170  1.0010 3.00      0.45     8.8       6
8             7.0             0.27        0.36           20.7     0.045                  45                  170  1.0010 3.00      0.45     8.8       6
82            7.1             0.20        0.34           16.0     0.050                  51                  166  0.9985 3.21      0.60     9.2       6
97            6.0             0.34        0.66           15.9     0.046                  26                  164  0.9979 3.14      0.50     8.8       6
100           6.0             0.34        0.66           15.9     0.046                  26                  164  0.9979 3.14      0.50     8.8       6
108           7.1             0.23        0.35           16.5     0.040                  60                  171  0.9990 3.16      0.59     9.1       6
Presentación del Codigo - Pregunta 5
> #Recordemos que para ambos vinos se aplicó un filtro de estándares de calidad, donde se planteó que la densidad del vino fuese mayor a 0.9975 y que su calidad fuese mayor a 5

> #Cantidad de Registros que Cumplen con las Restricciones
> dim(df_vinorojo_filtro)
[1] 234  12
> dim(df_vinoblanco_filtro)
[1] 396  12

> # Density histograms, one per filtered wine data set
> hist(x = df_vinorojo_filtro$density, main = "Histograma de Densidad Vino Rojo", xlab = "Densidad", ylab = "Frecuencia", col = "red")
> # Read the white-wine subset here so the plotted data matches the plot title
> hist(x = df_vinoblanco_filtro$density, main = "Histograma de Densidad Vino Blanco", xlab = "Densidad", ylab = "Frecuencia", col = "ivory")

> # Scatter plots of fixed acidity (x) against volatile acidity (y), one per filtered data set
> plot(x = df_vinorojo_filtro$fixed.acidity, y = df_vinorojo_filtro$volatile.acidity, main = "Diagrama de Dispersion del Vino Rojo", xlab = "fixed acidity", ylab = "volatile acidity")
> plot(x = df_vinoblanco_filtro$fixed.acidity, y = df_vinoblanco_filtro$volatile.acidity, main = "Diagrama de Dispersion del Vino Blanco", xlab = "fixed acidity", ylab = "volatile acidity")
