Los datos
# Load the salaries data set.
# salarios <- read.csv("<path to the data file goes here>")
# Remote copy of the same file:
# salarios <- read.csv("https://raw.githubusercontent.com/rpizarrog/Curso-Titulacion-Data-Science-/master/2019/Datos/Salaries.csv")
#
# readr guesses every column type from the first rows only. Benefits is empty
# there, so it was guessed as logical; the amounts that appear later (for
# example 44430.12 at row 36160) failed to parse and the whole column became NA.
# Declaring the type up front keeps those values.
# NOTE(review): more parsing failures were reported than the file has rows, so
# Benefits cannot be the only affected column. Status is presumably the other
# one; reading it as character is lossless either way. Confirm with
# problems(salarios) after loading.
# Columns not listed here are still guessed. The console output recorded right
# after this call in this document predates the explicit column types.
salarios <- read_csv(
  "C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv",
  col_types = readr::cols(
    Benefits = readr::col_double(),
    Status = readr::col_character()
  )
)
## Parsed with column specification:
## cols(
## Id = col_double(),
## EmployeeName = col_character(),
## JobTitle = col_character(),
## BasePay = col_double(),
## OvertimePay = col_double(),
## OtherPay = col_double(),
## Benefits = col_logical(),
## TotalPay = col_double(),
## TotalPayBenefits = col_double(),
## Year = col_double(),
## Notes = col_logical(),
## Agency = col_character(),
## Status = col_logical()
## )
## Warning: 150614 parsing failures.
## row col expected actual file
## 36160 Benefits 1/0/T/F/TRUE/FALSE 44430.12 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36161 Benefits 1/0/T/F/TRUE/FALSE 69810.19 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36162 Benefits 1/0/T/F/TRUE/FALSE 53102.29 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36163 Benefits 1/0/T/F/TRUE/FALSE 72047.88 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36164 Benefits 1/0/T/F/TRUE/FALSE 44438.25 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## ..... ........ .................. ........ ..................................................................
## See problems(...) for more details.
# Data folder on the author's machine: C:\Users\tthan\Documents\CIENCIA DE LOS DATOS\Datos
# View(Salarios)  # interactive viewer call, left disabled
# Printing the whole `salarios` table is no longer wanted; preview the first
# six rows instead.
head(x = salarios, n = 6L)
## # A tibble: 6 x 13
## Id EmployeeName JobTitle BasePay OvertimePay OtherPay Benefits
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <lgl>
## 1 1 NATHANIEL F~ GENERAL~ 167411. 0 400184. NA
## 2 2 GARY JIMENEZ CAPTAIN~ 155966. 245132. 137811. NA
## 3 3 ALBERT PARD~ CAPTAIN~ 212739. 106088. 16453. NA
## 4 4 CHRISTOPHER~ WIRE RO~ 77916 56121. 198307. NA
## 5 5 PATRICK GAR~ DEPUTY ~ 134402. 9737 182235. NA
## 6 6 DAVID SULLI~ ASSISTA~ 118602 8601 189083. NA
## # ... with 6 more variables: TotalPay <dbl>, TotalPayBenefits <dbl>,
## # Year <dbl>, Notes <lgl>, Agency <chr>, Status <lgl>
# Compact overview of the table: one line per column with its type and first
# values, followed by the attributes attached by the reader.
str(object = salarios)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 148654 obs. of 13 variables:
## $ Id : num 1 2 3 4 5 6 7 8 9 10 ...
## $ EmployeeName : chr "NATHANIEL FORD" "GARY JIMENEZ" "ALBERT PARDINI" "CHRISTOPHER CHONG" ...
## $ JobTitle : chr "GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY" "CAPTAIN III (POLICE DEPARTMENT)" "CAPTAIN III (POLICE DEPARTMENT)" "WIRE ROPE CABLE MAINTENANCE MECHANIC" ...
## $ BasePay : num 167411 155966 212739 77916 134402 ...
## $ OvertimePay : num 0 245132 106088 56121 9737 ...
## $ OtherPay : num 400184 137811 16453 198307 182235 ...
## $ Benefits : logi NA NA NA NA NA NA ...
## $ TotalPay : num 567595 538909 335280 332344 326373 ...
## $ TotalPayBenefits: num 567595 538909 335280 332344 326373 ...
## $ Year : num 2011 2011 2011 2011 2011 ...
## $ Notes : logi NA NA NA NA NA NA ...
## $ Agency : chr "San Francisco" "San Francisco" "San Francisco" "San Francisco" ...
## $ Status : logi NA NA NA NA NA NA ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 150614 obs. of 5 variables:
## ..$ row : int 36160 36161 36162 36163 36164 36165 36166 36167 36168 36169 ...
## ..$ col : chr "Benefits" "Benefits" "Benefits" "Benefits" ...
## ..$ expected: chr "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
## ..$ actual : chr "44430.12" "69810.19" "53102.29" "72047.88" ...
## ..$ file : chr "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" ...
## - attr(*, "spec")=
## .. cols(
## .. Id = col_double(),
## .. EmployeeName = col_character(),
## .. JobTitle = col_character(),
## .. BasePay = col_double(),
## .. OvertimePay = col_double(),
## .. OtherPay = col_double(),
## .. Benefits = col_logical(),
## .. TotalPay = col_double(),
## .. TotalPayBenefits = col_double(),
## .. Year = col_double(),
## .. Notes = col_logical(),
## .. Agency = col_character(),
## .. Status = col_logical()
## .. )
# Per-column summary: quartiles, mean and NA count for numeric columns,
# length and class for character columns.
summary(object = salarios)
## Id EmployeeName JobTitle BasePay
## Min. : 1 Length:148654 Length:148654 Min. : -166
## 1st Qu.: 37164 Class :character Class :character 1st Qu.: 33588
## Median : 74328 Mode :character Mode :character Median : 65007
## Mean : 74328 Mean : 66325
## 3rd Qu.:111491 3rd Qu.: 94691
## Max. :148654 Max. :319275
## NA's :609
## OvertimePay OtherPay Benefits TotalPay
## Min. : -0.01 Min. : -7058.6 Mode:logical Min. : -618.1
## 1st Qu.: 0.00 1st Qu.: 0.0 NA's:148654 1st Qu.: 36169.0
## Median : 0.00 Median : 811.3 Median : 71426.6
## Mean : 5066.06 Mean : 3648.8 Mean : 74768.3
## 3rd Qu.: 4658.18 3rd Qu.: 4236.1 3rd Qu.:105839.1
## Max. :245131.88 Max. :400184.2 Max. :567595.4
## NA's :4 NA's :4
## TotalPayBenefits Year Notes Agency
## Min. : -618.1 Min. :2011 Mode:logical Length:148654
## 1st Qu.: 44065.7 1st Qu.:2012 NA's:148654 Class :character
## Median : 92404.1 Median :2013 Mode :character
## Mean : 93692.6 Mean :2013
## 3rd Qu.:132876.5 3rd Qu.:2014
## Max. :567595.4 Max. :2014
##
## Status
## Mode:logical
## NA's:148654
##
##
##
##
##
Determinar para Total Pay Benefits (columna TotalPayBenefits):
La media
La desviación estándar
El máximo
El mínimo
# Summary statistics for total compensation (the TotalPayBenefits column).
# na.rm = TRUE drops missing values before aggregating; without it a single NA
# in the column would turn all four statistics into NA.
ingreso_total <- salarios$TotalPayBenefits
maximo <- max(ingreso_total, na.rm = TRUE)
minimo <- min(ingreso_total, na.rm = TRUE)
media <- mean(ingreso_total, na.rm = TRUE)
desvstd <- sd(ingreso_total, na.rm = TRUE)
Mostrar los valores estadísticos
# Print each statistic next to its label. The `## [1] ...` line after each call
# is the console output recorded when the document was rendered.
print(paste("Valor maximo de Ingreso Total", maximo, sep = " "))
## [1] "Valor maximo de Ingreso Total 567595.43"
print(paste("Valor minimo de Ingreso Total", minimo, sep = " "))
## [1] "Valor minimo de Ingreso Total -618.13"
print(paste("Valor media de Ingreso Total", media, sep = " "))
## [1] "Valor media de Ingreso Total 93692.5548105668"
print(paste("Valor desviacion std de Ingreso Total", desvstd, sep = " "))
## [1] "Valor desviacion std de Ingreso Total 62793.5334832377"