OBJETIVO. Analizar datos de salarios

Utilizar la librería dplyr para analizar datos de salarios

Las librerias

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Los datos

# Load the salary data.
# salarios <- read.csv("path to the data file goes here")
#salarios <- read.csv("https://raw.githubusercontent.com/rpizarrog/Curso-Titulacion-Data-Science-/master/2019/Datos/Salaries.csv")

# Column types are declared explicitly for the columns readr mis-guesses.
# Type guessing only looks at the leading rows, where Benefits is empty, so the
# column was read as logical and every real amount (e.g. 44430.12) was turned
# into NA with a parsing failure.
# Notes and Status are read as text so no value in them can fail to parse.
# NOTE(review): the reported failure count is larger than the row count, so at
# least one column besides Benefits was affected -- confirm which one.
salarios <- read_csv(
  "C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv",
  col_types = cols(
    .default = col_guess(),
    Benefits = col_double(),
    Notes = col_character(),
    Status = col_character()
  )
)
## Parsed with column specification:
## cols(
##   Id = col_double(),
##   EmployeeName = col_character(),
##   JobTitle = col_character(),
##   BasePay = col_double(),
##   OvertimePay = col_double(),
##   OtherPay = col_double(),
##   Benefits = col_logical(),
##   TotalPay = col_double(),
##   TotalPayBenefits = col_double(),
##   Year = col_double(),
##   Notes = col_logical(),
##   Agency = col_character(),
##   Status = col_logical()
## )
## Warning: 150614 parsing failures.
##   row      col           expected   actual                                                               file
## 36160 Benefits 1/0/T/F/TRUE/FALSE 44430.12 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36161 Benefits 1/0/T/F/TRUE/FALSE 69810.19 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36162 Benefits 1/0/T/F/TRUE/FALSE 53102.29 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36163 Benefits 1/0/T/F/TRUE/FALSE 72047.88 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## 36164 Benefits 1/0/T/F/TRUE/FALSE 44438.25 'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'
## ..... ........ .................. ........ ..................................................................
## See problems(...) for more details.
# Local data folder: C:\Users\tthan\Documents\CIENCIA DE LOS DATOS\Datos
# View(Salarios)
# salarios   # printing the whole table is no longer wanted
# Preview only the first six rows instead.
head(salarios, n = 6L)
## # A tibble: 6 x 13
##      Id EmployeeName JobTitle BasePay OvertimePay OtherPay Benefits
##   <dbl> <chr>        <chr>      <dbl>       <dbl>    <dbl> <lgl>   
## 1     1 NATHANIEL F~ GENERAL~ 167411.          0   400184. NA      
## 2     2 GARY JIMENEZ CAPTAIN~ 155966.     245132.  137811. NA      
## 3     3 ALBERT PARD~ CAPTAIN~ 212739.     106088.   16453. NA      
## 4     4 CHRISTOPHER~ WIRE RO~  77916       56121.  198307. NA      
## 5     5 PATRICK GAR~ DEPUTY ~ 134402.       9737   182235. NA      
## 6     6 DAVID SULLI~ ASSISTA~ 118602        8601   189083. NA      
## # ... with 6 more variables: TotalPay <dbl>, TotalPayBenefits <dbl>,
## #   Year <dbl>, Notes <lgl>, Agency <chr>, Status <lgl>
# Show the structure of the table: column names, types and leading values.
utils::str(salarios)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 148654 obs. of  13 variables:
##  $ Id              : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ EmployeeName    : chr  "NATHANIEL FORD" "GARY JIMENEZ" "ALBERT PARDINI" "CHRISTOPHER CHONG" ...
##  $ JobTitle        : chr  "GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY" "CAPTAIN III (POLICE DEPARTMENT)" "CAPTAIN III (POLICE DEPARTMENT)" "WIRE ROPE CABLE MAINTENANCE MECHANIC" ...
##  $ BasePay         : num  167411 155966 212739 77916 134402 ...
##  $ OvertimePay     : num  0 245132 106088 56121 9737 ...
##  $ OtherPay        : num  400184 137811 16453 198307 182235 ...
##  $ Benefits        : logi  NA NA NA NA NA NA ...
##  $ TotalPay        : num  567595 538909 335280 332344 326373 ...
##  $ TotalPayBenefits: num  567595 538909 335280 332344 326373 ...
##  $ Year            : num  2011 2011 2011 2011 2011 ...
##  $ Notes           : logi  NA NA NA NA NA NA ...
##  $ Agency          : chr  "San Francisco" "San Francisco" "San Francisco" "San Francisco" ...
##  $ Status          : logi  NA NA NA NA NA NA ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 150614 obs. of  5 variables:
##   ..$ row     : int  36160 36161 36162 36163 36164 36165 36166 36167 36168 36169 ...
##   ..$ col     : chr  "Benefits" "Benefits" "Benefits" "Benefits" ...
##   ..$ expected: chr  "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
##   ..$ actual  : chr  "44430.12" "69810.19" "53102.29" "72047.88" ...
##   ..$ file    : chr  "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" "'C:/Users/tthan/Documents/CIENCIA DE LOS DATOS/Datos/Salaries.csv'" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Id = col_double(),
##   ..   EmployeeName = col_character(),
##   ..   JobTitle = col_character(),
##   ..   BasePay = col_double(),
##   ..   OvertimePay = col_double(),
##   ..   OtherPay = col_double(),
##   ..   Benefits = col_logical(),
##   ..   TotalPay = col_double(),
##   ..   TotalPayBenefits = col_double(),
##   ..   Year = col_double(),
##   ..   Notes = col_logical(),
##   ..   Agency = col_character(),
##   ..   Status = col_logical()
##   .. )
# Per-column summary (quartiles and NA counts for the numeric columns).
summary(salarios)
##        Id         EmployeeName         JobTitle            BasePay      
##  Min.   :     1   Length:148654      Length:148654      Min.   :  -166  
##  1st Qu.: 37164   Class :character   Class :character   1st Qu.: 33588  
##  Median : 74328   Mode  :character   Mode  :character   Median : 65007  
##  Mean   : 74328                                         Mean   : 66325  
##  3rd Qu.:111491                                         3rd Qu.: 94691  
##  Max.   :148654                                         Max.   :319275  
##                                                         NA's   :609     
##   OvertimePay           OtherPay        Benefits          TotalPay       
##  Min.   :    -0.01   Min.   : -7058.6   Mode:logical   Min.   :  -618.1  
##  1st Qu.:     0.00   1st Qu.:     0.0   NA's:148654    1st Qu.: 36169.0  
##  Median :     0.00   Median :   811.3                  Median : 71426.6  
##  Mean   :  5066.06   Mean   :  3648.8                  Mean   : 74768.3  
##  3rd Qu.:  4658.18   3rd Qu.:  4236.1                  3rd Qu.:105839.1  
##  Max.   :245131.88   Max.   :400184.2                  Max.   :567595.4  
##  NA's   :4           NA's   :4                                           
##  TotalPayBenefits        Year       Notes            Agency         
##  Min.   :  -618.1   Min.   :2011   Mode:logical   Length:148654     
##  1st Qu.: 44065.7   1st Qu.:2012   NA's:148654    Class :character  
##  Median : 92404.1   Median :2013                  Mode  :character  
##  Mean   : 93692.6   Mean   :2013                                    
##  3rd Qu.:132876.5   3rd Qu.:2014                                    
##  Max.   :567595.4   Max.   :2014                                    
##                                                                     
##   Status       
##  Mode:logical  
##  NA's:148654   
##                
##                
##                
##                
## 

Determinar Total Pay Benefits

La media

Desviacion

Maximo

Minimo

# Descriptive statistics of total pay including benefits.
# na.rm = TRUE keeps a single missing value from turning every statistic into
# NA; other pay columns in this data set (e.g. BasePay) do contain NAs.
maximo <- max(salarios$TotalPayBenefits, na.rm = TRUE)
minimo <- min(salarios$TotalPayBenefits, na.rm = TRUE)
media <- mean(salarios$TotalPayBenefits, na.rm = TRUE)
desvstd <- sd(salarios$TotalPayBenefits, na.rm = TRUE)

Mostrar los valores estadisticos

# Print each statistic next to its label. The separator is spelled out; it is
# the same single space paste() uses by default, so the output is unchanged.
paste("Valor maximo de Ingreso Total", maximo, sep = " ")
## [1] "Valor maximo de Ingreso Total 567595.43"
paste("Valor minimo de Ingreso Total", minimo, sep = " ")
## [1] "Valor minimo de Ingreso Total -618.13"
paste("Valor media de Ingreso Total", media, sep = " ")
## [1] "Valor media de Ingreso Total 93692.5548105668"
paste("Valor desviacion std de Ingreso Total", desvstd, sep = " ")
## [1] "Valor desviacion std de Ingreso Total 62793.5334832377"

Analisis descriptivo

En esta práctica se pusieron en práctica los conocimientos ya adquiridos en las clases anteriores: se practicó cargar los datos desde una URL y desde una dirección local, observándose que, cuando son archivos con muchos datos, es más conveniente tener la base de datos en una dirección local. Nuevamente se practicó el cálculo del máximo, el mínimo, la desviación estándar y la media de un atributo, que en este caso fue Total Pay Benefits.