leer archivos

Archivos .csv .txt .tsv

library(readr)

read.csv() # comma-delimited .csv files

read.table() # whitespace-delimited .txt files

read.csv2() # semicolon-delimited .csv files

read_tsv() # tab-delimited files (from readr)

Archivos en Excel

# One-time installation of readxl. The package name must be written with
# straight ASCII quotes; typographic quotes are a syntax error in R.
install.packages("readxl")

library(readxl)

read_excel() # .xls and .xlsx

read_xls() # .xls only

read_xlsx() # .xlsx only

# Address of the murders CSV file; the string needs straight ASCII quotes on
# both ends to parse.
url <- "https://raw.githubusercontent.com/rafalab/dslabs/master/inst/extdata/murders.csv"

# Read the remote CSV straight from the URL into a data frame.
murders <- read.csv(url)

Descarga el dataset desde la dirección guardada en la variable url y lo almacena localmente en el archivo murder.csv

# Download the file at `url` and save it as murder.csv (a relative path, so it
# is written to the current working directory).
download.file(url, "murder.csv")

Carga de dataset heights

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.4.1
data("heights") # load the heights dataset

head(heights) # preview the first rows
##      sex height
## 1   Male     75
## 2   Male     70
## 3   Male     68
## 4   Male     74
## 5   Male     61
## 6 Female     65

Calcular el promedio y la desviación estándar de la altura por género

# Per-sex summary of heights: mean (Avg_height) and standard deviation
# (std_height).
est <- summarise(
  group_by(heights, sex),
  Avg_height = mean(height),
  std_height = sd(height)
)

est[est$sex == 'Female',] # result for women
## # A tibble: 1 × 3
##   sex    Avg_height std_height
##   <fct>       <dbl>      <dbl>
## 1 Female       64.9       3.76
est[est$sex == 'Male',] # result for men
## # A tibble: 1 × 3
##   sex   Avg_height std_height
##   <fct>      <dbl>      <dbl>
## 1 Male        69.3       3.61
data("murders") # load the murders dataset

Devuelve la fila donde está el valor máximo según la columna total

max_total <- which.max(murders$total) # index of the row with the largest value in the total column
murders[max_total,] # show that row
##        state abb region population total
## 5 California  CA   West   37253956  1257
# State with the highest number of murders (largest value in `total`).
max_total_stado <- murders$state[which.max(murders$total)]

# State with the largest population. This must index by `population`;
# indexing by `total` would only repeat max_total_stado.
max_population <- murders$state[which.max(murders$population)]

asesinatos por cada 100000 habitantes

# Murders per 100,000 inhabitants, stored as a new `rate` column.
murders <- mutate(murders, rate = total * 100000 / population)
head(murders)
##        state abb region population total     rate
## 1    Alabama  AL  South    4779736   135 2.824424
## 2     Alaska  AK   West     710231    19 2.675186
## 3    Arizona  AZ   West    6392017   232 3.629527
## 4   Arkansas  AR  South    2915918    93 3.189390
## 5 California  CA   West   37253956  1257 3.374138
## 6   Colorado  CO   West    5029196    65 1.292453
# Average of the per-state rates, computed two equivalent ways. Note that this
# averages the state rates; it is not total murders over total population.
avgTasa <- mean(murders[["rate"]])
avgTasa
## [1] 2.779125
avgTasa2 <- summarise(select(murders, rate), mean(rate))
avgTasa2
##   mean(rate)
## 1   2.779125
# `cat` is each state's rate minus the average rate: positive means above
# the average, negative means below it.
murders <- mutate(murders, cat = rate - avgTasa)
head(murders)
##        state abb region population total     rate         cat
## 1    Alabama  AL  South    4779736   135 2.824424  0.04529833
## 2     Alaska  AK   West     710231    19 2.675186 -0.10393949
## 3    Arizona  AZ   West    6392017   232 3.629527  0.85040182
## 4   Arkansas  AR  South    2915918    93 3.189390  0.41026465
## 5 California  CA   West   37253956  1257 3.374138  0.59501286
## 6   Colorado  CO   West    5029196    65 1.292453 -1.48667234

Estados según su ubicación respecto a la media

murders[murders$cat>0,] # states above the mean
##                   state abb        region population total      rate
## 1               Alabama  AL         South    4779736   135  2.824424
## 3               Arizona  AZ          West    6392017   232  3.629527
## 4              Arkansas  AR         South    2915918    93  3.189390
## 5            California  CA          West   37253956  1257  3.374138
## 8              Delaware  DE         South     897934    38  4.231937
## 9  District of Columbia  DC         South     601723    99 16.452753
## 10              Florida  FL         South   19687653   669  3.398069
## 11              Georgia  GA         South    9920000   376  3.790323
## 14             Illinois  IL North Central   12830632   364  2.836961
## 19            Louisiana  LA         South    4533372   351  7.742581
## 21             Maryland  MD         South    5773552   293  5.074866
## 23             Michigan  MI North Central    9883640   413  4.178622
## 25          Mississippi  MS         South    2967297   120  4.044085
## 26             Missouri  MO North Central    5988927   321  5.359892
## 29               Nevada  NV          West    2700551    84  3.110476
## 31           New Jersey  NJ     Northeast    8791894   246  2.798032
## 32           New Mexico  NM          West    2059179    67  3.253724
## 34       North Carolina  NC         South    9535483   286  2.999324
## 37             Oklahoma  OK         South    3751351   111  2.958934
## 39         Pennsylvania  PA     Northeast   12702379   457  3.597751
## 41       South Carolina  SC         South    4625364   207  4.475323
## 43            Tennessee  TN         South    6346105   219  3.450936
## 44                Texas  TX         South   25145561   805  3.201360
## 47             Virginia  VA         South    8001024   250  3.124600
##            cat
## 1   0.04529833
## 3   0.85040182
## 4   0.41026465
## 5   0.59501286
## 8   1.45281142
## 9  13.67362773
## 10  0.61894338
## 11  1.01119713
## 14  0.05783535
## 19  4.96345557
## 21  2.29574007
## 23  1.39949700
## 25  1.26495912
## 26  2.58076623
## 29  0.33135089
## 31  0.01890646
## 32  0.47459848
## 34  0.22019823
## 37  0.17980854
## 39  0.81862581
## 41  1.69619800
## 43  0.67181020
## 44  0.42223482
## 47  0.34547460
murders[murders$cat< 0,] # states below the mean
##            state abb        region population total      rate         cat
## 2         Alaska  AK          West     710231    19 2.6751860 -0.10393949
## 6       Colorado  CO          West    5029196    65 1.2924531 -1.48667234
## 7    Connecticut  CT     Northeast    3574097    97 2.7139722 -0.06515322
## 12        Hawaii  HI          West    1360301     7 0.5145920 -2.26453346
## 13         Idaho  ID          West    1567582    12 0.7655102 -2.01361526
## 15       Indiana  IN North Central    6483802   142 2.1900730 -0.58905240
## 16          Iowa  IA North Central    3046355    21 0.6893484 -2.08977703
## 17        Kansas  KS North Central    2853118    63 2.2081106 -0.57101489
## 18      Kentucky  KY         South    4339367   116 2.6732010 -0.10592450
## 20         Maine  ME     Northeast    1328361    11 0.8280881 -1.95103730
## 22 Massachusetts  MA     Northeast    6547629   118 1.8021791 -0.97694637
## 24     Minnesota  MN North Central    5303925    53 0.9992600 -1.77986547
## 27       Montana  MT          West     989415    12 1.2128379 -1.56628756
## 28      Nebraska  NE North Central    1826341    32 1.7521372 -1.02698825
## 30 New Hampshire  NH     Northeast    1316470     5 0.3798036 -2.39932189
## 33      New York  NY     Northeast   19378102   517 2.6679599 -0.11116550
## 35  North Dakota  ND North Central     672591     4 0.5947151 -2.18441039
## 36          Ohio  OH North Central   11536504   310 2.6871225 -0.09200290
## 38        Oregon  OR          West    3831074    36 0.9396843 -1.83944117
## 40  Rhode Island  RI     Northeast    1052567    16 1.5200933 -1.25903219
## 42  South Dakota  SD North Central     814180     8 0.9825837 -1.79654175
## 45          Utah  UT          West    2763885    22 0.7959810 -1.98314443
## 46       Vermont  VT     Northeast     625741     2 0.3196211 -2.45950439
## 48    Washington  WA          West    6724540    93 1.3829942 -1.39613122
## 49 West Virginia  WV         South    1852994    27 1.4571013 -1.32202413
## 50     Wisconsin  WI North Central    5686986    97 1.7056487 -1.07347680
## 51       Wyoming  WY          West     563626     5 0.8871131 -1.89201237