R Basics

Author

Chasfat

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto, see https://quarto.org.

Running Code

When you click the Render button, a document will be generated that includes both content and the output of embedded code. You can embed code like this:

1 + 1
[1] 2

You can add options to executable code like this:

[1] 4

The `echo: false` option disables the printing of code (only output is displayed).

R basics

6+9
[1] 15

Import Packages

library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.0.5
library(dplyr)
Warning: package 'dplyr' was built under R version 4.0.5

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
Warning: package 'tidyr' was built under R version 4.0.5
?starwars
starting httpd help server ...
 done

Using the Pipe %>% Operator

names(starwars)
 [1] "name"       "height"     "mass"       "hair_color" "skin_color"
 [6] "eye_color"  "birth_year" "sex"        "gender"     "homeworld" 
[11] "species"    "films"      "vehicles"   "starships" 

Pipe operator

starwars %>%
  filter(height >150 & mass <200 ) %>%
  mutate(height_in_meters=height/100) %>%
  select(height_in_meters,mass) %>%
  arrange(mass) %>%
  plot()

head(msleep)
# A tibble: 6 x 11
  name    genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
  <chr>   <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
1 Cheetah Acin~ carni Carn~ lc                  12.1      NA        NA      11.9
2 Owl mo~ Aotus omni  Prim~ <NA>                17         1.8      NA       7  
3 Mounta~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA       9.6
4 Greate~ Blar~ omni  Sori~ lc                  14.9       2.3       0.133   9.1
5 Cow     Bos   herbi Arti~ domesticated         4         0.7       0.667  20  
6 Three-~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767   9.6
# ... with 2 more variables: brainwt <dbl>, bodywt <dbl>
head(starwars)
# A tibble: 6 x 14
  name      height  mass hair_color skin_color eye_color birth_year sex   gender
  <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
1 Luke Sky~    172    77 blond      fair       blue            19   male  mascu~
2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu~
3 R2-D2         96    32 <NA>       white, bl~ red             33   none  mascu~
4 Darth Va~    202   136 none       white      yellow          41.9 male  mascu~
5 Leia Org~    150    49 brown      light      brown           19   fema~ femin~
6 Owen Lars    178   120 brown, gr~ light      blue            52   male  mascu~
# ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#   vehicles <list>, starships <list>
length(starwars$height)
[1] 87
unique(starwars$hair_color)
 [1] "blond"         NA              "none"          "brown"        
 [5] "brown, grey"   "black"         "auburn, white" "auburn, grey" 
 [9] "white"         "grey"          "auburn"        "blonde"       
[13] "unknown"      
unique(starwars$names)
Warning: Unknown or uninitialised column: `names`.
NULL
missing <- !complete.cases(msleep)
missing
 [1]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
[13]  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE
[25] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[37]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
[49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[61]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
[73]  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Show all rows that have at least one missing value
msleep[missing,]
# A tibble: 63 x 11
   name   genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
   <chr>  <chr> <chr> <chr> <chr>              <dbl>     <dbl>       <dbl> <dbl>
 1 Cheet~ Acin~ carni Carn~ lc                  12.1      NA        NA      11.9
 2 Owl m~ Aotus omni  Prim~ <NA>                17         1.8      NA       7  
 3 Mount~ Aplo~ herbi Rode~ nt                  14.4       2.4      NA       9.6
 4 Three~ Brad~ herbi Pilo~ <NA>                14.4       2.2       0.767   9.6
 5 North~ Call~ carni Carn~ vu                   8.7       1.4       0.383  15.3
 6 Vespe~ Calo~ <NA>  Rode~ <NA>                 7        NA        NA      17  
 7 Roe d~ Capr~ herbi Arti~ lc                   3        NA        NA      21  
 8 Goat   Capri herbi Arti~ lc                   5.3       0.6      NA      18.7
 9 Grivet Cerc~ omni  Prim~ lc                  10         0.7      NA      14  
10 Star-~ Cond~ omni  Sori~ lc                  10.3       2.2      NA      13.7
# ... with 53 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>

Select variables

starwars %>%
  select(name,height,mass)
# A tibble: 87 x 3
   name               height  mass
   <chr>               <int> <dbl>
 1 Luke Skywalker        172    77
 2 C-3PO                 167    75
 3 R2-D2                  96    32
 4 Darth Vader           202   136
 5 Leia Organa           150    49
 6 Owen Lars             178   120
 7 Beru Whitesun lars    165    75
 8 R5-D4                  97    32
 9 Biggs Darklighter     183    84
10 Obi-Wan Kenobi        182    77
# ... with 77 more rows
starwars %>% 
  select(1:3)
# A tibble: 87 x 3
   name               height  mass
   <chr>               <int> <dbl>
 1 Luke Skywalker        172    77
 2 C-3PO                 167    75
 3 R2-D2                  96    32
 4 Darth Vader           202   136
 5 Leia Organa           150    49
 6 Owen Lars             178   120
 7 Beru Whitesun lars    165    75
 8 R5-D4                  97    32
 9 Biggs Darklighter     183    84
10 Obi-Wan Kenobi        182    77
# ... with 77 more rows
starwars %>%
  select(ends_with("color")) 
# A tibble: 87 x 3
   hair_color    skin_color  eye_color
   <chr>         <chr>       <chr>    
 1 blond         fair        blue     
 2 <NA>          gold        yellow   
 3 <NA>          white, blue red      
 4 none          white       yellow   
 5 brown         light       brown    
 6 brown, grey   light       blue     
 7 brown         light       blue     
 8 <NA>          white, red  red      
 9 black         light       brown    
10 auburn, white fair        blue-gray
# ... with 77 more rows
starwars %>%
  rename("characters"="name") %>%
  head()
# A tibble: 6 x 14
  characters     height  mass hair_color  skin_color  eye_color birth_year sex  
  <chr>           <int> <dbl> <chr>       <chr>       <chr>          <dbl> <chr>
1 Luke Skywalker    172    77 blond       fair        blue            19   male 
2 C-3PO             167    75 <NA>        gold        yellow         112   none 
3 R2-D2              96    32 <NA>        white, blue red             33   none 
4 Darth Vader       202   136 none        white       yellow          41.9 male 
5 Leia Organa       150    49 brown       light       brown           19   fema~
6 Owen Lars         178   120 brown, grey light       blue            52   male 
# ... with 6 more variables: gender <chr>, homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>
class(starwars$hair_color)
[1] "character"
starwars$hair_color <-as.factor(starwars$hair_color)
starwars$hair_color
 [1] blond         <NA>          <NA>          none          brown        
 [6] brown, grey   brown         <NA>          black         auburn, white
[11] blond         auburn, grey  brown         brown         <NA>         
[16] <NA>          brown         brown         white         grey         
[21] black         none          none          black         none         
[26] none          auburn        brown         brown         none         
[31] brown         none          blond         none          none         
[36] none          brown         black         none          black        
[41] black         none          none          none          none         
[46] none          none          none          white         none         
[51] black         none          none          none          none         
[56] none          black         brown         brown         none         
[61] black         black         brown         white         black        
[66] black         blonde        none          none          none         
[71] white         none          none          none          none         
[76] none          none          brown         brown         none         
[81] none          black         brown         brown         none         
[86] unknown       brown        
12 Levels: auburn auburn, grey auburn, white black blond blonde ... white
starwars$hair_color <-as.character(starwars$hair_color)


starwars%>%
  glimpse()
Rows: 87
Columns: 14
$ name       <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or~
$ height     <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2~
$ mass       <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.~
$ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N~
$ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "~
$ eye_color  <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",~
$ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, ~
$ sex        <chr> "male", "none", "none", "male", "female", "male", "female",~
$ gender     <chr> "masculine", "masculine", "masculine", "masculine", "femini~
$ homeworld  <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T~
$ species    <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma~
$ films      <list> <"The Empire Strikes Back", "Revenge of the Sith", "Return~
$ vehicles   <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp~
$ starships  <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",~
levels(starwars$sex)
NULL
starwars$sex<-as.factor(starwars$sex)
levels(starwars$sex)
[1] "female"         "hermaphroditic" "male"           "none"          
# Filter rows: keep males with a mass below 55
starwars %>%
  select(mass,sex) %>%
  filter(mass<55 & sex=='male')
# A tibble: 6 x 2
   mass sex  
  <dbl> <fct>
1    17 male 
2    20 male 
3    40 male 
4    45 male 
5    15 male 
6    48 male 
# Recode data: relabel the sex levels "male" and "female" as "man" and "woman"
starwars %>%
  select(sex) %>%
  mutate(sex=recode(sex,"male"="man","female"="woman"))
# A tibble: 87 x 1
   sex  
   <fct>
 1 man  
 2 none 
 3 none 
 4 man  
 5 woman
 6 man  
 7 woman
 8 none 
 9 man  
10 man  
# ... with 77 more rows
# Mean height, ignoring missing values
mean(starwars$height,na.rm=TRUE)
[1] 174.358
# Remove duplicate rows
starwars %>%
  distinct()
# A tibble: 87 x 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <fct> <chr> 
 1 Luke Sk~    172    77 blond      fair       blue            19   male  mascu~
 2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu~
 3 R2-D2        96    32 <NA>       white, bl~ red             33   none  mascu~
 4 Darth V~    202   136 none       white      yellow          41.9 male  mascu~
 5 Leia Or~    150    49 brown      light      brown           19   fema~ femin~
 6 Owen La~    178   120 brown, gr~ light      blue            52   male  mascu~
 7 Beru Wh~    165    75 brown      light      blue            47   fema~ femin~
 8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu~
 9 Biggs D~    183    84 black      light      brown           24   male  mascu~
10 Obi-Wan~    182    77 auburn, w~ fair       blue-gray       57   male  mascu~
# ... with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>
# Manipulate data: add a new column, height_m, computed as height / 100
starwars %>%
  mutate(height_m=height/100) %>%
  select(name,height,height_m)
# A tibble: 87 x 3
   name               height height_m
   <chr>               <int>    <dbl>
 1 Luke Skywalker        172     1.72
 2 C-3PO                 167     1.67
 3 R2-D2                  96     0.96
 4 Darth Vader           202     2.02
 5 Leia Organa           150     1.5 
 6 Owen Lars             178     1.78
 7 Beru Whitesun lars    165     1.65
 8 R5-D4                  97     0.97
 9 Biggs Darklighter     183     1.83
10 Obi-Wan Kenobi        182     1.82
# ... with 77 more rows
# Conditional changes: label height_m below 1 as "short" and 1 or more as "tall"
starwars %>%
  mutate(height_m=height/100) %>%
  select(name,height,height_m) %>%
  mutate(tallness=
           if_else(height_m<1,"short","tall"))
# A tibble: 87 x 4
   name               height height_m tallness
   <chr>               <int>    <dbl> <chr>   
 1 Luke Skywalker        172     1.72 tall    
 2 C-3PO                 167     1.67 tall    
 3 R2-D2                  96     0.96 short   
 4 Darth Vader           202     2.02 tall    
 5 Leia Organa           150     1.5  tall    
 6 Owen Lars             178     1.78 tall    
 7 Beru Whitesun lars    165     1.65 tall    
 8 R5-D4                  97     0.97 short   
 9 Biggs Darklighter     183     1.83 tall    
10 Obi-Wan Kenobi        182     1.82 tall    
# ... with 77 more rows

Reshape Data

library(gapminder)
names(gapminder)
[1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
data<-select(gapminder,country,year,lifeExp)
data
# A tibble: 1,704 x 3
   country      year lifeExp
   <fct>       <int>   <dbl>
 1 Afghanistan  1952    28.8
 2 Afghanistan  1957    30.3
 3 Afghanistan  1962    32.0
 4 Afghanistan  1967    34.0
 5 Afghanistan  1972    36.1
 6 Afghanistan  1977    38.4
 7 Afghanistan  1982    39.9
 8 Afghanistan  1987    40.8
 9 Afghanistan  1992    41.7
10 Afghanistan  1997    41.8
# ... with 1,694 more rows
gapminder
# A tibble: 1,704 x 6
   country     continent  year lifeExp      pop gdpPercap
   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
 1 Afghanistan Asia       1952    28.8  8425333      779.
 2 Afghanistan Asia       1957    30.3  9240934      821.
 3 Afghanistan Asia       1962    32.0 10267083      853.
 4 Afghanistan Asia       1967    34.0 11537966      836.
 5 Afghanistan Asia       1972    36.1 13079460      740.
 6 Afghanistan Asia       1977    38.4 14880372      786.
 7 Afghanistan Asia       1982    39.9 12881816      978.
 8 Afghanistan Asia       1987    40.8 13867957      852.
 9 Afghanistan Asia       1992    41.7 16317921      649.
10 Afghanistan Asia       1997    41.8 22227415      635.
# ... with 1,694 more rows
wide_data <-data %>%
  pivot_wider(names_from=year,values_from=lifeExp)
wide_data
# A tibble: 142 x 13
   country `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992` `1997`
   <fct>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 Afghan~   28.8   30.3   32.0   34.0   36.1   38.4   39.9   40.8   41.7   41.8
 2 Albania   55.2   59.3   64.8   66.2   67.7   68.9   70.4   72     71.6   73.0
 3 Algeria   43.1   45.7   48.3   51.4   54.5   58.0   61.4   65.8   67.7   69.2
 4 Angola    30.0   32.0   34     36.0   37.9   39.5   39.9   39.9   40.6   41.0
 5 Argent~   62.5   64.4   65.1   65.6   67.1   68.5   69.9   70.8   71.9   73.3
 6 Austra~   69.1   70.3   70.9   71.1   71.9   73.5   74.7   76.3   77.6   78.8
 7 Austria   66.8   67.5   69.5   70.1   70.6   72.2   73.2   74.9   76.0   77.5
 8 Bahrain   50.9   53.8   56.9   59.9   63.3   65.6   69.1   70.8   72.6   73.9
 9 Bangla~   37.5   39.3   41.2   43.5   45.3   46.9   50.0   52.8   56.0   59.4
10 Belgium   68     69.2   70.2   70.9   71.4   72.8   73.9   75.4   76.5   77.5
# ... with 132 more rows, and 2 more variables: `2002` <dbl>, `2007` <dbl>
long_data <- wide_data %>%
  pivot_longer(2:13,
               names_to="year",
               values_to="lifeExp")
long_data
# A tibble: 1,704 x 3
   country     year  lifeExp
   <fct>       <chr>   <dbl>
 1 Afghanistan 1952     28.8
 2 Afghanistan 1957     30.3
 3 Afghanistan 1962     32.0
 4 Afghanistan 1967     34.0
 5 Afghanistan 1972     36.1
 6 Afghanistan 1977     38.4
 7 Afghanistan 1982     39.9
 8 Afghanistan 1987     40.8
 9 Afghanistan 1992     41.7
10 Afghanistan 1997     41.8
# ... with 1,694 more rows