Dplyr Paketi Çalışması

Bu kısımda da veri düzenlemede kullanılan birkaç fonksiyonu denedim.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Select

# Load the starwars example dataset and print it to inspect its columns.
data("starwars")
starwars
## # A tibble: 87 × 14
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep only the columns whose names start with "m".
dplyr::select(starwars, starts_with("m"))
## # A tibble: 87 × 1
##     mass
##    <dbl>
##  1    77
##  2    75
##  3    32
##  4   136
##  5    49
##  6   120
##  7    75
##  8    32
##  9    84
## 10    77
## # ℹ 77 more rows
# Keep every column whose name contains the letter "h".
select(starwars, contains("h"))
## # A tibble: 87 × 6
##    height hair_color    birth_year homeworld vehicles  starships
##     <int> <chr>              <dbl> <chr>     <list>    <list>   
##  1    172 blond               19   Tatooine  <chr [2]> <chr [2]>
##  2    167 <NA>               112   Tatooine  <chr [0]> <chr [0]>
##  3     96 <NA>                33   Naboo     <chr [0]> <chr [0]>
##  4    202 none                41.9 Tatooine  <chr [0]> <chr [1]>
##  5    150 brown               19   Alderaan  <chr [1]> <chr [0]>
##  6    178 brown, grey         52   Tatooine  <chr [0]> <chr [0]>
##  7    165 brown               47   Tatooine  <chr [0]> <chr [0]>
##  8     97 <NA>                NA   Tatooine  <chr [0]> <chr [0]>
##  9    183 black               24   Tatooine  <chr [0]> <chr [1]>
## 10    182 auburn, white       57   Stewjon   <chr [1]> <chr [5]>
## # ℹ 77 more rows
# Drop the last column and keep all the others.
select(starwars, -last_col())
## # A tibble: 87 × 13
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 4 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>
# Keep the contiguous range of columns from name through sex.
starwars %>%
  select(name:sex)
## # A tibble: 87 × 8
##    name            height  mass hair_color skin_color eye_color birth_year sex  
##    <chr>            <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
##  1 Luke Skywalker     172    77 blond      fair       blue            19   male 
##  2 C-3PO              167    75 <NA>       gold       yellow         112   none 
##  3 R2-D2               96    32 <NA>       white, bl… red             33   none 
##  4 Darth Vader        202   136 none       white      yellow          41.9 male 
##  5 Leia Organa        150    49 brown      light      brown           19   fema…
##  6 Owen Lars          178   120 brown, gr… light      blue            52   male 
##  7 Beru Whitesun …    165    75 brown      light      blue            47   fema…
##  8 R5-D4               97    32 <NA>       white, red red             NA   none 
##  9 Biggs Darkligh…    183    84 black      light      brown           24   male 
## 10 Obi-Wan Kenobi     182    77 auburn, w… fair       blue-gray       57   male 
## # ℹ 77 more rows

Rename

# Shorten the three *_color column names (new_name = old_name).
rename(
  starwars,
  hair = hair_color,
  skin = skin_color,
  eye = eye_color
)
## # A tibble: 87 × 14
##    name         height  mass hair  skin  eye   birth_year sex   gender homeworld
##    <chr>         <int> <dbl> <chr> <chr> <chr>      <dbl> <chr> <chr>  <chr>    
##  1 Luke Skywal…    172    77 blond fair  blue        19   male  mascu… Tatooine 
##  2 C-3PO           167    75 <NA>  gold  yell…      112   none  mascu… Tatooine 
##  3 R2-D2            96    32 <NA>  whit… red         33   none  mascu… Naboo    
##  4 Darth Vader     202   136 none  white yell…       41.9 male  mascu… Tatooine 
##  5 Leia Organa     150    49 brown light brown       19   fema… femin… Alderaan 
##  6 Owen Lars       178   120 brow… light blue        52   male  mascu… Tatooine 
##  7 Beru Whites…    165    75 brown light blue        47   fema… femin… Tatooine 
##  8 R5-D4            97    32 <NA>  whit… red         NA   none  mascu… Tatooine 
##  9 Biggs Darkl…    183    84 black light brown       24   male  mascu… Tatooine 
## 10 Obi-Wan Ken…    182    77 aubu… fair  blue…       57   male  mascu… Stewjon  
## # ℹ 77 more rows
## # ℹ 4 more variables: species <chr>, films <list>, vehicles <list>,
## #   starships <list>

Büyük harfle yazmak için rename_with fonksiyonunu kullandım.

# Apply toupper() to every column name.
rename_with(starwars, toupper)
## # A tibble: 87 × 14
##    NAME     HEIGHT  MASS HAIR_COLOR SKIN_COLOR EYE_COLOR BIRTH_YEAR SEX   GENDER
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: HOMEWORLD <chr>, SPECIES <chr>, FILMS <list>,
## #   VEHICLES <list>, STARSHIPS <list>

Aşağıdaki gibi yapınca height değişkenini en başa aldığını fark ettim. Bu yöntem, belli bir değişkeni hem başa almak hem de ismini değiştirmek için kullanılabilir. Fakat değişkenin yerini değiştirmek istemiyorsak sadece rename kullanmak daha mantıklı.

# Rename height to h and list it first; everything() appends the remaining
# columns after it.
starwars %>%
  select(h = height, everything())
## # A tibble: 87 × 14
##        h name       mass hair_color skin_color eye_color birth_year sex   gender
##    <int> <chr>     <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1   172 Luke Sky…    77 blond      fair       blue            19   male  mascu…
##  2   167 C-3PO        75 <NA>       gold       yellow         112   none  mascu…
##  3    96 R2-D2        32 <NA>       white, bl… red             33   none  mascu…
##  4   202 Darth Va…   136 none       white      yellow          41.9 male  mascu…
##  5   150 Leia Org…    49 brown      light      brown           19   fema… femin…
##  6   178 Owen Lars   120 brown, gr… light      blue            52   male  mascu…
##  7   165 Beru Whi…    75 brown      light      blue            47   fema… femin…
##  8    97 R5-D4        32 <NA>       white, red red             NA   none  mascu…
##  9   183 Biggs Da…    84 black      light      brown           24   male  mascu…
## 10   182 Obi-Wan …    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

Filter

Filtreleme yaparken iki tane eşittir (==) kullanmam gerektiğini fark ettim, çünkü tek eşittir (=) atama operatörü görevi görüyor. İki tane eşittir ise filtrelemede eşitlik karşılaştırması yapmak için kullanılıyor.

# Keep the rows whose homeworld equals "Naboo" (== tests equality).
filter(starwars, homeworld == "Naboo")
## # A tibble: 11 × 14
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 R2-D2        96    32 <NA>       white, bl… red               33 none  mascu…
##  2 Palpati…    170    75 grey       pale       yellow            82 male  mascu…
##  3 Padmé A…    185    45 brown      light      brown             46 fema… femin…
##  4 Jar Jar…    196    66 none       orange     orange            52 male  mascu…
##  5 Roos Ta…    224    82 none       grey       orange            NA male  mascu…
##  6 Rugor N…    206    NA none       green      orange            NA male  mascu…
##  7 Ric Olié    183    NA brown      fair       blue              NA male  mascu…
##  8 Quarsh …    183    NA black      dark       brown             62 male  mascu…
##  9 Gregar …    185    85 black      dark       brown             NA <NA>  <NA>  
## 10 Cordé       157    NA brown      light      brown             NA <NA>  <NA>  
## 11 Dormé       165    NA brown      light      brown             NA fema… femin…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows with a height of at least 160.
starwars %>%
  filter(height >= 160)
## # A tibble: 68 × 14
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  4 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  5 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  6 Biggs D…    183    84 black      light      brown           24   male  mascu…
##  7 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
##  8 Anakin …    188    84 blond      fair       blue            41.9 male  mascu…
##  9 Wilhuff…    180    NA auburn, g… fair       blue            64   male  mascu…
## 10 Chewbac…    228   112 brown      unknown    blue           200   male  mascu…
## # ℹ 58 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows that satisfy both conditions: height at most 172 and mass at
# least 60.
filter(starwars, height <= 172 & mass >= 60)
## # A tibble: 7 × 14
##   name      height  mass hair_color skin_color eye_color birth_year sex   gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
## 1 Luke Sky…    172    77 blond      fair       blue              19 male  mascu…
## 2 C-3PO        167    75 <NA>       gold       yellow           112 none  mascu…
## 3 Beru Whi…    165    75 brown      light      blue              47 fema… femin…
## 4 Wedge An…    170    77 brown      fair       hazel             21 male  mascu…
## 5 Palpatine    170    75 grey       pale       yellow            82 male  mascu…
## 6 Nien Nunb    160    68 none       grey       black             NA male  mascu…
## 7 Ben Quad…    163    65 none       grey, gre… orange            NA male  mascu…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

Summarize

# Mean height over all rows; missing values are not removed here, so the
# result is NA.
summarize(starwars, ortalama = mean(height))
## # A tibble: 1 × 1
##   ortalama
##      <dbl>
## 1       NA

Kayıp veri olduğu için ortalama hesaplanmadı. Bu sebeple önce kayıp verileri filtreledim.

# Drop the rows with a missing height first, then compute the mean height.
summarize(
  filter(starwars, !is.na(height)),
  ortalama = mean(height)
)
## # A tibble: 1 × 1
##   ortalama
##      <dbl>
## 1     175.

Group By

# Per-species mean height and mass.
# Here na.rm = TRUE is an argument of summarize(), not of mean(), so it
# becomes a column named na.rm and missing values are NOT removed.
starwars %>%
  dplyr::group_by(species) %>%
  summarize(
    average_height = mean(height),
    average_mass = mean(mass),
    na.rm = TRUE
  )
## # A tibble: 38 × 4
##    species   average_height average_mass na.rm
##    <chr>              <dbl>        <dbl> <lgl>
##  1 Aleena               79            15 TRUE 
##  2 Besalisk            198           102 TRUE 
##  3 Cerean              198            82 TRUE 
##  4 Chagrian            196            NA TRUE 
##  5 Clawdite            168            55 TRUE 
##  6 Droid                NA            NA TRUE 
##  7 Dug                 112            40 TRUE 
##  8 Ewok                 88            20 TRUE 
##  9 Geonosian           183            80 TRUE 
## 10 Gungan              209.           NA TRUE 
## # ℹ 28 more rows

Yukarıdaki tabloda görüldüğü üzere, na.rm=TRUE yazmama rağmen kayıp verileri çıkarmadı. Aksine na.rm diye bir sütun ekledi. Sonradan araştırarak öğrendim ki na.rm argümanını yanlış yerde kullanmışım. mean fonksiyonunun içine değil de doğrudan summarize fonksiyonunun içine yazdığım için onu yeni bir sütun olarak algıladı. Değişkenlerdeki kayıp verilerden kurtulmak için aşağıdaki gibi güncelleme yaptım.

# Per-species mean height and mass, with na.rm = TRUE passed to each mean()
# call so that missing values are dropped before averaging.
summarize(
  dplyr::group_by(starwars, species),
  average_height = mean(height, na.rm = TRUE),
  average_mass = mean(mass, na.rm = TRUE)
)
## # A tibble: 38 × 3
##    species   average_height average_mass
##    <chr>              <dbl>        <dbl>
##  1 Aleena               79          15  
##  2 Besalisk            198         102  
##  3 Cerean              198          82  
##  4 Chagrian            196         NaN  
##  5 Clawdite            168          55  
##  6 Droid               131.         69.8
##  7 Dug                 112          40  
##  8 Ewok                 88          20  
##  9 Geonosian           183          80  
## 10 Gungan              209.         74  
## # ℹ 28 more rows

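Not: NaN, "not a number" (sayı değil) anlamına geliyor; yani tanımlı bir sayısal sonuç olmadığını gösteriyor. Bir gruptaki tüm değerler kayıp olduğunda, na.rm=TRUE ile geriye boş bir vektör kalıyor ve boş bir vektörün ortalaması NaN çıkıyor.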

Arrange

# Humans with a known homeworld, tallest first.
select(starwars, homeworld, species, height) %>%
  # keep humans only
  filter(species == "Human") %>%
  # sort by height in descending order
  arrange(desc(height)) %>%
  # drop the rows whose homeworld is missing
  filter(!is.na(homeworld))
## # A tibble: 29 × 3
##    homeworld  species height
##    <chr>      <chr>    <int>
##  1 Tatooine   Human      202
##  2 Serenno    Human      193
##  3 Alderaan   Human      191
##  4 Tatooine   Human      188
##  5 Haruun Kal Human      188
##  6 Alderaan   Human      188
##  7 Naboo      Human      185
##  8 Tatooine   Human      183
##  9 Kamino     Human      183
## 10 Naboo      Human      183
## # ℹ 19 more rows

Mutate

# Add a column new_var holding the sum of height and mass.
mutate(starwars, new_var = height + mass)
## # A tibble: 87 × 15
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>, new_var <dbl>

Virgülden sonraki basamakları yuvarlamak için mutate fonksiyonu içerisinde round fonksiyonu kullandım.

# Same new column, rounded to zero decimal places.
mutate(starwars, new_var = round(height + mass, digits = 0))
## # A tibble: 87 × 15
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>, new_var <dbl>
# Add a text column yeni that joins homeworld and species with ", ".
mutate(starwars, yeni = paste(homeworld, species, sep = ", "))
## # A tibble: 87 × 15
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>, yeni <chr>

Sadece yeni değişkenin kalmasını istiyorsak transmute fonksiyonunu kullanıyoruz.

# Build the same joined column, but keep only that new column.
transmute(starwars, yeni = paste(homeworld, species, sep = ", "))
## # A tibble: 87 × 1
##    yeni           
##    <chr>          
##  1 Tatooine, Human
##  2 Tatooine, Droid
##  3 Naboo, Droid   
##  4 Tatooine, Human
##  5 Alderaan, Human
##  6 Tatooine, Human
##  7 Tatooine, Human
##  8 Tatooine, Droid
##  9 Tatooine, Human
## 10 Stewjon, Human 
## # ℹ 77 more rows