Bu kısımda da veri düzenlemede kullanılan birkaç fonksiyonu denedim.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the starwars example dataset from dplyr and print it.
data("starwars", package = "dplyr")
starwars
## # A tibble: 87 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep only the columns selected by starts_with("m").
select(starwars, starts_with("m"))
## # A tibble: 87 × 1
## mass
## <dbl>
## 1 77
## 2 75
## 3 32
## 4 136
## 5 49
## 6 120
## 7 75
## 8 32
## 9 84
## 10 77
## # ℹ 77 more rows
# Keep only the columns whose names contain "h".
dplyr::select(starwars, contains("h"))
## # A tibble: 87 × 6
## height hair_color birth_year homeworld vehicles starships
## <int> <chr> <dbl> <chr> <list> <list>
## 1 172 blond 19 Tatooine <chr [2]> <chr [2]>
## 2 167 <NA> 112 Tatooine <chr [0]> <chr [0]>
## 3 96 <NA> 33 Naboo <chr [0]> <chr [0]>
## 4 202 none 41.9 Tatooine <chr [0]> <chr [1]>
## 5 150 brown 19 Alderaan <chr [1]> <chr [0]>
## 6 178 brown, grey 52 Tatooine <chr [0]> <chr [0]>
## 7 165 brown 47 Tatooine <chr [0]> <chr [0]>
## 8 97 <NA> NA Tatooine <chr [0]> <chr [0]>
## 9 183 black 24 Tatooine <chr [0]> <chr [1]>
## 10 182 auburn, white 57 Stewjon <chr [1]> <chr [5]>
## # ℹ 77 more rows
# Drop the last column and keep everything else.
select(starwars, -last_col())
## # A tibble: 87 × 13
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 4 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>
# Select the contiguous range of columns from `name` through `sex`.
starwars %>%
  select(name:sex)
## # A tibble: 87 × 8
## name height mass hair_color skin_color eye_color birth_year sex
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Skywalker 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 none
## 3 R2-D2 96 32 <NA> white, bl… red 33 none
## 4 Darth Vader 202 136 none white yellow 41.9 male
## 5 Leia Organa 150 49 brown light brown 19 fema…
## 6 Owen Lars 178 120 brown, gr… light blue 52 male
## 7 Beru Whitesun … 165 75 brown light blue 47 fema…
## 8 R5-D4 97 32 <NA> white, red red NA none
## 9 Biggs Darkligh… 183 84 black light brown 24 male
## 10 Obi-Wan Kenobi 182 77 auburn, w… fair blue-gray 57 male
## # ℹ 77 more rows
# Shorten the three *_color column names (new_name = old_name).
rename(
  starwars,
  hair = hair_color,
  skin = skin_color,
  eye = eye_color
)
## # A tibble: 87 × 14
## name height mass hair skin eye birth_year sex gender homeworld
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooine
## 2 C-3PO 167 75 <NA> gold yell… 112 none mascu… Tatooine
## 3 R2-D2 96 32 <NA> whit… red 33 none mascu… Naboo
## 4 Darth Vader 202 136 none white yell… 41.9 male mascu… Tatooine
## 5 Leia Organa 150 49 brown light brown 19 fema… femin… Alderaan
## 6 Owen Lars 178 120 brow… light blue 52 male mascu… Tatooine
## 7 Beru Whites… 165 75 brown light blue 47 fema… femin… Tatooine
## 8 R5-D4 97 32 <NA> whit… red NA none mascu… Tatooine
## 9 Biggs Darkl… 183 84 black light brown 24 male mascu… Tatooine
## 10 Obi-Wan Ken… 182 77 aubu… fair blue… 57 male mascu… Stewjon
## # ℹ 77 more rows
## # ℹ 4 more variables: species <chr>, films <list>, vehicles <list>,
## # starships <list>
Sütun adlarını büyük harfe çevirmek için rename_with fonksiyonunu kullandım.
# Apply toupper() to every column name.
rename_with(starwars, .fn = toupper)
## # A tibble: 87 × 14
## NAME HEIGHT MASS HAIR_COLOR SKIN_COLOR EYE_COLOR BIRTH_YEAR SEX GENDER
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: HOMEWORLD <chr>, SPECIES <chr>, FILMS <list>,
## # VEHICLES <list>, STARSHIPS <list>
Aşağıdaki gibi yapınca height değişkenini en başa aldığını fark ettim. Bu yöntem, belli bir değişkeni hem başa almak hem de ismini değiştirmek için kullanılabilir. Fakat değişkenin yerini değiştirmek istemiyorsak sadece rename kullanmak daha mantıklı.
# Rename `height` to `h` and move it to the front; everything() keeps the
# remaining columns after it.
starwars %>%
  select(h = height, everything())
## # A tibble: 87 × 14
## h name mass hair_color skin_color eye_color birth_year sex gender
## <int> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 172 Luke Sky… 77 blond fair blue 19 male mascu…
## 2 167 C-3PO 75 <NA> gold yellow 112 none mascu…
## 3 96 R2-D2 32 <NA> white, bl… red 33 none mascu…
## 4 202 Darth Va… 136 none white yellow 41.9 male mascu…
## 5 150 Leia Org… 49 brown light brown 19 fema… femin…
## 6 178 Owen Lars 120 brown, gr… light blue 52 male mascu…
## 7 165 Beru Whi… 75 brown light blue 47 fema… femin…
## 8 97 R5-D4 32 <NA> white, red red NA none mascu…
## 9 183 Biggs Da… 84 black light brown 24 male mascu…
## 10 182 Obi-Wan … 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
Filtreleme yaparken == (iki tane eşittir) kullanmam gerektiğini fark ettim, çünkü tek eşittir atama operatörü görevi görüyor. İki tane eşittir ise filtrelemede eşitlik karşılaştırması yapmak için kullanılıyor.
# Keep the rows whose homeworld is exactly "Naboo".
filter(starwars, homeworld == "Naboo")
## # A tibble: 11 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 2 Palpati… 170 75 grey pale yellow 82 male mascu…
## 3 Padmé A… 185 45 brown light brown 46 fema… femin…
## 4 Jar Jar… 196 66 none orange orange 52 male mascu…
## 5 Roos Ta… 224 82 none grey orange NA male mascu…
## 6 Rugor N… 206 NA none green orange NA male mascu…
## 7 Ric Olié 183 NA brown fair blue NA male mascu…
## 8 Quarsh … 183 NA black dark brown 62 male mascu…
## 9 Gregar … 185 85 black dark brown NA <NA> <NA>
## 10 Cordé 157 NA brown light brown NA <NA> <NA>
## 11 Dormé 165 NA brown light brown NA fema… femin…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows with a height of at least 160.
starwars %>%
  filter(height >= 160)
## # A tibble: 68 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 Darth V… 202 136 none white yellow 41.9 male mascu…
## 4 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 5 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 6 Biggs D… 183 84 black light brown 24 male mascu…
## 7 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## 8 Anakin … 188 84 blond fair blue 41.9 male mascu…
## 9 Wilhuff… 180 NA auburn, g… fair blue 64 male mascu…
## 10 Chewbac… 228 112 brown unknown blue 200 male mascu…
## # ℹ 58 more rows
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows that satisfy both conditions: height <= 172 and mass >= 60.
filter(starwars, height <= 172 & mass >= 60)
## # A tibble: 7 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sky… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 Beru Whi… 165 75 brown light blue 47 fema… femin…
## 4 Wedge An… 170 77 brown fair hazel 21 male mascu…
## 5 Palpatine 170 75 grey pale yellow 82 male mascu…
## 6 Nien Nunb 160 68 none grey black NA male mascu…
## 7 Ben Quad… 163 65 none grey, gre… orange NA male mascu…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Intentional demonstration: mean() is called without na.rm, so the missing
# heights make the result NA.
summarise(starwars, ortalama = mean(height))
## # A tibble: 1 × 1
## ortalama
## <dbl>
## 1 NA
Kayıp veri olduğu için ortalama hesaplanmadı. Bu sebeple önce kayıp verileri filtreledim.
# Drop the rows with a missing height first, then average what remains.
starwars %>%
  dplyr::filter(!is.na(height)) %>%
  dplyr::summarise(ortalama = mean(height))
## # A tibble: 1 × 1
## ortalama
## <dbl>
## 1 175.
# Intentional demonstration of a pitfall: na.rm = TRUE is an argument of
# summarise() here, not of mean(), so it becomes an output column named
# `na.rm` and the missing values are NOT removed from the means.
starwars %>%
  group_by(species) %>%
  summarise(
    average_height = mean(height),
    average_mass = mean(mass),
    na.rm = TRUE
  )
## # A tibble: 38 × 4
## species average_height average_mass na.rm
## <chr> <dbl> <dbl> <lgl>
## 1 Aleena 79 15 TRUE
## 2 Besalisk 198 102 TRUE
## 3 Cerean 198 82 TRUE
## 4 Chagrian 196 NA TRUE
## 5 Clawdite 168 55 TRUE
## 6 Droid NA NA TRUE
## 7 Dug 112 40 TRUE
## 8 Ewok 88 20 TRUE
## 9 Geonosian 183 80 TRUE
## 10 Gungan 209. NA TRUE
## # ℹ 28 more rows
Yukarıdaki tabloda görüldüğü üzere, na.rm=TRUE yazmama rağmen kayıp verileri çıkarmadı. Aksine na.rm diye bir sütun ekledi. Sonradan araştırarak öğrendim ki na.rm argümanını yanlış yerde kullanmışım. mean fonksiyonunun içinde değil de doğrudan summarize fonksiyonunun içinde yazdığım için onu yeni bir sütun olarak algıladı. Değişken içerisindeki kayıp verilerden kurtulmak için aşağıdaki gibi güncelleme yaptım.
# Per-species averages; na.rm = TRUE is passed to each mean() call so the
# missing values are dropped before averaging.
starwars %>%
  group_by(species) %>%
  summarise(
    average_height = mean(height, na.rm = TRUE),
    average_mass = mean(mass, na.rm = TRUE)
  )
## # A tibble: 38 × 3
## species average_height average_mass
## <chr> <dbl> <dbl>
## 1 Aleena 79 15
## 2 Besalisk 198 102
## 3 Cerean 198 82
## 4 Chagrian 196 NaN
## 5 Clawdite 168 55
## 6 Droid 131. 69.8
## 7 Dug 112 40
## 8 Ewok 88 20
## 9 Geonosian 183 80
## 10 Gungan 209. 74
## # ℹ 28 more rows
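Not: NaN, "not a number" (sayı değil) anlamına geliyor. Kayıp veriler çıkarıldıktan sonra ortalaması alınacak hiçbir değer kalmadığında (örneğin Chagrian türünün mass değeri) sonuç NaN oluyor.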
# Humans with a known homeworld, tallest first. Both row conditions are
# applied in a single filter() before sorting; the resulting rows and their
# order are the same as filtering on homeworld after the sort.
starwars %>%
  select(homeworld, species, height) %>%
  filter(species == "Human", !is.na(homeworld)) %>%
  arrange(desc(height))
## # A tibble: 29 × 3
## homeworld species height
## <chr> <chr> <int>
## 1 Tatooine Human 202
## 2 Serenno Human 193
## 3 Alderaan Human 191
## 4 Tatooine Human 188
## 5 Haruun Kal Human 188
## 6 Alderaan Human 188
## 7 Naboo Human 185
## 8 Tatooine Human 183
## 9 Kamino Human 183
## 10 Naboo Human 183
## # ℹ 19 more rows
# Append a column `new_var` holding height + mass.
mutate(starwars, new_var = height + mass)
## # A tibble: 87 × 15
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>, new_var <dbl>
Virgülden sonraki basamakları yuvarlamak için mutate fonksiyonu içerisinde round fonksiyonu kullandım.
# Append `new_var` as height + mass rounded to zero decimal places.
mutate(starwars, new_var = round(height + mass, digits = 0))
## # A tibble: 87 × 15
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>, new_var <dbl>
# Append a text column `yeni` joining homeworld and species with ", ".
mutate(starwars, yeni = paste(homeworld, species, sep = ", "))
## # A tibble: 87 × 15
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>, yeni <chr>
Sadece yeni değişkenin kalmasını istiyorsak transmute fonksiyonunu kullanıyoruz.
# Build the same `yeni` column, but keep only that column in the result.
transmute(starwars, yeni = paste(homeworld, species, sep = ", "))
## # A tibble: 87 × 1
## yeni
## <chr>
## 1 Tatooine, Human
## 2 Tatooine, Droid
## 3 Naboo, Droid
## 4 Tatooine, Human
## 5 Alderaan, Human
## 6 Tatooine, Human
## 7 Tatooine, Human
## 8 Tatooine, Droid
## 9 Tatooine, Human
## 10 Stewjon, Human
## # ℹ 77 more rows