Basics of R Programming

The four quadrants

RStudio has four main panes, each in a quadrant of your screen: Source Editor, Console, Workspace Browser (and History), and Plots (and Files, Packages, Help).

How to get help when you need it

It’s preferred to use Stack Overflow or search online for an answer. You can also use the following command:

help("mean") # open the documentation page for mean()
## starting httpd help server ... done

Objects and functions

Objects are both an input as well as the output of a function (what the function returns). When passing data to a function, it is usually the first argument, with further arguments used to specify behavior.

3 + 4
## [1] 7
a <- 3
b <- 4
sum(a, b)
## [1] 7
concat <- c(a, b) # c() combines its arguments into one vector
ages <- c(3, 4, 5)
friends <- c("John", "fred", "berok") # a character vector stored in a variable
fateafame <- data.frame(ages, friends) # one column per vector
View(fateafame) # opens the data frame in a viewer tab
str(fateafame) # compact overview of the structure
## 'data.frame':    3 obs. of  2 variables:
##  $ ages   : num  3 4 5
##  $ friends: chr  "John" "fred" "berok"
fateafame$ages # `$` extracts one column of the data frame as a vector
## [1] 3 4 5
fateafame[1, 1] # value at row 1, column 1
## [1] 3

Built in data sets to practice with

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data() # lists the data sets available in the attached packages
view(x = starwars) # opens the starwars data set in the data viewer

Installing and using packages

# Install tidyverse only when it is missing: calling install.packages() on a
# package that is already attached only raises a warning and installs nothing.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
library(dplyr)
library(tibble)
starwars %>%
  filter(height > 150 & mass < 200) %>% # keep rows that meet both conditions
  mutate(height_in_meters = height / 100) %>% # add a new derived column
  select(height_in_meters, mass) %>% # keep only these two columns
  arrange(desc(mass)) %>% # sort by mass, largest first
  View() # open the result in the data viewer

Explore

Look at data structure and variables

library(tidyverse)
view(x = msleep) # browse msleep in the data viewer
glimpse(x = msleep) # one line per column: name, type and first values
## Rows: 83
## Columns: 11
## $ name         <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Greater shor…
## $ genus        <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bos", "Bra…
## $ vore         <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi", "carn…
## $ order        <chr> "Carnivora", "Primates", "Rodentia", "Soricomorpha", "Art…
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", NA, "dome…
## $ sleep_total  <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1, 3.0, 5…
## $ sleep_rem    <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0.6, 0.8, …
## $ sleep_cycle  <dbl> NA, NA, NA, 0.1333333, 0.6666667, 0.7666667, 0.3833333, N…
## $ awake        <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9, 21.0, 1…
## $ brainwt      <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0.07000, 0…
## $ bodywt       <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.490, 0.04…
head(x = msleep, n = 6) # first six rows
class(x = msleep) # class vector of the object
## [1] "tbl_df"     "tbl"        "data.frame"
str(object = msleep) # type and leading values of every column
## tibble [83 × 11] (S3: tbl_df/tbl/data.frame)
##  $ name        : chr [1:83] "Cheetah" "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" ...
##  $ genus       : chr [1:83] "Acinonyx" "Aotus" "Aplodontia" "Blarina" ...
##  $ vore        : chr [1:83] "carni" "omni" "herbi" "omni" ...
##  $ order       : chr [1:83] "Carnivora" "Primates" "Rodentia" "Soricomorpha" ...
##  $ conservation: chr [1:83] "lc" NA "nt" "lc" ...
##  $ sleep_total : num [1:83] 12.1 17 14.4 14.9 4 14.4 8.7 7 10.1 3 ...
##  $ sleep_rem   : num [1:83] NA 1.8 2.4 2.3 0.7 2.2 1.4 NA 2.9 NA ...
##  $ sleep_cycle : num [1:83] NA NA NA 0.133 0.667 ...
##  $ awake       : num [1:83] 11.9 7 9.6 9.1 20 9.6 15.3 17 13.9 21 ...
##  $ brainwt     : num [1:83] NA 0.0155 NA 0.00029 0.423 NA NA NA 0.07 0.0982 ...
##  $ bodywt      : num [1:83] 50 0.48 1.35 0.019 600 ...
length(msleep[["name"]]) # number of values in the name column, i.e. the number of observations
## [1] 83

Name of Variables

library(tidyverse)
colnames(starwars) # column (variable) names, in column order
##  [1] "name"       "height"     "mass"       "hair_color" "skin_color"
##  [6] "eye_color"  "birth_year" "sex"        "gender"     "homeworld" 
## [11] "species"    "films"      "vehicles"   "starships"

Unique categories

library(tidyverse)
unique(msleep[["vore"]]) # distinct values found in vore (NA is reported too)
## [1] "carni"   "omni"    "herbi"   NA        "insecti"

Missing data

library(tidyverse)
missing <- is.na(starwars$name) # TRUE for every row whose name is NA

Cleaning your data

Select variables

library(tidyverse)
country <- c("asia", "us", "eu")
age <- c(13, 16, 111)
name <- c("John", "James", "Joe")

df <- data.frame(name = name, age = age, country = country)
df %>% select(country, age, name) # choose columns by name, in this order
df %>% select(1:3) # choose columns by position
df %>%
  select(ends_with("e")) %>% # choose columns whose name ends in "e"
  print()
##    name age
## 1  John  13
## 2 James  16
## 3   Joe 111

Changing variable order

library(tidyverse)
name <- c("John", "James", "Joe")
age <- c(13, 16, 111)
country <- c("asia", "us", "eu")

df <- data.frame(name = name, age = age, country = country)

df %>%
  select(name, age, everything()) # name and age first, then all remaining columns

Changing variable name

library(tidyverse)
name <- c("John", "James", "Joe")
age <- c(13, 16, 111)
country <- c("asia", "us", "eu")

df <- data.frame(name = name, age = age, country = country)

df %>%
  rename("countrys" = "country") # new name on the left, existing name on the right

Changing variable type

library(tidyverse)
# Inspect the class of the column itself: class(starwars) reports the class of
# the whole tibble, which is identical before and after the conversion.
class(starwars$name)
## [1] "character"
starwars$name <- as.factor(starwars$name) # base R: overwrite the column in place

class(starwars$name)
## [1] "factor"
# dplyr equivalent; mutate() returns a modified copy, so assign it to keep it
mutate(starwars, name = as.factor(name))
glimpse(starwars)
## Rows: 87
## Columns: 14
## $ name       <fct> Luke Skywalker, C-3PO, R2-D2, Darth Vader, Leia Organa, Owe…
## $ height     <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2…
## $ mass       <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.…
## $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N…
## $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "…
## $ eye_color  <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",…
## $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, …
## $ sex        <chr> "male", "none", "none", "male", "female", "male", "female",…
## $ gender     <chr> "masculine", "masculine", "masculine", "masculine", "femini…
## $ homeworld  <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T…
## $ species    <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma…
## $ films      <list> <"The Empire Strikes Back", "Revenge of the Sith", "Return…
## $ vehicles   <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp…
## $ starships  <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",…

Changing factor levels

A factor in R is a variable used to categorize and store data that can take only a limited number of distinct values (its levels).

library(tidyverse)

levels(x = starwars$name) # the factor's categories, in level order
##  [1] "Ackbar"                "Adi Gallia"            "Anakin Skywalker"     
##  [4] "Arvel Crynyd"          "Ayla Secura"           "Bail Prestor Organa"  
##  [7] "Barriss Offee"         "BB8"                   "Ben Quadinaros"       
## [10] "Beru Whitesun lars"    "Bib Fortuna"           "Biggs Darklighter"    
## [13] "Boba Fett"             "Bossk"                 "C-3PO"                
## [16] "Captain Phasma"        "Chewbacca"             "Cliegg Lars"          
## [19] "Cordé"                 "Darth Maul"            "Darth Vader"          
## [22] "Dexter Jettster"       "Dooku"                 "Dormé"                
## [25] "Dud Bolt"              "Eeth Koth"             "Finis Valorum"        
## [28] "Finn"                  "Gasgano"               "Greedo"               
## [31] "Gregar Typho"          "Grievous"              "Han Solo"             
## [34] "IG-88"                 "Jabba Desilijic Tiure" "Jango Fett"           
## [37] "Jar Jar Binks"         "Jek Tono Porkins"      "Jocasta Nu"           
## [40] "Ki-Adi-Mundi"          "Kit Fisto"             "Lama Su"              
## [43] "Lando Calrissian"      "Leia Organa"           "Lobot"                
## [46] "Luke Skywalker"        "Luminara Unduli"       "Mace Windu"           
## [49] "Mas Amedda"            "Mon Mothma"            "Nien Nunb"            
## [52] "Nute Gunray"           "Obi-Wan Kenobi"        "Owen Lars"            
## [55] "Padmé Amidala"         "Palpatine"             "Plo Koon"             
## [58] "Poe Dameron"           "Poggle the Lesser"     "Quarsh Panaka"        
## [61] "Qui-Gon Jinn"          "R2-D2"                 "R4-P17"               
## [64] "R5-D4"                 "Ratts Tyerell"         "Raymus Antilles"      
## [67] "Rey"                   "Ric Olié"              "Roos Tarpals"         
## [70] "Rugor Nass"            "Saesee Tiin"           "San Hill"             
## [73] "Sebulba"               "Shaak Ti"              "Shmi Skywalker"       
## [76] "Sly Moore"             "Tarfful"               "Taun We"              
## [79] "Tion Medon"            "Wat Tambor"            "Watto"                
## [82] "Wedge Antilles"        "Wicket Systri Warrick" "Wilhuff Tarkin"       
## [85] "Yarael Poof"           "Yoda"                  "Zam Wesell"
# `levels` must list the actual category labels. Passing positions such as
# c(1, 3, 2) matches none of the names and turns every value into NA, so the
# positions are translated to labels first: the first three levels are
# reordered (1st, 3rd, 2nd) and all remaining levels keep their order.
name_levels <- levels(as.factor(starwars$name))
new_order <- c(name_levels[c(1, 3, 2)], name_levels[-(1:3)])
data <- mutate(starwars, name = factor(name, levels = new_order))

Filter rows

library(tidyverse)

str(object = starwars) # type and leading values of every column, list columns included
## tibble [87 × 14] (S3: tbl_df/tbl/data.frame)
##  $ name      : Factor w/ 87 levels "Ackbar","Adi Gallia",..: 46 15 62 21 44 54 10 64 12 53 ...
##  $ height    : int [1:87] 172 167 96 202 150 178 165 97 183 182 ...
##  $ mass      : num [1:87] 77 75 32 136 49 120 75 32 84 77 ...
##  $ hair_color: chr [1:87] "blond" NA NA "none" ...
##  $ skin_color: chr [1:87] "fair" "gold" "white, blue" "white" ...
##  $ eye_color : chr [1:87] "blue" "yellow" "red" "yellow" ...
##  $ birth_year: num [1:87] 19 112 33 41.9 19 52 47 NA 24 57 ...
##  $ sex       : chr [1:87] "male" "none" "none" "male" ...
##  $ gender    : chr [1:87] "masculine" "masculine" "masculine" "masculine" ...
##  $ homeworld : chr [1:87] "Tatooine" "Tatooine" "Naboo" "Tatooine" ...
##  $ species   : chr [1:87] "Human" "Droid" "Droid" "Human" ...
##  $ films     :List of 87
##   ..$ : chr [1:5] "The Empire Strikes Back" "Revenge of the Sith" "Return of the Jedi" "A New Hope" ...
##   ..$ : chr [1:6] "The Empire Strikes Back" "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith" ...
##   ..$ : chr [1:7] "The Empire Strikes Back" "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith" ...
##   ..$ : chr [1:4] "The Empire Strikes Back" "Revenge of the Sith" "Return of the Jedi" "A New Hope"
##   ..$ : chr [1:5] "The Empire Strikes Back" "Revenge of the Sith" "Return of the Jedi" "A New Hope" ...
##   ..$ : chr [1:3] "Attack of the Clones" "Revenge of the Sith" "A New Hope"
##   ..$ : chr [1:3] "Attack of the Clones" "Revenge of the Sith" "A New Hope"
##   ..$ : chr "A New Hope"
##   ..$ : chr "A New Hope"
##   ..$ : chr [1:6] "The Empire Strikes Back" "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith" ...
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:2] "Revenge of the Sith" "A New Hope"
##   ..$ : chr [1:5] "The Empire Strikes Back" "Revenge of the Sith" "Return of the Jedi" "A New Hope" ...
##   ..$ : chr [1:4] "The Empire Strikes Back" "Return of the Jedi" "A New Hope" "The Force Awakens"
##   ..$ : chr "A New Hope"
##   ..$ : chr [1:3] "The Phantom Menace" "Return of the Jedi" "A New Hope"
##   ..$ : chr [1:3] "The Empire Strikes Back" "Return of the Jedi" "A New Hope"
##   ..$ : chr "A New Hope"
##   ..$ : chr [1:5] "The Empire Strikes Back" "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith" ...
##   ..$ : chr [1:5] "The Empire Strikes Back" "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith" ...
##   ..$ : chr [1:3] "The Empire Strikes Back" "Attack of the Clones" "Return of the Jedi"
##   ..$ : chr "The Empire Strikes Back"
##   ..$ : chr "The Empire Strikes Back"
##   ..$ : chr [1:2] "The Empire Strikes Back" "Return of the Jedi"
##   ..$ : chr "The Empire Strikes Back"
##   ..$ : chr [1:2] "Return of the Jedi" "The Force Awakens"
##   ..$ : chr "Return of the Jedi"
##   ..$ : chr "Return of the Jedi"
##   ..$ : chr "Return of the Jedi"
##   ..$ : chr "Return of the Jedi"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:2] "Attack of the Clones" "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:2] "Attack of the Clones" "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:2] "Attack of the Clones" "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "Return of the Jedi"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:2] "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:2] "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:2] "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##   ..$ : chr [1:2] "Attack of the Clones" "The Phantom Menace"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "The Phantom Menace"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr "Attack of the Clones"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr "Revenge of the Sith"
##   ..$ : chr "Revenge of the Sith"
##   ..$ : chr [1:2] "Revenge of the Sith" "A New Hope"
##   ..$ : chr [1:2] "Attack of the Clones" "Revenge of the Sith"
##   ..$ : chr "Revenge of the Sith"
##   ..$ : chr "The Force Awakens"
##   ..$ : chr "The Force Awakens"
##   ..$ : chr "The Force Awakens"
##   ..$ : chr "The Force Awakens"
##   ..$ : chr "The Force Awakens"
##   ..$ : chr [1:3] "Attack of the Clones" "The Phantom Menace" "Revenge of the Sith"
##  $ vehicles  :List of 87
##   ..$ : chr [1:2] "Snowspeeder" "Imperial Speeder Bike"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Imperial Speeder Bike"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Tribubble bongo"
##   ..$ : chr [1:2] "Zephyr-G swoop bike" "XJ-6 airspeeder"
##   ..$ : chr(0) 
##   ..$ : chr "AT-ST"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Snowspeeder"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Tribubble bongo"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Sith speeder"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Flitknot speeder"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Koro-2 Exodrive airspeeder"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Tsmeu-6 personal wheel bike"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##  $ starships :List of 87
##   ..$ : chr [1:2] "X-wing" "Imperial shuttle"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "TIE Advanced x1"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "X-wing"
##   ..$ : chr [1:5] "Jedi starfighter" "Trade Federation cruiser" "Naboo star skiff" "Jedi Interceptor" ...
##   ..$ : chr [1:3] "Trade Federation cruiser" "Jedi Interceptor" "Naboo fighter"
##   ..$ : chr(0) 
##   ..$ : chr [1:2] "Millennium Falcon" "Imperial shuttle"
##   ..$ : chr [1:2] "Millennium Falcon" "Imperial shuttle"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "X-wing"
##   ..$ : chr "X-wing"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Slave 1"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Millennium Falcon"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "A-wing"
##   ..$ : chr(0) 
##   ..$ : chr "Millennium Falcon"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Naboo Royal Starship"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Scimitar"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Jedi starfighter"
##   ..$ : chr(0) 
##   ..$ : chr "Naboo fighter"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "Belbullab-22 starfighter"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr "T-70 X-wing fighter"
##   ..$ : chr(0) 
##   ..$ : chr(0) 
##   ..$ : chr [1:3] "H-type Nubian yacht" "Naboo star skiff" "Naboo fighter"
starwars %>% select(height) # keeps only the listed columns
starwars %>% filter(height == 172) # keeps only the rows where the condition is TRUE
## `==` is a comparison ("is it equal to?"), not an assignment

Recode data

library(tidyverse)

starwars %>%
  select(sex) %>%
  # recode() takes old = new pairs; the singular "woman" matches "man"
  mutate(sex = recode(sex, "male" = "man", "female" = "woman"))

Dealing with missing data

library(tidyverse)

mean(x = starwars$height, na.rm = TRUE) # na.rm = TRUE leaves NA values out of the average
## [1] 174.358

Dealing with duplicates

library(tidyverse)

names <- c("John", "james", "joe", "John")
ages <- c(12, 22, 22, 12)

df <- data.frame(names = names, ages = ages)

df %>% distinct() # repeated rows are collapsed into one
df # the original data frame, duplicate row included, for comparison

Manipulating data

Create or change a variable (Mutate)

library(tidyverse)

starwars %>%
  select(name, height) %>% # keep the columns of interest first
  mutate(heightm = height / 100) # then append the derived column

Conditional changes (if_else)

library(tidyverse)
starwars %>%
  select(name, height) %>%
  mutate(
    heightm = height / 100,
    # if_else(condition, value when TRUE, value when FALSE)
    tallness = if_else(heightm < 1, "Short", "Tall")
  )

Reshape data with pivot wider

library(tidyverse) # provides select(), pivot_wider() and view() used below
library(gapminder)

names(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
gapdata <- select(gapminder, country, year, lifeExp)

# One row per country, one column per year, cells filled with lifeExp
wide_data <- gapdata %>%
  pivot_wider(names_from = year, values_from = lifeExp)
view(wide_data)

Reshape data with pivot longer

library(tidyverse)
# Pivot every column except country rather than hard-coding positions 2:13,
# so the reshape keeps working if the number of year columns changes.
longer_data <- wide_data %>%
  pivot_longer(cols = -country, names_to = "year", values_to = "lifeExp")
view(longer_data)

Describe your data

library(tidyverse)
view(x = msleep) # browse msleep in the data viewer

Range / Spread

library(tidyverse)
# Sample values; despite its name, df is a plain numeric vector here
df <- c(1, 5, 161, 2, 6, 2, 1, 6, 6, 6, 2, 3, 3, 4)

min(df) # smallest value
## [1] 1
max(df) # largest value
## [1] 161
range(df) # smallest and largest value together
## [1]   1 161
IQR(df) # interquartile range
## [1] 4

Centrality

library(tidyverse)
df <- c(1, 5, 161, 2, 6, 2, 1, 6, 6, 6, 2, 3, 3, 4)

mean(df)
## [1] 14.85714
median(df)
## [1] 3.5
# Base R has no function for the statistical mode: mode() reports how the
# object is stored ("numeric"). Count each value and keep the most frequent.
value_counts <- table(df)
as.numeric(names(value_counts)[value_counts == max(value_counts)])
## [1] 6

Variance

library(tidyverse)
# Sample values; despite its name, df is a plain numeric vector here
df <- c(1, 5, 161, 2, 6, 2, 1, 6, 6, 6, 2, 3, 3, 4)
var(x = df) # sample variance
## [1] 1772.901

Summarize your data

library(tidyverse)

msleep %>%
  drop_na(vore) %>% # rows without a diet category cannot be grouped meaningfully
  group_by(vore) %>% # one summary row per diet category
  summarise(
    lower = min(sleep_total), # smallest value in the group
    average = mean(sleep_total),
    upper = max(sleep_total), # largest value in the group
    diference = max(sleep_total) - min(sleep_total)
  ) %>%
  arrange(average) %>% # order the groups by their average
  view()

Create tables

library(tidyverse)

table(msleep[["vore"]]) # number of rows per vore category
## 
##   carni   herbi insecti    omni 
##      19      32       5      20
msleep %>%
  filter(order %in% c("Rodentia", "Primates")) %>% # keep two orders only
  select(vore, order) %>%
  table() # two-way table: vore in rows, order in columns
##        order
## vore    Primates Rodentia
##   carni        1        1
##   herbi        1       16
##   omni        10        2

Visualize

library(tidyverse)
plot(x = pressure) # base-graphics plot of the built-in pressure data set

The grammar of graphics

 data
 mapping
 geometry

Bar plots

library(tidyverse)

starwars %>%
  ggplot(aes(x = gender)) + # data comes from the pipe, mapping from aes()
  geom_bar() # one bar per gender, height = number of rows

library(tidyverse)
starwars %>%
  # Drop only rows with a missing gender: a bare drop_na() removes every row
  # that has an NA in any column, discarding most of the data.
  drop_na(gender) %>%
  ggplot(mapping = aes(x = gender)) +
  geom_bar()

Histograms

library(tidyverse)
# Rows without a height are removed before plotting
ggplot(data = drop_na(starwars, height), mapping = aes(x = height)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#### Box plots

library(tidyverse)
# Rows without a height are removed before plotting
ggplot(data = drop_na(starwars, height), mapping = aes(x = height)) +
  geom_boxplot(fill = "steelblue") +
  labs(title = "box of height", x = "Height of Characters") +
  theme_bw()

#### Density plots

library(tidyverse)

starwars %>%
  filter(sex %in% c("male", "female")) %>% # compare these two groups only
  drop_na(height) %>%
  ggplot(aes(x = height, color = sex, fill = sex)) +
  geom_density(alpha = 0.2) + # semi-transparent so overlapping curves stay visible
  theme_bw()

#### Scatter plots

library(tidyverse)

starwars %>%
  filter(mass < 200) %>% # keep characters lighter than 200
  ggplot(aes(x = height, y = mass, color = sex)) +
  geom_point(size = 5, alpha = 0.5) +
  labs(title = "Height and mass by sex") +
  theme_bw()

#### Smooth models

library(tidyverse)
starwars %>%
  filter(mass < 200) %>% # keep characters lighter than 200
  ggplot(aes(x = height, y = mass, color = sex)) +
  geom_point(size = 5, alpha = 0.5) +
  geom_smooth() + # smoothed trend drawn over the points
  facet_wrap(~sex) + # one panel per sex
  labs(title = "height/mass by sex") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small.  fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 95.48
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 71.52
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 10716
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.  fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 95.48
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 71.52
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 10716
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

### Analyze

Hypothesis testing, t test

library(tidyverse)
library(gapminder)
# `paired` is not passed: the two-sample (unpaired) test is already the default,
# and the formula method of t.test() rejects that argument in R >= 4.4.0.
gapminder %>%
  filter(continent %in% c("Africa", "Europe")) %>%
  t.test(lifeExp ~ continent,
    data = ., # `.` is where the piped data frame is inserted
    alternative = "two.sided" # test for a difference in either direction
  )
## 
##  Welch Two Sample t-test
## 
## data:  lifeExp by continent
## t = -49.551, df = 981.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Africa and group Europe is not equal to 0
## 95 percent confidence interval:
##  -23.95076 -22.12595
## sample estimates:
## mean in group Africa mean in group Europe 
##             48.86533             71.90369

Anova

library(tidyverse)
library(gapminder)
gapminder %>%
  filter(
    year == 2007, # a single year
    continent %in% c("Americas", "Europe", "Asia") # three continents to compare
  ) %>%
  aov(lifeExp ~ continent, data = .) %>% # `.` is where the piped data frame is inserted
  TukeyHSD() # pairwise comparisons between the continents
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = lifeExp ~ continent, data = .)
## 
## $continent
##                      diff        lwr        upr     p adj
## Asia-Americas   -2.879635 -6.4839802  0.7247099 0.1432634
## Europe-Americas  4.040480  0.3592746  7.7216854 0.0279460
## Europe-Asia      6.920115  3.4909215 10.3493088 0.0000189
  # https://stats.stackexchange.com/questions/253588/interpreting-tukeyhsd-output-in-r

Chi Squared

library(tidyverse)
# Cross-tabulate the two columns of the built-in cars data set and pass the
# resulting contingency table to chisq.test().
# NOTE(review): the echoed output carries an "approximation may be incorrect"
# warning; confirm that cars is the intended example data for this test.
cars %>% 
table() %>% 
chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 636.94, df = 612, p-value = 0.235

Linear Model

library(tidyverse)
# Fit dist as a linear function of speed on the built-in cars data set
# (`.` is where the piped data frame is inserted), then hand the fitted
# model to plot().
cars %>% 
lm(dist ~ speed, data = .) %>% 
plot()