Data

# Bring the built-in iris data set into the workspace and preview it.
# Tip: data() with no arguments lists every bundled data set.
data(iris)
head(iris, n = 6L)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#? view
#? structure

Structure

# Compact overview of iris: its class, dimensions and the type of each column.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: min/quartiles/mean/max for the numeric columns and
# level counts for the Species factor.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 

Iris Row & Column Structure

# Indexing pattern: data[rows, columns]

# iris: keep the first 50 rows and the first 4 columns.
iris[1:50, 1:4]

Starwars Row & Column Structure

# Indexing pattern: data[rows, columns]

# Open the full starwars table in the spreadsheet-style data viewer.
# View() pops up a viewer window, which only makes sense in an interactive
# session, so it is skipped when the script is run or knitted
# non-interactively.
if (interactive()) {
  View(starwars)
}

# starwars: keep the first 50 rows and the first 3 columns.
starwars[1:50, 1:3]

Starwars filter options

# Keep only the rows whose species is "Human".
starwars %>%
  filter(species == "Human")

# Keep the rows whose homeworld is one of the listed planets.
starwars %>%
  filter(homeworld %in% c("Tatooine", "Naboo"))

# AND: both conditions must hold for a row to be kept.
starwars %>%
  filter(species == "Human" & homeworld == "Tatooine")

# OR: a row is kept when at least one of the conditions holds.
starwars %>%
  filter(species == "Human" | homeworld == "Tatooine")

Creating New Variables

# bmi = mass / height^2, where height is first divided by 100 (the same
# quantity is stored as height_m in the last example below).

# starwars plus a bmi column.
df <- starwars %>%
  mutate(bmi = mass / (height / 100)^2)

df

# iris plus an area column: the product of petal width and petal length.
df1 <- iris %>%
  mutate(area = Petal.Width * Petal.Length)

df1

# Add bmi and height_m in a single mutate() call; the result is only printed,
# not assigned.
starwars %>%
  mutate(
    bmi = mass / (height / 100)^2,
    height_m = height / 100
  )

Missing Value

library(tidyr)

# Two alternative ways of filling in the missing BMI values (bmi is NA
# whenever mass or height is missing).
# Keep an untouched copy first: once the NAs in df$bmi have been overwritten
# there is nothing left for a second replace_na() call to fill.
bmi_raw <- df$bmi

# Option 1: replace every missing BMI with 0 (stored back into df).
df$bmi <- bmi_raw %>%
  replace_na(0)

df

# Option 2: replace every missing BMI with the mean of the observed values.
# The mean is taken over bmi_raw so the zeros from option 1 do not pull it
# down; the result goes into a copy so df keeps its option-1 values.
df_mean <- df
df_mean$bmi <- bmi_raw %>%
  replace_na(
    mean(bmi_raw, na.rm = TRUE)
  )

df_mean