# Remove every (non-hidden) object from the current workspace
rm(list = ls())
# Switch to the folder holding the data and read the sales orders,
# keeping text columns as character vectors rather than factors.
# NOTE(review): the path is specific to one machine - adjust before running
setwd("/Users/nishantaneja/Desktop/Files_for_R")
sales_order <- read.csv(file = "sales_order.csv", stringsAsFactors = FALSE)
# Attach dplyr, which supplies glimpse() and the %>% operator
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Let’s use glimpse() to inspect the dataframe we just added to our environment:
# Print a compact overview: row/column counts, then one line per column
dplyr::glimpse(sales_order)
## Rows: 43
## Columns: 7
## $ OrderDate <chr> "2021-01-06", "2021-01-23", "2021-02-09", "2021-02-26", "202…
## $ Region <chr> "East", "Central", "Central", "Central", "West", "East", "Ce…
## $ Rep <chr> "Jones", "Kivell", "Jardine", "Gill", "Sorvino", "Jones", "A…
## $ Item <chr> "Pencil", "Binder", "Pencil", "Pen", "Pencil", "Binder", "Pe…
## $ Units <int> 95, 50, 36, 27, 56, 60, 75, 90, 32, 60, 90, 29, 81, 35, 2, 1…
## $ Unit.Cost <dbl> 1.99, 19.99, 4.99, 19.99, 2.99, 4.99, 1.99, 4.99, 1.99, 8.99…
## $ Total <dbl> 189.05, 999.50, 179.64, 539.73, 167.44, 299.40, 149.25, 449.…
# Same call written with the pipe: the data frame on the left is passed
# to glimpse() as its first argument
sales_order %>%
  glimpse()
## Rows: 43
## Columns: 7
## $ OrderDate <chr> "2021-01-06", "2021-01-23", "2021-02-09", "2021-02-26", "202…
## $ Region <chr> "East", "Central", "Central", "Central", "West", "East", "Ce…
## $ Rep <chr> "Jones", "Kivell", "Jardine", "Gill", "Sorvino", "Jones", "A…
## $ Item <chr> "Pencil", "Binder", "Pencil", "Pen", "Pencil", "Binder", "Pe…
## $ Units <int> 95, 50, 36, 27, 56, 60, 75, 90, 32, 60, 90, 29, 81, 35, 2, 1…
## $ Unit.Cost <dbl> 1.99, 19.99, 4.99, 19.99, 2.99, 4.99, 1.99, 4.99, 1.99, 8.99…
## $ Total <dbl> 189.05, 999.50, 179.64, 539.73, 167.44, 299.40, 149.25, 449.…
Note: dbl stands for the double class, i.e. a double-precision floating-point number.
We can also use str() to inspect the dataframe; str() is part of base R. glimpse() produces cleaner output and is therefore popular among dplyr users.
# Display the internal structure of the data frame, one line per column
str(object = sales_order)
## 'data.frame': 43 obs. of 7 variables:
## $ OrderDate: chr "2021-01-06" "2021-01-23" "2021-02-09" "2021-02-26" ...
## $ Region : chr "East" "Central" "Central" "Central" ...
## $ Rep : chr "Jones" "Kivell" "Jardine" "Gill" ...
## $ Item : chr "Pencil" "Binder" "Pencil" "Pen" ...
## $ Units : int 95 50 36 27 56 60 75 90 32 60 ...
## $ Unit.Cost: num 1.99 19.99 4.99 19.99 2.99 ...
## $ Total : num 189 1000 180 540 167 ...
# Piped form of the same str() call
sales_order %>%
  str()
## 'data.frame': 43 obs. of 7 variables:
## $ OrderDate: chr "2021-01-06" "2021-01-23" "2021-02-09" "2021-02-26" ...
## $ Region : chr "East" "Central" "Central" "Central" ...
## $ Rep : chr "Jones" "Kivell" "Jardine" "Gill" ...
## $ Item : chr "Pencil" "Binder" "Pencil" "Pen" ...
## $ Units : int 95 50 36 27 56 60 75 90 32 60 ...
## $ Unit.Cost: num 1.99 19.99 4.99 19.99 2.99 ...
## $ Total : num 189 1000 180 540 167 ...