# Importing the dataset ----
#
# This script deliberately does NOT call rm(list = ls()) or setwd():
#   * rm(list = ls()) only removes objects from the global environment; it
#     does not detach packages or reset options, so it does not give a truly
#     clean session. Restart R for a fresh session instead.
#   * setwd() with an absolute, user-specific path makes the script fail on
#     any other machine and silently changes global state.
#
# The data directory defaults to the original location and can be overridden
# without editing the script by setting the SALES_DATA_DIR environment
# variable. Any further files should be read via file.path(data_dir, ...).
data_dir <- Sys.getenv(
  "SALES_DATA_DIR",
  unset = "/Users/nishantaneja/Desktop/Files_for_R"
)

# Keep text columns as character vectors rather than factors.
sales_order <- read.csv(
  file.path(data_dir, "sales_order.csv"),
  stringsAsFactors = FALSE
)

# Attach dplyr ----
# Provides glimpse() and the %>% pipe used below.
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Let’s use glimpse() to inspect the data frame we just loaded into our environment:

# Print a compact, one-line-per-column overview of the data frame.
dplyr::glimpse(sales_order)
## Rows: 43
## Columns: 7
## $ OrderDate <chr> "2021-01-06", "2021-01-23", "2021-02-09", "2021-02-26", "202…
## $ Region    <chr> "East", "Central", "Central", "Central", "West", "East", "Ce…
## $ Rep       <chr> "Jones", "Kivell", "Jardine", "Gill", "Sorvino", "Jones", "A…
## $ Item      <chr> "Pencil", "Binder", "Pencil", "Pen", "Pencil", "Binder", "Pe…
## $ Units     <int> 95, 50, 36, 27, 56, 60, 75, 90, 32, 60, 90, 29, 81, 35, 2, 1…
## $ Unit.Cost <dbl> 1.99, 19.99, 4.99, 19.99, 2.99, 4.99, 1.99, 4.99, 1.99, 8.99…
## $ Total     <dbl> 189.05, 999.50, 179.64, 539.73, 167.44, 299.40, 149.25, 449.…
# The same overview, written with the pipe (chaining) operator:
# the data frame on the left is passed as the first argument to glimpse().
sales_order %>%
  dplyr::glimpse()
## Rows: 43
## Columns: 7
## $ OrderDate <chr> "2021-01-06", "2021-01-23", "2021-02-09", "2021-02-26", "202…
## $ Region    <chr> "East", "Central", "Central", "Central", "West", "East", "Ce…
## $ Rep       <chr> "Jones", "Kivell", "Jardine", "Gill", "Sorvino", "Jones", "A…
## $ Item      <chr> "Pencil", "Binder", "Pencil", "Pen", "Pencil", "Binder", "Pe…
## $ Units     <int> 95, 50, 36, 27, 56, 60, 75, 90, 32, 60, 90, 29, 81, 35, 2, 1…
## $ Unit.Cost <dbl> 1.99, 19.99, 4.99, 19.99, 2.99, 4.99, 1.99, 4.99, 1.99, 8.99…
## $ Total     <dbl> 189.05, 999.50, 179.64, 539.73, 167.44, 299.40, 149.25, 449.…

Note: dbl stands for "double", i.e. a double-precision floating-point number.

We can also use str() to inspect the data frame; str() ships with base R. glimpse() produces cleaner output and is therefore popular among dplyr users.

# Display the internal structure of the data frame (base R equivalent of
# glimpse()).
utils::str(sales_order)
## 'data.frame':    43 obs. of  7 variables:
##  $ OrderDate: chr  "2021-01-06" "2021-01-23" "2021-02-09" "2021-02-26" ...
##  $ Region   : chr  "East" "Central" "Central" "Central" ...
##  $ Rep      : chr  "Jones" "Kivell" "Jardine" "Gill" ...
##  $ Item     : chr  "Pencil" "Binder" "Pencil" "Pen" ...
##  $ Units    : int  95 50 36 27 56 60 75 90 32 60 ...
##  $ Unit.Cost: num  1.99 19.99 4.99 19.99 2.99 ...
##  $ Total    : num  189 1000 180 540 167 ...
# The same structure summary, written with the pipe (chaining) operator.
sales_order %>%
  utils::str()
## 'data.frame':    43 obs. of  7 variables:
##  $ OrderDate: chr  "2021-01-06" "2021-01-23" "2021-02-09" "2021-02-26" ...
##  $ Region   : chr  "East" "Central" "Central" "Central" ...
##  $ Rep      : chr  "Jones" "Kivell" "Jardine" "Gill" ...
##  $ Item     : chr  "Pencil" "Binder" "Pencil" "Pen" ...
##  $ Units    : int  95 50 36 27 56 60 75 90 32 60 ...
##  $ Unit.Cost: num  1.99 19.99 4.99 19.99 2.99 ...
##  $ Total    : num  189 1000 180 540 167 ...