# Load the Superstore sample data set into a data frame and preview
# its first six rows.
df <- read.csv(file = "Data/Sample - Superstore.csv")
head(df, n = 6)
##   Row.ID       Order.ID Order.Date  Ship.Date      Ship.Mode Customer.ID
## 1      1 CA-2016-152156  11/8/2016 11/11/2016   Second Class    CG-12520
## 2      2 CA-2016-152156  11/8/2016 11/11/2016   Second Class    CG-12520
## 3      3 CA-2016-138688  6/12/2016  6/16/2016   Second Class    DV-13045
## 4      4 US-2015-108966 10/11/2015 10/18/2015 Standard Class    SO-20335
## 5      5 US-2015-108966 10/11/2015 10/18/2015 Standard Class    SO-20335
## 6      6 CA-2014-115812   6/9/2014  6/14/2014 Standard Class    BH-11710
##     Customer.Name   Segment       Country            City      State
## 1     Claire Gute  Consumer United States       Henderson   Kentucky
## 2     Claire Gute  Consumer United States       Henderson   Kentucky
## 3 Darrin Van Huff Corporate United States     Los Angeles California
## 4  Sean O'Donnell  Consumer United States Fort Lauderdale    Florida
## 5  Sean O'Donnell  Consumer United States Fort Lauderdale    Florida
## 6 Brosina Hoffman  Consumer United States     Los Angeles California
##   Postal.Code Region      Product.ID        Category Sub.Category
## 1       42420  South FUR-BO-10001798       Furniture    Bookcases
## 2       42420  South FUR-CH-10000454       Furniture       Chairs
## 3       90036   West OFF-LA-10000240 Office Supplies       Labels
## 4       33311  South FUR-TA-10000577       Furniture       Tables
## 5       33311  South OFF-ST-10000760 Office Supplies      Storage
## 6       90032   West FUR-FU-10001487       Furniture  Furnishings
##                                                       Product.Name    Sales
## 1                                Bush Somerset Collection Bookcase 261.9600
## 2      Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back 731.9400
## 3        Self-Adhesive Address Labels for Typewriters by Universal  14.6200
## 4                    Bretford CR4500 Series Slim Rectangular Table 957.5775
## 5                                   Eldon Fold 'N Roll Cart System  22.3680
## 6 Eldon Expressions Wood and Plastic Desk Accessories, Cherry Wood  48.8600
##   Quantity Discount    Profit
## 1        2     0.00   41.9136
## 2        3     0.00  219.5820
## 3        2     0.00    6.8714
## 4        5     0.45 -383.0310
## 5        2     0.20    2.5164
## 6        7     0.00   14.1694

2.2 Funciones y paquetes de R para análisis de datos

2.2.1 arrange()

# arrange() sorts rows by numeric, text, or date variables.
# Both date columns are parsed from month/day/year text with mdy() first,
# then the rows are ordered by order date.
df <- df %>%
  mutate(
    Order.Date = mdy(Order.Date),
    Ship.Date = mdy(Ship.Date)
  ) %>%
  arrange(Order.Date)
head(df)
##   Row.ID       Order.ID Order.Date  Ship.Date      Ship.Mode Customer.ID
## 1   7981 CA-2014-103800 2014-01-03 2014-01-07 Standard Class    DP-13000
## 2    740 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 3    741 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 4    742 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 5   1760 CA-2014-141817 2014-01-05 2014-01-12 Standard Class    MB-18085
## 6   5328 CA-2014-130813 2014-01-06 2014-01-08   Second Class    LS-17230
##      Customer.Name     Segment       Country         City        State
## 1    Darren Powers    Consumer United States      Houston        Texas
## 2    Phillina Ober Home Office United States   Naperville     Illinois
## 3    Phillina Ober Home Office United States   Naperville     Illinois
## 4    Phillina Ober Home Office United States   Naperville     Illinois
## 5       Mick Brown    Consumer United States Philadelphia Pennsylvania
## 6 Lycoris Saunders    Consumer United States  Los Angeles   California
##   Postal.Code  Region      Product.ID        Category Sub.Category
## 1       77095 Central OFF-PA-10000174 Office Supplies        Paper
## 2       60540 Central OFF-LA-10003223 Office Supplies       Labels
## 3       60540 Central OFF-ST-10002743 Office Supplies      Storage
## 4       60540 Central OFF-BI-10004094 Office Supplies      Binders
## 5       19143    East OFF-AR-10003478 Office Supplies          Art
## 6       90049    West OFF-PA-10002005 Office Supplies        Paper
##                                                               Product.Name
## 1 Message Book, Wirebound, Four 5 1/2" X 4" Forms/Pg., 200 Dupl. Sets/Book
## 2                                                                Avery 508
## 3                                            SAFCO Boltless Steel Shelving
## 4                               GBC Standard Plastic Binding Systems Combs
## 5       Avery Hi-Liter EverBold Pen Style Fluorescent Highlighters, 4/Pack
## 6                                                                Xerox 225
##     Sales Quantity Discount   Profit
## 1  16.448        2      0.2   5.5512
## 2  11.784        3      0.2   4.2717
## 3 272.736        3      0.2 -64.7748
## 4   3.540        2      0.8  -5.4870
## 5  19.536        3      0.2   4.8840
## 6  19.440        3      0.0   9.3312

2.2.2 as.character()

# Store postal codes as text rather than as numbers.
# NOTE(review): if read.csv() parsed this column as a number, any leading
# zeros were already dropped before this conversion -- confirm whether
# zero-padding is needed.
df <- mutate(df, Postal.Code = as.character(Postal.Code))
head(df)
##   Row.ID       Order.ID Order.Date  Ship.Date      Ship.Mode Customer.ID
## 1   7981 CA-2014-103800 2014-01-03 2014-01-07 Standard Class    DP-13000
## 2    740 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 3    741 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 4    742 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 5   1760 CA-2014-141817 2014-01-05 2014-01-12 Standard Class    MB-18085
## 6   5328 CA-2014-130813 2014-01-06 2014-01-08   Second Class    LS-17230
##      Customer.Name     Segment       Country         City        State
## 1    Darren Powers    Consumer United States      Houston        Texas
## 2    Phillina Ober Home Office United States   Naperville     Illinois
## 3    Phillina Ober Home Office United States   Naperville     Illinois
## 4    Phillina Ober Home Office United States   Naperville     Illinois
## 5       Mick Brown    Consumer United States Philadelphia Pennsylvania
## 6 Lycoris Saunders    Consumer United States  Los Angeles   California
##   Postal.Code  Region      Product.ID        Category Sub.Category
## 1       77095 Central OFF-PA-10000174 Office Supplies        Paper
## 2       60540 Central OFF-LA-10003223 Office Supplies       Labels
## 3       60540 Central OFF-ST-10002743 Office Supplies      Storage
## 4       60540 Central OFF-BI-10004094 Office Supplies      Binders
## 5       19143    East OFF-AR-10003478 Office Supplies          Art
## 6       90049    West OFF-PA-10002005 Office Supplies        Paper
##                                                               Product.Name
## 1 Message Book, Wirebound, Four 5 1/2" X 4" Forms/Pg., 200 Dupl. Sets/Book
## 2                                                                Avery 508
## 3                                            SAFCO Boltless Steel Shelving
## 4                               GBC Standard Plastic Binding Systems Combs
## 5       Avery Hi-Liter EverBold Pen Style Fluorescent Highlighters, 4/Pack
## 6                                                                Xerox 225
##     Sales Quantity Discount   Profit
## 1  16.448        2      0.2   5.5512
## 2  11.784        3      0.2   4.2717
## 3 272.736        3      0.2 -64.7748
## 4   3.540        2      0.8  -5.4870
## 5  19.536        3      0.2   4.8840
## 6  19.440        3      0.0   9.3312

2.2.3 as.factor()

# Creating factors makes it easier to split the data set into groups
# and speeds up computation.

# Convert every categorical column to a factor in a single mutate() call.
df <- df %>%
  mutate(
    Ship.Mode = as.factor(Ship.Mode),
    Segment = as.factor(Segment),
    Country = as.factor(Country),
    City = as.factor(City),
    State = as.factor(State),
    Region = as.factor(Region),
    Category = as.factor(Category),
    Sub.Category = as.factor(Sub.Category)
  )
head(df)
##   Row.ID       Order.ID Order.Date  Ship.Date      Ship.Mode Customer.ID
## 1   7981 CA-2014-103800 2014-01-03 2014-01-07 Standard Class    DP-13000
## 2    740 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 3    741 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 4    742 CA-2014-112326 2014-01-04 2014-01-08 Standard Class    PO-19195
## 5   1760 CA-2014-141817 2014-01-05 2014-01-12 Standard Class    MB-18085
## 6   5328 CA-2014-130813 2014-01-06 2014-01-08   Second Class    LS-17230
##      Customer.Name     Segment       Country         City        State
## 1    Darren Powers    Consumer United States      Houston        Texas
## 2    Phillina Ober Home Office United States   Naperville     Illinois
## 3    Phillina Ober Home Office United States   Naperville     Illinois
## 4    Phillina Ober Home Office United States   Naperville     Illinois
## 5       Mick Brown    Consumer United States Philadelphia Pennsylvania
## 6 Lycoris Saunders    Consumer United States  Los Angeles   California
##   Postal.Code  Region      Product.ID        Category Sub.Category
## 1       77095 Central OFF-PA-10000174 Office Supplies        Paper
## 2       60540 Central OFF-LA-10003223 Office Supplies       Labels
## 3       60540 Central OFF-ST-10002743 Office Supplies      Storage
## 4       60540 Central OFF-BI-10004094 Office Supplies      Binders
## 5       19143    East OFF-AR-10003478 Office Supplies          Art
## 6       90049    West OFF-PA-10002005 Office Supplies        Paper
##                                                               Product.Name
## 1 Message Book, Wirebound, Four 5 1/2" X 4" Forms/Pg., 200 Dupl. Sets/Book
## 2                                                                Avery 508
## 3                                            SAFCO Boltless Steel Shelving
## 4                               GBC Standard Plastic Binding Systems Combs
## 5       Avery Hi-Liter EverBold Pen Style Fluorescent Highlighters, 4/Pack
## 6                                                                Xerox 225
##     Sales Quantity Discount   Profit
## 1  16.448        2      0.2   5.5512
## 2  11.784        3      0.2   4.2717
## 3 272.736        3      0.2 -64.7748
## 4   3.540        2      0.8  -5.4870
## 5  19.536        3      0.2   4.8840
## 6  19.440        3      0.0   9.3312

2.2.4 as.numeric()

# Keep only the four measure columns, coerce each one to numeric, and
# print the first six rows. The result is displayed only; df is unchanged.
select(df, Sales, Quantity, Discount, Profit) %>%
  mutate_all(.funs = as.numeric) %>%
  head(n = 6)
##     Sales Quantity Discount   Profit
## 1  16.448        2      0.2   5.5512
## 2  11.784        3      0.2   4.2717
## 3 272.736        3      0.2 -64.7748
## 4   3.540        2      0.8  -5.4870
## 5  19.536        3      0.2   4.8840
## 6  19.440        3      0.0   9.3312
# head() is applied to the result of the preceding steps