# Load the Superstore sample from the working directory, echo the table,
# then list each column with its type.
df <- read.csv(file = "Sample - Superstore.csv")
df
str(object = df)
## 'data.frame': 9994 obs. of 21 variables:
## $ Row.ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Order.ID : chr "CA-2016-152156" "CA-2016-152156" "CA-2016-138688" "US-2015-108966" ...
## $ Order.Date : chr "11/8/2016" "11/8/2016" "6/12/2016" "10/11/2015" ...
## $ Ship.Date : chr "11/11/2016" "11/11/2016" "6/16/2016" "10/18/2015" ...
## $ Ship.Mode : chr "Second Class" "Second Class" "Second Class" "Standard Class" ...
## $ Customer.ID : chr "CG-12520" "CG-12520" "DV-13045" "SO-20335" ...
## $ Customer.Name: chr "Claire Gute" "Claire Gute" "Darrin Van Huff" "Sean O'Donnell" ...
## $ Segment : chr "Consumer" "Consumer" "Corporate" "Consumer" ...
## $ Country : chr "United States" "United States" "United States" "United States" ...
## $ City : chr "Henderson" "Henderson" "Los Angeles" "Fort Lauderdale" ...
## $ State : chr "Kentucky" "Kentucky" "California" "Florida" ...
## $ Postal.Code : int 42420 42420 90036 33311 33311 90032 90032 90032 90032 90032 ...
## $ Region : chr "South" "South" "West" "South" ...
## $ Product.ID : chr "FUR-BO-10001798" "FUR-CH-10000454" "OFF-LA-10000240" "FUR-TA-10000577" ...
## $ Category : chr "Furniture" "Furniture" "Office Supplies" "Furniture" ...
## $ Sub.Category : chr "Bookcases" "Chairs" "Labels" "Tables" ...
## $ Product.Name : chr "Bush Somerset Collection Bookcase" "Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back" "Self-Adhesive Address Labels for Typewriters by Universal" "Bretford CR4500 Series Slim Rectangular Table" ...
## $ Sales : num 262 731.9 14.6 957.6 22.4 ...
## $ Quantity : int 2 3 2 5 2 7 4 6 3 5 ...
## $ Discount : num 0 0 0 0.45 0.2 0 0 0.2 0.2 0 ...
## $ Profit : num 41.91 219.58 6.87 -383.03 2.52 ...
# Convert both date columns from month/day/year text with mdy(),
# handling the two columns in a single mutate() call, then echo the result.
df <- df %>%
  mutate(
    Order.Date = mdy(Order.Date),
    Ship.Date = mdy(Ship.Date)
  )
df
# Turn the categorical text columns into factors in one mutate() call,
# then echo the result.
df <- df %>%
  mutate(
    Ship.Mode = as.factor(Ship.Mode),
    Segment = as.factor(Segment),
    Country = as.factor(Country),
    State = as.factor(State),
    Region = as.factor(Region),
    Category = as.factor(Category),
    Sub.Category = as.factor(Sub.Category)
  )
df
# Prepare the data for a sales-per-day chart: one row per order day with
# the summed Sales for that day. The grouping key is computed directly
# inside group_by().
daily_sales <- df %>%
  group_by(day = floor_date(Order.Date, unit = "day")) %>%
  summarise(sales.d = sum(Sales))
daily_sales
# Line chart of total sales per day, with each daily total marked in red.
# Every layer must be joined with `+` at the end of the preceding line;
# a line that does not end in `+` terminates the plot expression, and the
# layers after it are evaluated on their own and never reach the plot.
ggplot(daily_sales, aes(x = day, y = sales.d)) +
  geom_line(color = "blue") +
  geom_point(color = "red") +
  labs(title = "ventas diarias", x = "dias", y = "ventas") +
  # date_breaks takes an interval string; date_labels takes strftime codes
  # (%b = abbreviated month name, %y = two-digit year).
  scale_x_date(date_breaks = "60 days", date_labels = "%b %y")