To complete this task, follow these steps:
Pull the data from the data warehouse. Build predictive models involving
at least two factors. Identify the specific predictive algorithms for
the selected features. Discuss the rationale behind choosing the
specific type of algorithms. Discuss the results and how these results
can help the business.
Sample questions you can consider as you predict trends in data
include, but are not limited to:
What is the forecasted demand for the top (e.g. 10) items? What is the
forecasted ROI for the items with the highest ROI? What are the
forecasted/predicted replenishment rates of the items with the highest
demand?
# Attach ggplot2 for the plots that follow, then read the
# wideworldimportersDW.csv export (first row holds the column names)
# into the data frame used by the rest of the analysis.
library(ggplot2)
wideworldimportersdw <- read.csv(
  file = "C:/Users/Q/Downloads/wideworldimportersDW.csv",
  header = TRUE
)
# Visualizing Profits/ROI for ordered packages
# Print a numeric summary of TotalProfit first: the data has outliers, so
# the box plot below works on log(TotalProfit) for normalization.
summary(wideworldimportersdw[["TotalProfit"]])
# One box of log(TotalProfit) per OrderPackage level.
ggplot(
  data = wideworldimportersdw,
  mapping = aes(x = as.factor(OrderPackage), y = log(TotalProfit))
) +
  geom_boxplot(alpha = 0.2, fill = "slateblue") +
  labs(x = "OrderPackage")
library(dplyr)
# Plot
# Scatter of TotalProfit against InvoiceYear (raw, unlogged values).
wideworldimportersdw %>%
  ggplot(aes(x = InvoiceYear, y = TotalProfit)) +
  geom_point()
# Scatter of log(TotalProfit) against log(TotalSalesWithTax), coloured by
# OrderPackage. Written as ggplot() + geom_point() because qplot() is
# deprecated as of ggplot2 3.4.0; the default axis and legend titles are
# still derived from the same expressions, so the figure is unchanged.
ggplot(
  wideworldimportersdw,
  aes(
    x = log(TotalProfit),
    y = log(TotalSalesWithTax),
    colour = factor(OrderPackage)
  )
) +
  geom_point()
# Feature selection
library(randomForest)
# Fit a random forest of TotalProfit on the five candidate predictors,
# using every row of the data set.
randomf <- randomForest(
  TotalProfit ~ TotalSalesWithTax + TotalChillerItems + SaleQuantity +
    OrderQuantity + OrderPackage,
  data = wideworldimportersdw
)
# type = 2 selects the node-impurity based importance measure.
importance(randomf, type = 2)
# Dot chart of the variable importances from the fitted forest.
varImpPlot(randomf)
# Prediction with linear regression model
# Random 75% / 25% train/test split.
# sample() must be given the full set of row indices plus an explicit size:
# sample(0.75 * n) on its own is a permutation of 1:floor(0.75 * n), which
# would always put the first 75% of rows in train and the last 25% in test.
stopifnot(nrow(wideworldimportersdw) >= 2) # need >= 1 row on each side
set.seed(123) # fixed seed so the split (and the fit below) is reproducible
ind <- sample(
  seq_len(nrow(wideworldimportersdw)),
  size = floor(0.75 * nrow(wideworldimportersdw)),
  replace = FALSE
)
train <- wideworldimportersdw[ind, ]
test <- wideworldimportersdw[-ind, ]
# Linear model of TotalProfit; OrderPackage enters as a categorical factor.
mod <- lm(
  TotalProfit ~ TotalSalesWithTax + TotalChillerItems + SaleQuantity +
    OrderQuantity + as.factor(OrderPackage),
  data = train
)
summary(mod)
# Predicted versus actual test set
# Score the held-out rows with the profit model.
pred <- predict(mod, test)
# Actual TotalProfit (x) against predicted profit (y), coloured by
# OrderPackage. ggplot() + geom_point() replaces qplot(), which is
# deprecated as of ggplot2 3.4.0. `pred` is not a column of `test`, so it
# is resolved from the calling environment, exactly as qplot() did.
ggplot(
  test,
  aes(x = TotalProfit, y = pred, colour = as.factor(OrderPackage))
) +
  geom_point()
What is the forecasted ROI for the items with the highest ROI?
# Find the mean forecasted profit by OrderPackage
# Forecast profit for every test row with the profit model and keep it as
# a column of the test set.
forecastROI <- predict(mod, newdata = test)
test$forecastROI <- forecastROI
# Mean forecast per package. aggregate() forwards extra arguments to FUN,
# and mean() silently ignores an `ascending` argument, so the ordering has
# to be applied explicitly to the aggregated table.
forecast_roi_by_package <- aggregate(
  test$forecastROI,
  by = list(OrderPackage = test$OrderPackage),
  FUN = mean
)
# Ascending by mean forecast (column `x`): the most profitable package is
# the last row.
forecast_roi_by_package[order(forecast_roi_by_package$x), ]
What is the forecasted demand for the top (e.g. 10) items?
#Get the highly demanded package
# Use a linear regression model to forecast demand for the highly sought
# product
# Linear model of SaleQuantity fitted on the training rows; OrderPackage
# enters as a categorical factor.
mod1 <- lm(
  SaleQuantity ~ TotalSalesWithTax + TotalChillerItems + TotalProfit +
    OrderQuantity + as.factor(OrderPackage),
  data = train
)
# Forecast quantity for every test row and keep it as a column of the
# test set.
forecastdemandQty <- predict(mod1, newdata = test)
test$forecastdemandQty <- forecastdemandQty
# Total forecast quantity per package. aggregate() forwards extra
# arguments to FUN, and sum() would add an `ascending` flag into every
# group total (FALSE as 0, TRUE as 1) instead of sorting, so the ordering
# is applied explicitly to the aggregated table.
forecast_demand_by_package <- aggregate(
  test$forecastdemandQty,
  by = list(OrderPackage = test$OrderPackage),
  FUN = sum
)
# Descending by total forecast (column `x`): the most demanded package is
# the first row.
forecast_demand_by_package[
  order(forecast_demand_by_package$x, decreasing = TRUE),
]
Get tutoring services https://academicwits.com Or email omondivkey@gmail.com
LS0tDQp0aXRsZTogIldlZWsgNiINCmF1dGhvcjogIlZpY3RvciBPbW9uZGkgT255YW5nbyINCmRhdGU6ICcyMDIyLTA4LTA3Jw0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQogIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQNCi0tLQ0KDQpUbyBjb21wbGV0ZSB0aGlzIHRhc2ssIGZvbGxvdyB0aGVzZSBzdGVwczoNCg0KUHVsbCB0aGUgZGF0YSBmcm9tIGRhdGEgd2FyZWhvdXNlLg0KQnVpbGQgUHJlZGljdGl2ZSBtb2RlbHMgaW52b2x2aW5nIGF0IGxlYXN0IHR3byBmYWN0b3JzLg0KSWRlbnRpZnkgdGhlIHNwZWNpZmljIHByZWRpY3RpdmUgYWxnb3JpdGhtcyBmb3IgdGhlIHNlbGVjdGVkIGZlYXR1cmVzLg0KRGlzY3VzcyB3aGF0IGlzIHRoZSByYXRpb25hbGUgYmVoaW5kIGNob29zaW5nIHNwZWNpZmljIHR5cGUgb2YgYWxnb3JpdGhtcz8NCkRpc2N1c3MgdGhlIHJlc3VsdHMgYW5kIGhvdyB0aGVzZSByZXN1bHRzIGNhbiBoZWxwIHRoZSBidXNpbmVzcy4NCg0KU2FtcGxlIHF1ZXN0aW9ucyB5b3UgY2FuIGNvbnNpZGVyIGFzIHlvdSBwcmVkaWN0IHRyZW5kcyBpbiBkYXRhIGluY2x1ZGUsIGJ1dCBhcmUgbm90IGxpbWl0ZWQgdG86DQoNCldoYXQgYXJlIHRoZSBmb3JlY2FzdGVkIGRlbWFuZCBmb3IgdG9wIChlLmcuIDEwKSBpdGVtcz8NCldoYXQgYXJlIHRoZSBmb3JlY2FzdGVkIFJPSSBmb3IgdGhlIGl0ZW1zIG9mIGhpZ2hlc3QgUk9JPw0KV2hhdCBhcmUgdGhlIGZvcmVjYXN0ZWQvcHJlZGljdGVkIHJlcGxlbmlzaG1lbnQgcmF0ZXMgb2YgdGhlIGl0ZW1zIHdpdGggaGlnaGVzdCBkZW1hbmQ/DQpgYGB7cn0NCndpZGV3b3JsZGltcG9ydGVyc2R3PC0gcmVhZC5jc3YoIkM6L1VzZXJzL1EvRG93bmxvYWRzL3dpZGV3b3JsZGltcG9ydGVyc0RXLmNzdiIsIGhlYWRlcj0gVFJVRSkNCmxpYnJhcnkoZ2dwbG90MikNCmBgYA0KI1Zpc3VhbGl6aW5nIFByb2ZpdHMvUk9JIGZvciBvcmRlcmVkIHBhY2thZ2VzDQpgYGB7cn0NCnN1bW1hcnkod2lkZXdvcmxkaW1wb3J0ZXJzZHckVG90YWxQcm9maXQpICNEYXRhIGhhcyBvdXRsaWVycywgdXNlIGxvZ3MgZm9yIG5vcm1hbGl6YXRpb24NCmdncGxvdCh3aWRld29ybGRpbXBvcnRlcnNkdywgYWVzKHg9YXMuZmFjdG9yKE9yZGVyUGFja2FnZSksIHk9bG9nKFRvdGFsUHJvZml0KSkpICsgDQogICAgZ2VvbV9ib3hwbG90KGZpbGw9InNsYXRlYmx1ZSIsIGFscGhhPTAuMikgKyANCiAgICB4bGFiKCJPcmRlclBhY2thZ2UiKQ0KYGBgDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQojIFBsb3QNCndpZGV3b3JsZGltcG9ydGVyc2R3ICU+JQ0KICAgIGdncGxvdCggYWVzKHg9SW52b2ljZVllYXIsIHk9VG90YWxQcm9maXQpKSArDQogICAgDQogICAgZ2VvbV9wb2ludCgpDQpxcGxvdChsb2coVG90YWxQcm9maXQpLGxvZyhUb3RhbFNhbGVzV2l0aFRheCksY29sb3VyPWZhY3RvcihPcmRlclBhY2thZ2UpLCBkYXRhPXdpZGV3b3JsZGltcG9ydGVyc2R3KQ0KYGBgDQojRmVhdHVyZSBzZWxlY3Rpb24N
CmBgYHtyfQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpyYW5kb21mPC0gcmFuZG9tRm9yZXN0KFRvdGFsUHJvZml0flRvdGFsU2FsZXNXaXRoVGF4K1RvdGFsQ2hpbGxlckl0ZW1zK1NhbGVRdWFudGl0eSsgT3JkZXJRdWFudGl0eStPcmRlclBhY2thZ2UsIGRhdGE9IHdpZGV3b3JsZGltcG9ydGVyc2R3KQ0KaW1wb3J0YW5jZShyYW5kb21mLHR5cGUgPSAyKQ0KdmFySW1wUGxvdChyYW5kb21mKQ0KYGBgDQojUHJlZGljdGlvbiB3aXRoIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsDQpgYGB7cn0NCmluZDwtIHNhbXBsZSgwLjc1Km5yb3cod2lkZXdvcmxkaW1wb3J0ZXJzZHcpLCByZXBsYWNlPUZBTFNFKQ0KdHJhaW48LSB3aWRld29ybGRpbXBvcnRlcnNkd1tpbmQsXQ0KdGVzdDwtd2lkZXdvcmxkaW1wb3J0ZXJzZHdbLWluZCxdDQoNCm1vZCA8LSBsbShUb3RhbFByb2ZpdH5Ub3RhbFNhbGVzV2l0aFRheCtUb3RhbENoaWxsZXJJdGVtcytTYWxlUXVhbnRpdHkrT3JkZXJRdWFudGl0eSthcy5mYWN0b3IoT3JkZXJQYWNrYWdlKSwgZGF0YT0gdHJhaW4pDQpzdW1tYXJ5KG1vZCkgICAgICAgICAgICAgICAgICAgIA0KDQoNCmBgYA0KYGBge3J9DQojIFByZWRpY3RlZCB2ZXJzdXMgYWN0dWFsIHRlc3Qgc2V0DQpwcmVkIDwtIHByZWRpY3QobW9kLCB0ZXN0KQ0KcXBsb3QoVG90YWxQcm9maXQscHJlZCxjb2xvdXI9YXMuZmFjdG9yKE9yZGVyUGFja2FnZSksZGF0YT10ZXN0KQ0KDQpgYGANCg0KDQpXaGF0IGFyZSB0aGUgZm9yZWNhc3RlZCBST0kgZm9yIHRoZSBpdGVtcyBvZiBoaWdoZXN0IFJPST8NCmBgYHtyfQ0KI2ZpbmQgbWVhbiBwcm9maXRzIHNjb3JlZCBieSBPcmRlciBQYWNrYWdlDQoNCg0KZm9yZWNhc3RST0k8LXByZWRpY3QobW9kLCBuZXdkYXRhPXRlc3QpICNUaGUgZm9yZWNhc3RlZCBwcm9maXQgZm9yIHRoZSBoaWdobHkgcHJvZml0YWJsZSBwYWNrYWdlDQp0ZXN0JGZvcmVjYXN0Uk9JPC0gZm9yZWNhc3RST0kNCmFnZ3JlZ2F0ZSh0ZXN0JGZvcmVjYXN0Uk9JLCBsaXN0KHRlc3QkT3JkZXJQYWNrYWdlKSwgRlVOPW1lYW4sYXNjZW5kaW5nPVRSVUUpDQpgYGANCg0KV2hhdCBhcmUgdGhlIGZvcmVjYXN0ZWQgZGVtYW5kIGZvciB0b3AgKGUuZy4gMTApIGl0ZW1zPw0KI0dldCB0aGUgaGlnaGx5IGRlbWFuZGVkIHBhY2thZ2UNCmBgYHtyfQ0KDQojdXNlIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIHRvIGZvcmVjYXN0IGRlbWFuZCBmb3IgaGlnaGx5IHNvdWdodCBwcm9kdWN0DQoNCm1vZDEgPC0gbG0oU2FsZVF1YW50aXR5flRvdGFsU2FsZXNXaXRoVGF4K1RvdGFsQ2hpbGxlckl0ZW1zK1RvdGFsUHJvZml0K09yZGVyUXVhbnRpdHkrYXMuZmFjdG9yKE9yZGVyUGFja2FnZSksIGRhdGE9IHRyYWluKQ0KZm9yZWNhc3RkZW1hbmRRdHk8LXByZWRpY3QobW9kMSwgbmV3ZGF0YT10ZXN0KSAjZm9yZWNhc3RlZCBkZW1hbmQgZm9yIHRoZSB0b3AgcHJvZHVjdA0KdGVzdCRmb3JlY2FzdGRlbWFuZFF0eTwtZm9yZWNhc3RkZW1hbmRRdHkNCmFnZ3Jl
Z2F0ZSh0ZXN0JGZvcmVjYXN0ZGVtYW5kUXR5LCBsaXN0KHRlc3QkT3JkZXJQYWNrYWdlKSwgRlVOPXN1bSxhc2NlbmRpbmc9RkFMU0UpIA0KYGBgDQpHZXQgdHV0b3Jpbmcgc2VydmljZXMgaHR0cHM6Ly9hY2FkZW1pY3dpdHMuY29tDQpPciBlbWFpbCBvbW9uZGl2a2V5QGdtYWlsLmNvbQ==