To complete this task, follow these steps:

Pull the data from the data warehouse. Build predictive models involving at least two factors. Identify the specific predictive algorithms for the selected features. Discuss the rationale behind choosing the specific type of algorithms. Discuss the results and how these results can help the business.

Sample questions you can consider as you predict trends in data include, but are not limited to:

What is the forecasted demand for the top (e.g., 10) items? What is the forecasted ROI for the items with the highest ROI? What are the forecasted/predicted replenishment rates of the items with the highest demand?

# Plotting package used by the charts further down.
library(ggplot2)

# Load the warehouse extract; the first row of the CSV holds the column names.
# NOTE(review): absolute, machine-specific path - the script only runs where
# this file exists.
wideworldimportersdw <- read.csv(
  file = "C:/Users/Q/Downloads/wideworldimportersDW.csv",
  header = TRUE
)

#Visualizing Profits/ROI for ordered packages

# Summary statistics of TotalProfit.
# Data has outliers, use logs for normalization.
summary(wideworldimportersdw$TotalProfit)

# Box plot of log(TotalProfit) for each OrderPackage.
# NOTE(review): log() gives NaN/-Inf for non-positive profits, and ggplot2
# drops those rows with a warning - confirm TotalProfit is strictly positive.
ggplot(
  wideworldimportersdw,
  aes(x = as.factor(OrderPackage), y = log(TotalProfit))
) +
  geom_boxplot(fill = "slateblue", alpha = 0.2) +
  xlab("OrderPackage")

# Kept so that any later code relying on dplyr still finds it attached.
library(dplyr)

# Scatter plot of TotalProfit against InvoiceYear.
ggplot(wideworldimportersdw, aes(x = InvoiceYear, y = TotalProfit)) +
  geom_point()

# Scatter plot of log sales (with tax) against log profit, coloured by package.
# Written with ggplot() because qplot() is deprecated as of ggplot2 3.4.0.
ggplot(
  wideworldimportersdw,
  aes(
    x = log(TotalProfit),
    y = log(TotalSalesWithTax),
    colour = factor(OrderPackage)
  )
) +
  geom_point()

#Feature selection

library(randomForest)

# Fit a random forest of TotalProfit on the candidate predictors so that their
# relative importance can be compared before building the regression model.
# NOTE(review): results vary between runs because no RNG seed is set here.
randomf <- randomForest(
  TotalProfit ~ TotalSalesWithTax + TotalChillerItems + SaleQuantity +
    OrderQuantity + OrderPackage,
  data = wideworldimportersdw
)

# type = 2 requests the node-impurity based importance measure.
importance(randomf, type = 2)

# Dot chart of the variable importance of the fitted forest.
varImpPlot(randomf)

#Prediction with linear regression model

# Random 75% / 25% train-test split.
# The previous call, sample(0.75 * nrow(df)), passed a single number to
# sample(), which returns a permutation of 1:floor(0.75 * n). Training data was
# therefore always the first 75% of rows and test data the last 25%. Sampling
# from all row indices gives a genuinely random split.
set.seed(2022) # fixed seed so the split, and every figure below, is reproducible
n_rows <- nrow(wideworldimportersdw)
ind <- sample(seq_len(n_rows), size = floor(0.75 * n_rows), replace = FALSE)
train <- wideworldimportersdw[ind, ]
test <- wideworldimportersdw[-ind, ]

# Linear regression of TotalProfit on sales, chiller items, quantities and the
# order package (treated as a categorical predictor), fitted on the training set.
mod <- lm(
  TotalProfit ~ TotalSalesWithTax + TotalChillerItems + SaleQuantity +
    OrderQuantity + as.factor(OrderPackage),
  data = train
)
summary(mod)

# Predicted versus actual test set
# NOTE(review): predict() fails if the test set contains an OrderPackage level
# that is absent from the training set.
pred <- predict(mod, test)

# ggplot() replaces qplot(), which is deprecated as of ggplot2 3.4.0.
ggplot(
  test,
  aes(x = TotalProfit, y = pred, colour = as.factor(OrderPackage))
) +
  geom_point()

What is the forecasted ROI for the items with the highest ROI?

# Mean forecasted profit per Order Package.

# Forecast TotalProfit for the held-out rows with the fitted linear model and
# keep the forecast alongside the test data.
forecastROI <- predict(mod, newdata = test)
test$forecastROI <- forecastROI

# Average forecast per package (columns: Group.1 = package, x = mean forecast).
# aggregate() has no `ascending` argument: the old `ascending=TRUE` was passed
# on to mean(), which ignored it, so the table was never sorted. Sort it
# explicitly in ascending order, as that argument intended.
roi_by_package <- aggregate(
  test$forecastROI,
  list(test$OrderPackage),
  FUN = mean
)
roi_by_package[order(roi_by_package$x), ]

What is the forecasted demand for the top (e.g., 10) items? #Get the most highly demanded package


# Use a linear regression model to forecast demand (SaleQuantity) per package.

# Regress SaleQuantity on sales, chiller items, profit, order quantity and the
# order package (categorical), fitted on the training set.
mod1 <- lm(
  SaleQuantity ~ TotalSalesWithTax + TotalChillerItems + TotalProfit +
    OrderQuantity + as.factor(OrderPackage),
  data = train
)

# Forecast demand for the held-out rows and keep it alongside the test data.
forecastdemandQty <- predict(mod1, newdata = test)
test$forecastdemandQty <- forecastdemandQty

# Total forecasted demand per package (columns: Group.1 = package, x = total).
# aggregate() has no `ascending` argument: the old `ascending=FALSE` was passed
# on to sum(), which added it to every group total as an extra value (0), and
# nothing was sorted. Sort explicitly, highest demand first.
demand_by_package <- aggregate(
  test$forecastdemandQty,
  list(test$OrderPackage),
  FUN = sum
)
demand_by_package[order(demand_by_package$x, decreasing = TRUE), ]

Get tutoring services https://academicwits.com Or email

LS0tDQp0aXRsZTogIldlZWsgNiINCmF1dGhvcjogIlZpY3RvciBPbW9uZGkgT255YW5nbyINCmRhdGU6ICcyMDIyLTA4LTA3Jw0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQogIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQNCi0tLQ0KDQpUbyBjb21wbGV0ZSB0aGlzIHRhc2ssIGZvbGxvdyB0aGVzZSBzdGVwczoNCg0KUHVsbCB0aGUgZGF0YSBmcm9tIGRhdGEgd2FyZWhvdXNlLg0KQnVpbGQgUHJlZGljdGl2ZSBtb2RlbHMgaW52b2x2aW5nIGF0IGxlYXN0IHR3byBmYWN0b3JzLg0KSWRlbnRpZnkgdGhlIHNwZWNpZmljIHByZWRpY3RpdmUgYWxnb3JpdGhtcyBmb3IgdGhlIHNlbGVjdGVkIGZlYXR1cmVzLg0KRGlzY3VzcyB3aGF0IGlzIHRoZSByYXRpb25hbGUgYmVoaW5kIGNob29zaW5nIHNwZWNpZmljIHR5cGUgb2YgYWxnb3JpdGhtcz8NCkRpc2N1c3MgdGhlIHJlc3VsdHMgYW5kIGhvdyB0aGVzZSByZXN1bHRzIGNhbiBoZWxwIHRoZSBidXNpbmVzcy4NCg0KU2FtcGxlIHF1ZXN0aW9ucyB5b3UgY2FuIGNvbnNpZGVyIGFzIHlvdSBwcmVkaWN0IHRyZW5kcyBpbiBkYXRhIGluY2x1ZGUsIGJ1dCBhcmUgbm90IGxpbWl0ZWQgdG86DQoNCldoYXQgYXJlIHRoZSBmb3JlY2FzdGVkIGRlbWFuZCBmb3IgdG9wIChlLmcuIDEwKSBpdGVtcz8NCldoYXQgYXJlIHRoZSBmb3JlY2FzdGVkIFJPSSBmb3IgdGhlIGl0ZW1zIG9mIGhpZ2hlc3QgUk9JPw0KV2hhdCBhcmUgdGhlIGZvcmVjYXN0ZWQvcHJlZGljdGVkIHJlcGxlbmlzaG1lbnQgcmF0ZXMgb2YgdGhlIGl0ZW1zIHdpdGggaGlnaGVzdCBkZW1hbmQ/DQpgYGB7cn0NCndpZGV3b3JsZGltcG9ydGVyc2R3PC0gcmVhZC5jc3YoIkM6L1VzZXJzL1EvRG93bmxvYWRzL3dpZGV3b3JsZGltcG9ydGVyc0RXLmNzdiIsIGhlYWRlcj0gVFJVRSkNCmxpYnJhcnkoZ2dwbG90MikNCmBgYA0KI1Zpc3VhbGl6aW5nIFByb2ZpdHMvUk9JIGZvciBvcmRlcmVkIHBhY2thZ2VzDQpgYGB7cn0NCnN1bW1hcnkod2lkZXdvcmxkaW1wb3J0ZXJzZHckVG90YWxQcm9maXQpICNEYXRhIGhhcyBvdXRsaWVycywgdXNlIGxvZ3MgZm9yIG5vcm1hbGl6YXRpb24NCmdncGxvdCh3aWRld29ybGRpbXBvcnRlcnNkdywgYWVzKHg9YXMuZmFjdG9yKE9yZGVyUGFja2FnZSksIHk9bG9nKFRvdGFsUHJvZml0KSkpICsgDQogICAgZ2VvbV9ib3hwbG90KGZpbGw9InNsYXRlYmx1ZSIsIGFscGhhPTAuMikgKyANCiAgICB4bGFiKCJPcmRlclBhY2thZ2UiKQ0KYGBgDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQojIFBsb3QNCndpZGV3b3JsZGltcG9ydGVyc2R3ICU+JQ0KICAgIGdncGxvdCggYWVzKHg9SW52b2ljZVllYXIsIHk9VG90YWxQcm9maXQpKSArDQogICAgDQogICAgZ2VvbV9wb2ludCgpDQpxcGxvdChsb2coVG90YWxQcm9maXQpLGxvZyhUb3RhbFNhbGVzV2l0aFRheCksY29sb3VyPWZhY3RvcihPcmRlclBhY2thZ2UpLCBkYXRhPXdpZGV3b3JsZGltcG9ydGVyc2R3KQ0KYGBgDQojRmVhdHVyZSBzZWxlY3Rpb24N
CmBgYHtyfQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpyYW5kb21mPC0gcmFuZG9tRm9yZXN0KFRvdGFsUHJvZml0flRvdGFsU2FsZXNXaXRoVGF4K1RvdGFsQ2hpbGxlckl0ZW1zK1NhbGVRdWFudGl0eSsgT3JkZXJRdWFudGl0eStPcmRlclBhY2thZ2UsIGRhdGE9IHdpZGV3b3JsZGltcG9ydGVyc2R3KQ0KaW1wb3J0YW5jZShyYW5kb21mLHR5cGUgPSAyKQ0KdmFySW1wUGxvdChyYW5kb21mKQ0KYGBgDQojUHJlZGljdGlvbiB3aXRoIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsDQpgYGB7cn0NCmluZDwtIHNhbXBsZSgwLjc1Km5yb3cod2lkZXdvcmxkaW1wb3J0ZXJzZHcpLCByZXBsYWNlPUZBTFNFKQ0KdHJhaW48LSB3aWRld29ybGRpbXBvcnRlcnNkd1tpbmQsXQ0KdGVzdDwtd2lkZXdvcmxkaW1wb3J0ZXJzZHdbLWluZCxdDQoNCm1vZCA8LSBsbShUb3RhbFByb2ZpdH5Ub3RhbFNhbGVzV2l0aFRheCtUb3RhbENoaWxsZXJJdGVtcytTYWxlUXVhbnRpdHkrT3JkZXJRdWFudGl0eSthcy5mYWN0b3IoT3JkZXJQYWNrYWdlKSwgZGF0YT0gdHJhaW4pDQpzdW1tYXJ5KG1vZCkgICAgICAgICAgICAgICAgICAgIA0KDQoNCmBgYA0KYGBge3J9DQojIFByZWRpY3RlZCB2ZXJzdXMgYWN0dWFsIHRlc3Qgc2V0DQpwcmVkIDwtIHByZWRpY3QobW9kLCB0ZXN0KQ0KcXBsb3QoVG90YWxQcm9maXQscHJlZCxjb2xvdXI9YXMuZmFjdG9yKE9yZGVyUGFja2FnZSksZGF0YT10ZXN0KQ0KDQpgYGANCg0KDQpXaGF0IGFyZSB0aGUgZm9yZWNhc3RlZCBST0kgZm9yIHRoZSBpdGVtcyBvZiBoaWdoZXN0IFJPST8NCmBgYHtyfQ0KI2ZpbmQgbWVhbiBwcm9maXRzIHNjb3JlZCBieSBPcmRlciBQYWNrYWdlDQoNCg0KZm9yZWNhc3RST0k8LXByZWRpY3QobW9kLCBuZXdkYXRhPXRlc3QpICNUaGUgZm9yZWNhc3RlZCBwcm9maXQgZm9yIHRoZSBoaWdobHkgcHJvZml0YWJsZSBwYWNrYWdlDQp0ZXN0JGZvcmVjYXN0Uk9JPC0gZm9yZWNhc3RST0kNCmFnZ3JlZ2F0ZSh0ZXN0JGZvcmVjYXN0Uk9JLCBsaXN0KHRlc3QkT3JkZXJQYWNrYWdlKSwgRlVOPW1lYW4sYXNjZW5kaW5nPVRSVUUpDQpgYGANCg0KV2hhdCBhcmUgdGhlIGZvcmVjYXN0ZWQgZGVtYW5kIGZvciB0b3AgKGUuZy4gMTApIGl0ZW1zPw0KI0dldCB0aGUgaGlnaGx5IGRlbWFuZGVkIHBhY2thZ2UNCmBgYHtyfQ0KDQojdXNlIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIHRvIGZvcmVjYXN0IGRlbWFuZCBmb3IgaGlnaGx5IHNvdWdodCBwcm9kdWN0DQoNCm1vZDEgPC0gbG0oU2FsZVF1YW50aXR5flRvdGFsU2FsZXNXaXRoVGF4K1RvdGFsQ2hpbGxlckl0ZW1zK1RvdGFsUHJvZml0K09yZGVyUXVhbnRpdHkrYXMuZmFjdG9yKE9yZGVyUGFja2FnZSksIGRhdGE9IHRyYWluKQ0KZm9yZWNhc3RkZW1hbmRRdHk8LXByZWRpY3QobW9kMSwgbmV3ZGF0YT10ZXN0KSAjZm9yZWNhc3RlZCBkZW1hbmQgZm9yIHRoZSB0b3AgcHJvZHVjdA0KdGVzdCRmb3JlY2FzdGRlbWFuZFF0eTwtZm9yZWNhc3RkZW1hbmRRdHkNCmFnZ3Jl
Z2F0ZSh0ZXN0JGZvcmVjYXN0ZGVtYW5kUXR5LCBsaXN0KHRlc3QkT3JkZXJQYWNrYWdlKSwgRlVOPXN1bSxhc2NlbmRpbmc9RkFMU0UpIA0KYGBgDQpHZXQgdHV0b3Jpbmcgc2VydmljZXMgaHR0cHM6Ly9hY2FkZW1pY3dpdHMuY29tDQpPciBlbWFpbCBvbW9uZGl2a2V5QGdtYWlsLmNvbQ==