Hackaton opdracht - SE & VA
Introductie
De opdracht
Data verzameling
Het laden van de online omgeving
library(DBI)
library(RMySQL)
user = 'Hackaton'
password = 'DBpass2BrKi4AlMem2Grp'
dbConn <- dbConnect(
MySQL(),
user,
password,
dbname = 'hackaton',
host = '127.0.0.1'
)
dbListTables(dbConn)## [1] "All_Orders" "Items" "PostItems" "PostOrders" "PostSubItems"
## [6] "SubItems"
Het inladen van de verscheidene databestanden
PostItems <- dbSendQuery(dbConn,"SELECT * FROM PostItems")
PostItems = fetch(PostItems, n=-1)
PostOrders <- dbSendQuery(dbConn,"SELECT * FROM PostOrders")
PostOrders = fetch(PostOrders, n=-1)
PostSubItems <- dbSendQuery(dbConn,"SELECT * FROM PostSubItems")
PostSubItems = fetch(PostSubItems, n=-1)
Items <- dbSendQuery(dbConn,"SELECT * FROM Items")
Items = fetch(Items, n=-1)
All_Orders <- dbSendQuery(dbConn,"SELECT * FROM All_Orders")
All_Orders = fetch(All_Orders, n=-1)
SubItems <- dbSendQuery(dbConn,"SELECT * FROM SubItems")
SubItems = fetch(SubItems, n=-1)Data schoonmaken
Het schoonmaken van vorige situatie en post-situatie
Orders en PostOrders
#Verwijderen van irrelevante columns op het PostOrders-bestand
PostOrders2 <- PostOrders %>%
select(-account, -weekday_updated, -dateString_updated, -hour_updated,
-minute_updated, -hourAndMinute_updated, -dateString_created, -posId,
-posLocationId, -channelLink, -orderType, -channel, -deliveryIsAsap,
-orderIsAlreadyPaid, -customerName, -customerPhoneNumber,
-customerEmail,-paymentAmount, -note, -decimalDigits,
-numberOfCustomers, -posCustomerId, -"_etag", -weekday_created,
-hour_created, -minute_created, -hourAndMinute_created, -by, -platform,
-channelOrderRawId, -serviceCharge, -channelOrderDisplayId, -id
)
#Maak een nieuwe column pickupTime in post-variant
PostOrders2 <- unite(PostOrders2, pickupTime,
c(dateStringPickUpTime, hourAndMinutePickUpTime),
remove = FALSE, sep = " "
)
#Verwijder oude-situatie columns die irrelevant zijn
All_Orders2 <- All_Orders %>%
select(-created, -weekday, -hour, -minute, -dateString,
-hourAndMinute, -customerName, -customerCompanyName,
-customerEmail, -customerPhoneNumber, -courier, -LocationID,
-paymentAmount, -Ordercol, -note, -channelLink,
-customerCompanyName, -customerCompanyName,
-customerPhoneNumber, -customerEmail,
-deliveryAddressExtraAddressInfo, -orderID, -status
) %>%
#Verander namen oude-situatie naar post-variant
rename(weekdayPickUpTime = weekday2,
dateStringPickUpTime = dateString2,
hourPickUpTime = hour2,
minutePickUpTime = minute2,
hourAndMinutePickUpTime = hourAndMinute2,
channelOrderId = channelOrderID
)
#Time-variant op columns
PostOrders2$pickupTime <- strftime(PostOrders2$pickupTime, format = "%Y-%m-%d"
)
All_Orders2$pickupTime <- strftime(All_Orders2$pickupTime, format = "%Y-%m-%d"
)
PostOrders2 <- PostOrders2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")) %>%
rename(platform = courierDeliveryBy
)
All_Orders2 <- All_Orders2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")
)
#Join All_Orders met PostOrders
PostOrdersTotaal <- dplyr::bind_rows(All_Orders2, PostOrders2)Items en PostItems
#Verwijderen van irrelevante columns op het PostItems-bestand
PostItems2 <- PostItems %>%
select(-account, -weekday_updated, -dateString_updated, -hour_updated,
-minute_updated, -hourAndMinute_updated, -dateString_created, -posId,
-posLocationId, -channelLink, -orderType, -channel, -deliveryIsAsap,
-orderIsAlreadyPaid, -customerName, -customerPhoneNumber,
-customerEmail, -deliveryAddressCity, -deliveryAddressPostalCode,
-deliveryAddressStreet, -deliveryAddressStreetNumber,
-deliveryAddressPostalCode, -deliveryAddressCity,
-paymentAmount, -note, -decimalDigits, -numberOfCustomers,
-posCustomerId, -id, -"_etag", -weekday_created, -hour_created,
-minute_created, -hourAndMinute_created, -serviceCharge
)
#Maak een nieuwe column pickupTime in post-variant
PostItems2 <- unite(PostItems, pickupTime,
c(dateStringPickUpTime, hourAndMinutePickUpTime),
remove=FALSE, sep = " "
)
#Verwijder oude-situatie columns die irrelevant zijn
Items2 <- Items %>%
select(-created, -weekday, -hour, -minute, -dateString, -hourAndMinute) %>%
rename(itemsPlu = ItemsPlu
)
#Verander namen oude-situatie naar post-variant
Items2 <- Items %>%
rename(weekdayPickUpTime = weekday2,
dateStringPickUpTime = dateString2,
hourPickUpTime = hour2,
minutePickUpTime = minute2,
hourAndMinutePickUpTime = hourAndMinute2,
channelOrderId = OrderID
)
#Time-variant op columns
Items2$pickupTime <- strftime(Items2$pickupTime, format = "%Y-%m-%d"
)
PostItems2$pickupTime <- strftime(PostItems2$pickupTime, format = "%Y-%m-%d"
)
PostItems2 <- PostItems2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")
)
Items2 <- Items2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")
)
#Join Subitems met PostSubItems
ItemsTotaal <- dplyr::bind_rows(PostItems2, Items2
)SubItems en PostSubItems
#Verwijderen van irrelevante columns op het PostSubItems-bestand
PostSubItems2 <- PostSubItems %>%
select(-account, -weekday_updated, -dateString_updated, -hour_updated,
-minute_updated, -hourAndMinute_updated, -dateString_created, -posId,
-posLocationId, -channelLink, -orderType, -channel, -deliveryIsAsap,
-orderIsAlreadyPaid, -customerName, -customerPhoneNumber,
-customerEmail, -deliveryAddressCity, -deliveryAddressPostalCode,
-deliveryAddressStreet, -deliveryAddressStreetNumber,
-deliveryAddressPostalCode, -deliveryAddressCity,
-paymentAmount, -note, -decimalDigits, -numberOfCustomers,
-posCustomerId, -id, -"_etag", -weekday_created, -hour_created,
-minute_created, -hourAndMinute_created, -serviceCharge
)
#Maak een nieuwe column pickupTime in post-variant
PostSubItems2 <- unite(PostSubItems, pickupTime,
c(dateStringPickUpTime, hourAndMinutePickUpTime),
remove=FALSE, sep = " "
)
#Verwijder oude-situatie columns die irrelevant zijn
SubItems2 <- SubItems %>%
select(-created, -weekday, -hour, -minute, -dateString, -hourAndMinute) %>%
rename(subitemsPlu = subItemsPlu
)
#Verander namen oude-situatie naar post-variant
SubItems2 <- SubItems %>%
rename(weekdayPickUpTime = weekday2,
dateStringPickUpTime = dateString2,
hourPickUpTime = hour2,
minutePickUpTime = minute2,
hourAndMinutePickUpTime = hourAndMinute2,
channelOrderId = OrderID
)
#Time-variant op columns
SubItems2$pickupTime <- strftime(SubItems2$pickupTime, format = "%Y-%m-%d"
)
PostSubItems2$pickupTime <- strftime(PostSubItems2$pickupTime, format = "%Y-%m-%d"
)
PostSubItems2 <- PostSubItems2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")
)
SubItems2 <- SubItems2 %>%
mutate(pickupTime = as.POSIXct(pickupTime, format = "%Y-%m-%d")
)
#Join Subitems met PostSubItems
SubItemsTotaal <- dplyr::bind_rows(PostSubItems2, SubItems2
)