♫ Bells are ringing, children singing, all is merry and bright. Santa’s elves made a big mistake, now he needs your help tonight ♫
Helping Santa fill his bags.
Libraries
library(stringr)
library(ggplot2)
library(triangle)
library(data.table)
Data Load
gifts <- read.csv('input/gifts.csv')
#Tomo la priper palabra de cada regalo
gifts$Toy <- factor(str_extract(gifts$GiftId, pattern = "[a-z]+"))
#Genero un gráfico para mostrar la distribución de regalos Santa
ggplot(data= gifts, aes(x= reorder(Toy, -table(Toy)[Toy]) , fill =Toy ))+
geom_bar(stat="count") +
theme_classic() +
xlab("Toy")+
guides(fill = FALSE)

Gift Weight Probability Distribution
We compute the weight distribution for each gift
sample_horse
function(n) pmax(0, rnorm(n=n, mean=5, sd=2))
Let’s visualize the weight distributions
ggplot(data= gifts, aes (x= Weight, fill=Toy)) +
geom_density() +
facet_wrap(~Toy, ncol= 3, scales="free")+
theme_bw() +
guides(fill=FALSE) +
ggtitle("Density plots of the toy's weight by toy type")

Let’s visualize the weight distributions with a boxplot
ggplot (data= gifts, aes (y= Weight, x=Toy, fill=Toy))+
geom_boxplot()+
guides(fill=FALSE) +
ggtitle("Boxplots of weight distribution by toy type")

Evaluation
As the Evaluation tab tells us, “Submissions are evaluated on the total amount of weight you fit into Santa’s 1000 bags”, so we need to maximize the total weight of the bags. The key point is that our primary objective is to make Santa carry as much weight as possible, regardless of the total number of gifts.
We have then, a maximization function for all the gifts of all the bags: \[\sum_{i=1}^{n}\left( \sum_{j=1}^{m}\ Weight_i*X_{i,j} \right)\] Where each \(Weight_i\) represents the Weight of each gift and each \(X_{i,j}\) represents if the gift \(i\) goes into the bag \(j\).
\(i\) goes from gift \(1\) to gift \(n\). \(j\) goes from bag \(1\) to bag \(m\).
Constraint Number 1: Each bag has to weigh less than 50 lb. \[ \sum_{i=1}^{n}\ Weight_i*X_{i,j} \le 50\ lb\ \ \forall j = 1,..,m \] Constraint Number 2: Each bag has to carry at least 3 items.
\[\sum_{i=1}^{n}\ X_{i,j} \ge 3\ \forall j = 1,..,m \]
Constraint Number 3: An item can be only in one bag.
\[ \sum_{j=1}^{m}\ X_{i,j} \le 1 \ \forall i = 1,..,n \] Constraint Number 4: \(X_{i,j}\) is binary \[X_{i,j} \in\ \{ {0,1} \} \ \forall i = 1,..,n \ j = 1,..,m \]
First Approach: Random path
In this first approach I’ll generate a random ordering of all the gifts and cut this list into bags whenever a bag is about to reach the 50 lb limit.
set.seed(2223)
sample_gifts<- data.frame(GiftId=integer(),Toy=character(),Weight=double, Bag=integer())
sampleGiftId<-sample(gifts$GiftId)
sampleToy <- factor(str_extract(sampleGiftId, pattern = "[a-z]+"))
sampleWeight<-sapply(sampleToy,function(x) do.call(paste("sample",x, sep="_"), as.list(1)))
sampleBag<-rep(0,length(sampleToy))
sample_gifts<-data.frame(GiftId=sampleGiftId,Toy=sampleToy,Weight=sampleWeight,Bag=sampleBag)
bag_weight<-NULL
bag_items<-NULL
bag_name<-NULL
bag_name[1]<-1
bag_items[1]<-0;
bagweight=0
j=-1
for (i in 1:length(sample_gifts$Toy))
{
if (sample_gifts$Weight[i]<50)
{
bagweight = bagweight + sample_gifts$Weight[i]
if (bagweight>48) {
j = j + 1
bag_weight[j]<-bagweight - sample_gifts$Weight[i]
bag_items[j]<-0
bag_name[j]<-j
bagweight = sample_gifts$Weight[i]
}
sample_gifts$Bag[i]=j
bag_items[j]<-bag_items[j]+1
}
}
summary(bag_weight)
Min. 1st Qu. Median Mean 3rd Qu. Max.
6.692 38.260 43.260 40.760 45.990 48.000
length(bag_weight)
[1] 1243
summary(bag_name)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 311.5 622.0 622.0 932.5 1243.0
We take out bags that do not respect the constraints (fewer than 3 items).
Now I show the weight distribution per bag.
d<-data.frame(bag_weight,bag_items,bag_name)
f1<-cut(bag_items,3)
levels(f1)<-c("Low","Medium","High")
d$bag_item_q<-f1
ggplot(d, aes(bag_weight, ..count..,)) +
geom_density() +
ggtitle("Count of bags by weight")

ggplot(d, aes(bag_items, ..count..,)) +
geom_density() +
ggtitle("Count of bags by weight")

ggplot(d, aes(bag_weight, ..count..,fill=bag_item_q)) +
geom_density() +
facet_wrap(~bag_item_q, ncol= 3, scales="free")+
ggtitle("Count of bags by weight diviving by number of items in the bag")

Now we choose the 1000 bags with more weight in them.
d1<-d[bag_items>=3,]
mt <- head(d1[order(d1$bag_weight,decreasing=TRUE), ],1000)
summary(mt)
bag_weight bag_items bag_name bag_item_q
Min. :32.90 Min. : 3.000 Min. : 1.0 Low :541
1st Qu.:40.47 1st Qu.: 5.000 1st Qu.: 307.8 Medium:429
Median :44.13 Median : 6.000 Median : 614.5 High : 30
Mean :43.12 Mean : 6.524 Mean : 617.8
3rd Qu.:46.34 3rd Qu.: 8.000 3rd Qu.: 923.2
Max. :48.00 Max. :16.000 Max. :1242.0
Total_Weight<-sum(mt$bag_weight)
Total_Weight
[1] 43122.35
The total weight for this solution is 43122 lb.
Exporting the solution
summary(sample_gifts_f)
GiftId Toy Weight Bag
ball_0 : 1 book :1123 Min. : 0.000 Min. : 1.0
ball_1 : 1 ball :1023 1st Qu.: 2.044 1st Qu.: 303.0
ball_10 : 1 doll : 939 Median : 4.564 Median : 608.0
ball_100 : 1 horse : 938 Mean : 6.351 Mean : 615.2
ball_1000: 1 blocks : 930 3rd Qu.: 9.322 3rd Qu.: 924.0
ball_1001: 1 train : 926 Max. :39.386 Max. :1242.0
(Other) :6518 (Other): 645
---
title: "SANTAS GIFTS"
output: html_notebook
---
♫ Bells are ringing, children singing, all is merry and bright. Santa's elves made a big mistake, now he needs your help tonight ♫

Helping Santa fill his bags.

#Libraries#
```{r, warning=FALSE}
library(stringr)
library(ggplot2)
library(triangle)
library(data.table)
```

#Data Load# 
```{r, warning=FALSE}
# Load the gift list (one row per gift, identified by GiftId, e.g. "ball_0").
gifts <- read.csv('input/gifts.csv')
# Take the first lowercase word of each gift id as the toy type.
gifts$Toy <- factor(str_extract(gifts$GiftId, pattern = "[a-z]+"))

# Bar chart of the number of gifts per toy type, most frequent type first.
ggplot(data = gifts, aes(x = reorder(Toy, -table(Toy)[Toy]), fill = Toy)) +
  geom_bar(stat = "count") +
  theme_classic() +
  xlab("Toy") +
  # Hide the legend: the fill colour only repeats the x-axis label.
  # theme() is used instead of the deprecated guides(fill = FALSE).
  theme(legend.position = "none")

```

#Gift Weight Probability Distribution#
##We compute the weight distribution for each gift##
```{r}
# Weight samplers taken from Ben Gorman's kernel. Thank you.
# https://www.kaggle.com/ben519/santas-uncertain-bags/merry-christmas-y-all
#
# Each sample_<toy>(n) returns a numeric vector of n simulated weights for
# that toy type. The comment above each sampler is the reference (numpy)
# definition it was ported from.

# horse = max(0, np.random.normal(5,2,1)[0])
sample_horse <- function(n) pmax(0, rnorm(n = n, mean = 5, sd = 2))
# ball = max(0, 1 + np.random.normal(1,0.3,1)[0])
sample_ball <- function(n) pmax(0, 1 + rnorm(n = n, mean = 1, sd = 0.3))
# bike = max(0, np.random.normal(20,10,1)[0])
sample_bike <- function(n) pmax(0, rnorm(n = n, mean = 20, sd = 10))
# train = max(0, np.random.normal(10,5,1)[0])
sample_train <- function(n) pmax(0, rnorm(n = n, mean = 10, sd = 5))
# coal = 47 * np.random.beta(0.5,0.5,1)[0]
sample_coal <- function(n) 47 * rbeta(n = n, shape1 = 0.5, shape2 = 0.5)
# book = np.random.chisquare(2,1)[0]
sample_book <- function(n) rchisq(n = n, df = 2)
# doll = np.random.gamma(5,1,1)[0]
sample_doll <- function(n) rgamma(n = n, shape = 5, rate = 1)
# block = np.random.triangular(5,10,20,1)[0]
sample_blocks <- function(n) {
  # n == 0 is answered directly instead of being passed on to rtriangle().
  if (n == 0) {
    return(numeric(0))
  }
  rtriangle(n = n, a = 5, c = 10, b = 20)
}
# gloves = 3.0 + np.random.rand(1)[0] if np.random.rand(1) < 0.3
#          else np.random.rand(1)[0]
sample_gloves <- function(n) {
  runif(n = n, min = 0, max = 1) + (runif(n = n, min = 0, max = 1) < .3) * 3
}

set.seed(1234)
# Draw one weight per gift, in row order, by calling the sampler whose name
# matches the gift's toy type. vapply() guarantees a plain numeric vector
# (exactly one number per gift) where sapply() could silently change type.
gifts$Weight <- vapply(
  as.character(gifts$Toy),
  function(toy) do.call(paste("sample", toy, sep = "_"), list(1)),
  numeric(1),
  USE.NAMES = FALSE
)

```
#Let's visualize the weight distributions#
```{r}
# One density panel per toy type; free scales because the weight ranges of
# the toy types differ a lot.
ggplot(data = gifts, aes(x = Weight, fill = Toy)) +
  geom_density() +
  facet_wrap(~Toy, ncol = 3, scales = "free") +
  theme_bw() +
  # Hide the legend (each facet is already labelled with its toy type).
  # theme() is used instead of the deprecated guides(fill = FALSE).
  theme(legend.position = "none") +
  ggtitle("Density plots of the toy's weight by toy type")
```

#Let's visualize the weight distributions with a boxplot#
```{r}
# Side-by-side boxplots of the simulated weights, one box per toy type.
ggplot(data = gifts, aes(y = Weight, x = Toy, fill = Toy)) +
  geom_boxplot() +
  # Hide the legend (the x axis already names each toy type).
  # theme() is used instead of the deprecated guides(fill = FALSE).
  theme(legend.position = "none") +
  ggtitle("Boxplots of weight distribution by toy type")
```

#Evaluation#

As the Evaluation tab tells us, "Submissions are evaluated on the total amount of weight you fit into Santa's 1000 bags", so we need to maximize the total weight of the bags. The key point is that our primary objective is to make Santa carry as much weight as possible, regardless of the total number of gifts.

We have then, a maximization function for all the gifts of all the bags:
$$\sum_{i=1}^{n}\left( \sum_{j=1}^{m}\ Weight_i*X_{i,j} \right)$$
Where each $Weight_i$ represents the Weight of each gift and each $X_{i,j}$ represents if the gift $i$ goes into the bag $j$.

$i$ goes from gift $1$ to gift $n$.
$j$ goes from bag $1$ to bag $m$.

Constraint Number 1: Each bag has to weigh less than 50 lb.
$$ \sum_{i=1}^{n}\ Weight_i*X_{i,j} \le 50\ lb\ \ \forall j = 1,..,m $$
Constraint Number 2: -Each bag has to carry at least 3 items.

$$\sum_{i=1}^{n}\ X_{i,j} \ge 3\    \forall j = 1,..,m $$

Constraint Number 3: An item can be only in one bag.

$$ \sum_{j=1}^{m}\ X_{i,j} \le 1 \  \forall i = 1,..,n $$
Constraint Number 4: $X_{i,j}$ is binary 
$$X_{i,j} \in\ \{ {0,1} \} \ \forall i = 1,..,n \ j = 1,..,m $$ 

#First Approach: Random path#

In this first approach I'll generate a random ordering of all the gifts and cut this list into bags whenever a bag is about to reach the 50 lb limit.

```{r}
set.seed(2223)

# Shuffle the gifts and draw a fresh simulated weight for each of them
# (one draw per gift, in shuffled order, so the run is reproducible).
sampleGiftId <- sample(gifts$GiftId)
sampleToy <- factor(str_extract(sampleGiftId, pattern = "[a-z]+"))
sampleWeight <- vapply(
  as.character(sampleToy),
  function(toy) do.call(paste("sample", toy, sep = "_"), list(1)),
  numeric(1),
  USE.NAMES = FALSE
)
# Bag 0 means "not packed"; real bags are numbered from 1.
sampleBag <- integer(length(sampleToy))

sample_gifts <- data.frame(
  GiftId = sampleGiftId,
  Toy = sampleToy,
  Weight = sampleWeight,
  Bag = sampleBag
)

# A bag is closed as soon as the next gift would push it above this weight.
# NOTE(review): 48 is the original threshold, presumably a safety margin
# below the 50 limit -- confirm.
bag_limit <- 48
# Gifts at or above this weight are never packed.
max_gift_weight <- 50

# Greedy pass over the shuffled gifts: keep filling the current bag and open
# a new one when the next gift does not fit. Every gift is labelled with the
# bag it actually went into, so weights, item counts and gift labels all
# refer to the same bag number.
j <- 1L
bagweight <- 0
items_in_bag <- 0L
for (i in seq_len(nrow(sample_gifts))) {
  gift_weight <- sample_gifts$Weight[i]
  if (gift_weight >= max_gift_weight) {
    next
  }
  # Only open a new bag if the current one already holds something,
  # otherwise an empty bag would be left behind.
  if (items_in_bag > 0L && bagweight + gift_weight > bag_limit) {
    j <- j + 1L
    bagweight <- 0
    items_in_bag <- 0L
  }
  bagweight <- bagweight + gift_weight
  items_in_bag <- items_in_bag + 1L
  sample_gifts$Bag[i] <- j
}

# Per-bag totals derived from the gift table (includes the last, possibly
# partially filled, bag). tapply() orders groups by increasing bag number,
# which matches sort(unique(...)) used for bag_name.
packed <- sample_gifts$Bag > 0
bag_weight <- as.numeric(
  tapply(sample_gifts$Weight[packed], sample_gifts$Bag[packed], sum)
)
bag_items <- as.integer(
  tapply(sample_gifts$Weight[packed], sample_gifts$Bag[packed], length)
)
bag_name <- sort(unique(sample_gifts$Bag[packed]))

summary(bag_weight)
length(bag_weight)
summary(bag_name)
     
```
We take out bags that do not respect the constraints (fewer than 3 items).


Now I show the weight distribution per bag.

```{r}
# One row per bag: total weight, number of items and bag number.
d <- data.frame(bag_weight, bag_items, bag_name)
# Split the bags into three equal-width ranges of item count.
f1 <- cut(bag_items, 3)
levels(f1) <- c("Low", "Medium", "High")

d$bag_item_q <- f1

# Distribution of bag weights (density scaled to counts).
ggplot(d, aes(bag_weight, ..count..)) +
  geom_density() +
  ggtitle("Count of bags by weight")

# Distribution of the number of items per bag. The title now names the
# variable on the x axis; it previously repeated the weight plot's title.
ggplot(d, aes(bag_items, ..count..)) +
  geom_density() +
  ggtitle("Count of bags by number of items")

# Bag weight distribution, one panel per item-count range.
ggplot(d, aes(bag_weight, ..count.., fill = bag_item_q)) +
  geom_density() +
  facet_wrap(~bag_item_q, ncol = 3, scales = "free") +
  ggtitle("Count of bags by weight diviving by number of items in the bag")


```



Now we choose the 1000 bags with more weight in them.
```{r}
# Keep only the bags that satisfy the minimum of three items.
has_min_items <- d$bag_items >= 3
d1 <- d[has_min_items, ]

# Rank the remaining bags from heaviest to lightest and keep at most 1000.
heaviest_first <- order(d1$bag_weight, decreasing = TRUE)
mt <- d1[head(heaviest_first, 1000), ]
summary(mt)

# Score of this solution: the combined weight of the selected bags.
Total_Weight <- sum(mt$bag_weight)
Total_Weight



```
The total weight for this solution is `r format(Total_Weight,digits=1)`  lb.

# Exporting the solution
```{r}
# Look up which of the shuffled gifts belong to the bags kept in the
# solution, then write one line per bag listing that bag's gift ids.

mt <- mt[order(mt$bag_name), ]
sample_gifts2 <- sample_gifts[order(sample_gifts$Bag), ]

# Gifts whose bag number is one of the selected bags.
in_solution <- sample_gifts$Bag %in% mt$bag_name
sample_gifts_f <- data.frame(sample_gifts[in_solution, ])
summary(sample_gifts_f)

# Collapse the gift ids of each bag into a single space-separated string.
tot_result <- aggregate(
  GiftId ~ Bag,
  data = sample_gifts_f,
  FUN = paste,
  collapse = " "
)
result <- list(tot_result$GiftId)
fwrite(result, file = "result.csv")

```




