♫ Bells are ringing, children singing, all is merry and bright. Santa’s elves made a big mistake, now he needs your help tonight ♫

Helping Santa fill his bags.

Libraries

library(stringr)
library(ggplot2)
library(triangle)
library(data.table)

Data Load

gifts   <- read.csv('input/gifts.csv')
#Tomo la priper palabra de cada regalo
gifts$Toy <- factor(str_extract(gifts$GiftId, pattern = "[a-z]+")) 
#Genero un gráfico para mostrar la distribución de regalos Santa
ggplot(data= gifts, aes(x= reorder(Toy, -table(Toy)[Toy]) , fill =Toy ))+
  geom_bar(stat="count") + 
  theme_classic() +
  xlab("Toy")+
  guides(fill = FALSE)

Gift Weight Probability Distribution

We compute the weight distribution for each gift

sample_horse
function(n) pmax(0, rnorm(n=n, mean=5, sd=2))

Let’s visualize the weight distributions

ggplot(data= gifts, aes (x= Weight, fill=Toy)) +
 geom_density() +
  facet_wrap(~Toy, ncol= 3, scales="free")+
  theme_bw() + 
  guides(fill=FALSE) +
  ggtitle("Density plots of the toy's weight by toy type")

Let’s visualize the weight distributions with a boxplot

ggplot (data= gifts, aes (y= Weight, x=Toy, fill=Toy))+
  geom_boxplot()+
   guides(fill=FALSE) +
  ggtitle("Boxplots of weight distribution by toy type")

Evaluation

As the Evaluation tab tells us, “Submissions are evaluated on the total amount of weight you fit into Santa’s 1000 bags”, so we need to maximize the total weight of the bags. The key point is that our primary objective is to make Santa carry as much weight as possible, regardless of the total number of gifts.

We have then, a maximization function for all the gifts of all the bags: \[\sum_{i=1}^{n}\left( \sum_{j=1}^{m}\ Weight_i*X_{i,j} \right)\] Where each \(Weight_i\) represents the Weight of each gift and each \(X_{i,j}\) represents if the gift \(i\) goes into the bag \(j\).

\(i\) goes from gift \(1\) to gift \(n\). \(j\) goes from bag \(1\) to bag \(m\).

Constraint Number 1: Each bag has to weigh no more than 50 lb. \[ \sum_{i=1}^{n}\ Weight_i*X_{i,j} \le 50\ \text{lb} \quad \forall j = 1,..,m \] Constraint Number 2: Each bag has to carry at least 3 items.

\[\sum_{i=1}^{n}\ X_{i,j} \ge 3\ \forall j = 1,..,m \]

Constraint Number 3: An item can be only in one bag.

\[ \sum_{j=1}^{m}\ X_{i,j} \le 1 \ \forall i = 1,..,n \] Constraint Number 4: \(X_{i,j}\) is binary \[X_{i,j} \in\ \{ {0,1} \} \ \forall i = 1,..,n \ j = 1,..,m \]

First Approach: Random path

In this first approach I’ll generate a random list of all the gifts and cut this list into bags whenever a bag is about to reach the 50 lb limit.

set.seed(2223)
sample_gifts<- data.frame(GiftId=integer(),Toy=character(),Weight=double, Bag=integer())
sampleGiftId<-sample(gifts$GiftId)
sampleToy <- factor(str_extract(sampleGiftId, pattern = "[a-z]+")) 
sampleWeight<-sapply(sampleToy,function(x) do.call(paste("sample",x, sep="_"), as.list(1)))
sampleBag<-rep(0,length(sampleToy))
sample_gifts<-data.frame(GiftId=sampleGiftId,Toy=sampleToy,Weight=sampleWeight,Bag=sampleBag)
bag_weight<-NULL
bag_items<-NULL
bag_name<-NULL
bag_name[1]<-1
bag_items[1]<-0;
bagweight=0
j=-1
for (i in 1:length(sample_gifts$Toy))
{
  if (sample_gifts$Weight[i]<50)
  {
    bagweight = bagweight + sample_gifts$Weight[i]
    if (bagweight>48) {
      
      j = j + 1
      bag_weight[j]<-bagweight - sample_gifts$Weight[i]
      bag_items[j]<-0
      bag_name[j]<-j
      bagweight = sample_gifts$Weight[i]
    }
   sample_gifts$Bag[i]=j
   bag_items[j]<-bag_items[j]+1
  }
}
summary(bag_weight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  6.692  38.260  43.260  40.760  45.990  48.000 
length(bag_weight)
[1] 1243
summary(bag_name)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0   311.5   622.0   622.0   932.5  1243.0 
     

We take out the bags that do not respect the constraints (fewer than 3 items).

Now I show the weight distribution per bag.

d<-data.frame(bag_weight,bag_items,bag_name)
f1<-cut(bag_items,3)
levels(f1)<-c("Low","Medium","High")
d$bag_item_q<-f1
  ggplot(d, aes(bag_weight, ..count..,)) +
  geom_density() +
  ggtitle("Count of bags by weight")

    
  ggplot(d, aes(bag_items, ..count..,)) +
  geom_density() +
  ggtitle("Count of bags by weight")

    
   ggplot(d, aes(bag_weight, ..count..,fill=bag_item_q)) +
  geom_density() +
  facet_wrap(~bag_item_q, ncol= 3, scales="free")+
  ggtitle("Count of bags by weight diviving by number of items in the bag")

Now we choose the 1000 heaviest bags.

d1<-d[bag_items>=3,]
mt <- head(d1[order(d1$bag_weight,decreasing=TRUE), ],1000)
summary(mt)
   bag_weight      bag_items         bag_name       bag_item_q 
 Min.   :32.90   Min.   : 3.000   Min.   :   1.0   Low   :541  
 1st Qu.:40.47   1st Qu.: 5.000   1st Qu.: 307.8   Medium:429  
 Median :44.13   Median : 6.000   Median : 614.5   High  : 30  
 Mean   :43.12   Mean   : 6.524   Mean   : 617.8               
 3rd Qu.:46.34   3rd Qu.: 8.000   3rd Qu.: 923.2               
 Max.   :48.00   Max.   :16.000   Max.   :1242.0               
Total_Weight<-sum(mt$bag_weight)
Total_Weight
[1] 43122.35

The total weight for this solution is 43122 lb.

Exporting the solution

summary(sample_gifts_f)
       GiftId          Toy           Weight            Bag        
 ball_0   :   1   book   :1123   Min.   : 0.000   Min.   :   1.0  
 ball_1   :   1   ball   :1023   1st Qu.: 2.044   1st Qu.: 303.0  
 ball_10  :   1   doll   : 939   Median : 4.564   Median : 608.0  
 ball_100 :   1   horse  : 938   Mean   : 6.351   Mean   : 615.2  
 ball_1000:   1   blocks : 930   3rd Qu.: 9.322   3rd Qu.: 924.0  
 ball_1001:   1   train  : 926   Max.   :39.386   Max.   :1242.0  
 (Other)  :6518   (Other): 645                                    
---
title: "SANTAS GIFTS"
output: html_notebook
---
♫ Bells are ringing, children singing, all is merry and bright. Santa's elves made a big mistake, now he needs your help tonight ♫

Helping Santa fill his bags.

# Libraries
```{r, warning=FALSE}
library(stringr)
library(ggplot2)
library(triangle)
library(data.table)
```

# Data Load
```{r, warning=FALSE}
# Load the gift list from the competition input file.
gifts <- read.csv("input/gifts.csv")
# Take the first word of each gift id (its leading run of lowercase letters)
# as the toy type.
gifts$Toy <- factor(str_extract(gifts$GiftId, pattern = "[a-z]+"))

# Bar chart of the number of gifts per toy type, most frequent toy first.
ggplot(data = gifts, aes(x = reorder(Toy, -table(Toy)[Toy]), fill = Toy)) +
  geom_bar(stat = "count") +
  theme_classic() +
  xlab("Toy") +
  # "none" hides the legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none")

```

# Gift Weight Probability Distribution

## We compute the weight distribution for each gift
```{r}
# Weight samplers, one per toy type. Each takes `n` and returns `n` random
# weights; the Python expression it mirrors is quoted above it.
# Taken from Ben Gorman's kernel. Thank you.
# https://www.kaggle.com/ben519/santas-uncertain-bags/merry-christmas-y-all

# horse = max(0, np.random.normal(5,2,1)[0])
sample_horse <- function(n) {
  draws <- rnorm(n = n, mean = 5, sd = 2)
  pmax(0, draws)
}

# ball = max(0, 1 + np.random.normal(1,0.3,1)[0])
sample_ball <- function(n) {
  draws <- 1 + rnorm(n = n, mean = 1, sd = 0.3)
  pmax(0, draws)
}

# bike = max(0, np.random.normal(20,10,1)[0])
sample_bike <- function(n) {
  draws <- rnorm(n = n, mean = 20, sd = 10)
  pmax(0, draws)
}

# train = max(0, np.random.normal(10,5,1)[0])
sample_train <- function(n) {
  draws <- rnorm(n = n, mean = 10, sd = 5)
  pmax(0, draws)
}

# coal = 47 * np.random.beta(0.5,0.5,1)[0]
sample_coal <- function(n) {
  47 * rbeta(n = n, shape1 = 0.5, shape2 = 0.5)
}

# book = np.random.chisquare(2,1)[0]
sample_book <- function(n) {
  rchisq(n = n, df = 2)
}

# doll = np.random.gamma(5,1,1)[0]
sample_doll <- function(n) {
  rgamma(n = n, shape = 5, rate = 1)
}

# block = np.random.triangular(5,10,20,1)[0]
sample_blocks <- function(n) {
  # Zero draws are answered directly, without calling rtriangle().
  if (n == 0) {
    return(numeric(0))
  }
  rtriangle(n = n, a = 5, b = 20, c = 10)
}

# gloves = 3.0 + np.random.rand(1)[0] if np.random.rand(1) < 0.3 else np.random.rand(1)[0]
sample_gloves <- function(n) {
  # The base draw is taken before the 30% "add 3" draw.
  base_weight <- runif(n = n, min = 0, max = 1)
  is_heavy <- runif(n = n, min = 0, max = 1) < .3
  base_weight + is_heavy * 3
}

# Draw one simulated weight per gift by calling the sampler named
# "sample_<toy>" for that gift's toy type. The seed makes the draws repeatable.
set.seed(1234)
# vapply() guarantees a plain numeric vector with exactly one weight per gift.
gifts$Weight <- vapply(
  as.character(gifts$Toy),
  function(toy) do.call(paste0("sample_", toy), list(1)),
  numeric(1),
  USE.NAMES = FALSE
)

```
# Let's visualize the weight distributions
```{r}
# Density of the simulated weights, one panel per toy type. Scales are free
# because the toy types have very different weight ranges.
ggplot(data = gifts, aes(x = Weight, fill = Toy)) +
  geom_density() +
  facet_wrap(~Toy, ncol = 3, scales = "free") +
  theme_bw() +
  # "none" hides the legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none") +
  ggtitle("Density plots of the toy's weight by toy type")
```

# Let's visualize the weight distributions with a boxplot
```{r}
# Boxplot of the simulated weights for each toy type.
ggplot(data = gifts, aes(y = Weight, x = Toy, fill = Toy)) +
  geom_boxplot() +
  # "none" hides the legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none") +
  ggtitle("Boxplots of weight distribution by toy type")
```

# Evaluation

As the Evaluation tab tells us, "Submissions are evaluated on the total amount of weight you fit into Santa's 1000 bags", so we need to maximize the total weight of the bags. The key point is that our primary objective is to make Santa carry as much weight as possible, regardless of the total number of gifts.

We have then, a maximization function for all the gifts of all the bags:
$$\sum_{i=1}^{n}\left( \sum_{j=1}^{m}\ Weight_i*X_{i,j} \right)$$
Where each $Weight_i$ represents the Weight of each gift and each $X_{i,j}$ represents if the gift $i$ goes into the bag $j$.

$i$ goes from gift $1$ to gift $n$.
$j$ goes from bag $1$ to bag $m$.

Constraint Number 1: Each bag has to weigh no more than 50 lb.
$$ \sum_{i=1}^{n}\ Weight_i*X_{i,j} \le 50\ \text{lb} \quad \forall j = 1,..,m $$
Constraint Number 2: -Each bag has to carry at least 3 items.

$$\sum_{i=1}^{n}\ X_{i,j} \ge 3\    \forall j = 1,..,m $$

Constraint Number 3: An item can be only in one bag.

$$ \sum_{j=1}^{m}\ X_{i,j} \le 1 \  \forall i = 1,..,n $$
Constraint Number 4: $X_{i,j}$ is binary 
$$X_{i,j} \in\ \{ {0,1} \} \ \forall i = 1,..,n \ j = 1,..,m $$ 

# First Approach: Random path

In this first approach I'll generate a random list of all the gifts and cut this list into bags whenever a bag is about to reach the 50 lb limit.

```{r}
# Random-path packing: shuffle the gifts, draw one simulated weight for each,
# then walk the shuffled list and start a new bag whenever the next gift would
# push the current bag above `bag_cutoff`.
set.seed(2223)

bag_limit <- 50   # a gift at or above this weight is never packed
bag_cutoff <- 48  # a bag is closed once the next gift would exceed this

sampleGiftId <- sample(gifts$GiftId)
sampleToy <- factor(str_extract(sampleGiftId, pattern = "[a-z]+"))
# One draw per gift from the sampler named "sample_<toy>".
sampleWeight <- vapply(
  as.character(sampleToy),
  function(toy) do.call(paste0("sample_", toy), list(1)),
  numeric(1),
  USE.NAMES = FALSE
)
# Bag 0 means "not packed"; real bags are numbered from 1.
sampleBag <- rep(0, length(sampleToy))

j <- 1          # id of the bag currently being filled
bagweight <- 0  # weight already placed in bag j
bagcount <- 0   # number of gifts already placed in bag j

for (i in seq_along(sampleWeight)) {
  gift_weight <- sampleWeight[i]
  if (gift_weight < bag_limit) {
    # Close the current bag first if this gift would overfill it. An empty bag
    # is never closed, so every bag id that gets used holds at least one gift.
    if (bagcount > 0 && bagweight + gift_weight > bag_cutoff) {
      j <- j + 1
      bagweight <- 0
      bagcount <- 0
    }
    sampleBag[i] <- j
    bagweight <- bagweight + gift_weight
    bagcount <- bagcount + 1
  }
}

sample_gifts <- data.frame(
  GiftId = sampleGiftId,
  Toy = sampleToy,
  Weight = sampleWeight,
  Bag = sampleBag
)

# Per-bag totals are derived from the final assignment, so bag_weight[k] and
# bag_items[k] always describe exactly the gifts whose Bag equals bag_name[k]
# (including the last bag, which is never "closed" inside the loop).
packed <- sample_gifts$Bag > 0
bag_name <- sort(unique(sample_gifts$Bag[packed]))
bag_index <- factor(sample_gifts$Bag[packed], levels = bag_name)
bag_weight <- as.numeric(tapply(sample_gifts$Weight[packed], bag_index, sum))
bag_items <- as.vector(table(bag_index))

summary(bag_weight)
length(bag_weight)
summary(bag_name)
     
```
We take out the bags that do not respect the constraints (fewer than 3 items).


Now I show the weight distribution per bag.

```{r}
# One row per bag, plus a three-level bin of the item count used for faceting.
d <- data.frame(bag_weight, bag_items, bag_name)
# cut() splits the observed range of bag_items into 3 equal-width intervals,
# which are then relabelled.
f1 <- cut(bag_items, 3)
levels(f1) <- c("Low", "Medium", "High")

d$bag_item_q <- f1

# Distribution of bag weights, scaled to counts.
ggplot(d, aes(bag_weight, ..count..)) +
  geom_density() +
  ggtitle("Count of bags by weight")

# Distribution of the number of items per bag, scaled to counts.
ggplot(d, aes(bag_items, ..count..)) +
  geom_density() +
  ggtitle("Count of bags by number of items")

# Bag-weight distribution split by the item-count bin.
ggplot(d, aes(bag_weight, ..count.., fill = bag_item_q)) +
  geom_density() +
  facet_wrap(~bag_item_q, ncol = 3, scales = "free") +
  ggtitle("Count of bags by weight, divided by number of items in the bag")


```



Now we choose the 1000 heaviest bags.
```{r}
# Keep only the bags that satisfy the "at least 3 items" constraint. The filter
# reads the column of `d` itself so it cannot drift from the rows it selects.
d1 <- d[d$bag_items >= 3, ]
# Take (up to) the 1000 heaviest of the remaining bags.
mt <- head(d1[order(d1$bag_weight, decreasing = TRUE), ], 1000)
summary(mt)
# Score of this solution: total weight carried by the selected bags.
Total_Weight <- sum(mt$bag_weight)
Total_Weight



```
The total weight for this solution is `r format(Total_Weight,digits=1)`  lb.

# Exporting the solution
```{r}
# Find which gifts belong to the bags selected in `mt`, then write one line per
# bag containing that bag's gift ids separated by spaces.

mt <- mt[order(mt$bag_name, decreasing = FALSE), ]
# Sort the gifts by bag and filter the sorted copy, so gifts are grouped by bag.
sample_gifts2 <- sample_gifts[order(sample_gifts$Bag, decreasing = FALSE), ]
sample_gifts_f <- sample_gifts2[sample_gifts2$Bag %in% mt$bag_name, ]
summary(sample_gifts_f)

# Collapse each bag's gift ids into a single space-separated string.
tot_result <- aggregate(
  GiftId ~ Bag,
  data = sample_gifts_f,
  paste,
  collapse = " "
)
# Name the column so the CSV gets a `Gifts` header line.
# NOTE(review): confirm the header against the competition's sample submission.
result <- list(Gifts = tot_result$GiftId)
fwrite(result, file = "result.csv")

```




