library(dplyr)
library(ggplot2)
library(Amelia)
# Load the training and test sets; empty fields are read as NA.
train <- read.csv(file = "train.csv", na.strings = "", stringsAsFactors = FALSE)
test <- read.csv(file = "test.csv", na.strings = "", stringsAsFactors = FALSE)

# Passengers with no recorded port of embarkation are assigned "S"
# (Southampton).
train$Embarked[is.na(train$Embarked)] <- "S"
test$Embarked[is.na(test$Embarked)] <- "S"

# Draw Amelia's missingness map for the training set.
missmap(train)

# Extract the title from passenger names.
#
# Each name is split on "," and "."; the second piece, with surrounding
# whitespace removed, is returned as the title.
# NOTE(review): assumes names look like "Surname, Title. Given names" --
# confirm against the data.
#
# Args:
#   name: Character vector of names (a single name works as before).
# Returns:
#   Character vector of the same length as `name`; NA where a name has no
#   second piece.
extract_title <- function(name) {
  pieces <- strsplit(name, split = "[.,]")
  # Take the second piece of every name, not just of the first element, so
  # the function is safe on whole columns and on zero-length input.
  second_piece <- vapply(pieces, function(p) p[2], character(1),
                         USE.NAMES = FALSE)
  trimws(second_piece)
}

# Add the Title column to both data sets.
# vapply() is used instead of sapply() so the result is always a character
# vector (sapply() would return a list for empty input).
train$Title <- vapply(train$Name, FUN = extract_title,
                      FUN.VALUE = character(1), USE.NAMES = FALSE)
test$Title <- vapply(test$Name, FUN = extract_title,
                     FUN.VALUE = character(1), USE.NAMES = FALSE)

# Cross-tabulate titles by sex in the training data (exploratory output).
table(train$Title, train$Sex)

# Collapse equivalent titles onto a single label in both data sets.
miss_aliases <- c("Miss", "Mlle", "Lady", "Ms")
mrs_aliases <- c("Mrs", "Mme", "Dona")

# Every alias of "Miss" becomes "Miss".
train$Title[train$Title %in% miss_aliases] <- "Miss"
test$Title[test$Title %in% miss_aliases] <- "Miss"

# Every alias of "Mrs" becomes "Mrs".
train$Title[train$Title %in% mrs_aliases] <- "Mrs"
test$Title[test$Title %in% mrs_aliases] <- "Mrs"



# Flag passengers as adult (1) or child (0).
#
# A passenger older than `cutoff` is an adult, otherwise a child. When the
# age is missing the title decides: "Master" is flagged as a child, any other
# title as an adult.
#
# Args:
#   age:    Numeric vector of ages, may contain NA.
#   title:  Character vector of titles, same length as `age`.
#   cutoff: Age above which a passenger counts as an adult (default 12).
# Returns:
#   Numeric vector of 0/1 flags.
adult_flag <- function(age, title, cutoff = 12) {
  # %in% never returns NA, so a passenger with both age and title missing is
  # flagged as an adult instead of being left as NA.
  ifelse(is.na(age),
         yes = ifelse(trimws(title) %in% "Master", 0, 1),
         no = ifelse(age > cutoff, 1, 0))
}

train$Adult <- adult_flag(train$Age, train$Title)
test$Adult <- adult_flag(test$Age, test$Title)


# Inspect the fare distribution in the training data (exploratory output).
summary(train$Fare)

# Fill missing test fares with the training median. na.rm = TRUE keeps the
# median defined even if the training fares contain NA; without it the
# replacement value would itself be NA and the imputation would do nothing.
fare_median <- median(train$Fare, na.rm = TRUE)
test$Fare[is.na(test$Fare)] <- fare_median

# Fit a linear model of Survived on the engineered and raw predictors.
modelo1 <- lm(
  formula = Survived ~ Adult + Title + Embarked + Fare + Sex + Pclass,
  data = train
)

# Score the test set; a fitted value above 0.5 is labelled 1, otherwise 0.
fitted_test <- predict(object = modelo1, newdata = test)
survived_flag <- ifelse(fitted_test > 0.5, yes = 1, no = 0)

# Assemble the PassengerId / Survived table and write it without row names.
prediccion <- data.frame(PassengerId = test$PassengerId,
                         Survived = survived_flag)
write.csv(prediccion, file = "ModAdultTitle.csv", row.names = FALSE)
LS0tCnRpdGxlOiAiQ2xhc2lmaWNhY2nDs24iCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyIGxpYnJlcmlhc30KbGlicmFyeShkcGx5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KEFtZWxpYSkKYGBgCgpgYGB7cn0KdHJhaW48LXJlYWQuY3N2KCJ0cmFpbi5jc3YiLHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSwgbmEuc3RyaW5ncyA9ICIiKQp0ZXN0PC1yZWFkLmNzdigidGVzdC5jc3YiLHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSwgbmEuc3RyaW5ncyA9ICIiKQpgYGAKCgoKYGBge3J9CiMgQWdyZWdhIGNvbW8gU291dGhIYW1wdG9uIGEgRW1iYXJrIHNpIGZhbHRhIGxhIGRhdGEKdHJhaW4kRW1iYXJrZWQ8LWlmZWxzZShpcy5uYSh0cmFpbiRFbWJhcmtlZCksCiAgICAgICAgICAgICAgICAgICAgICAgeWVzID0gIlMiLAogICAgICAgICAgICAgICAgICAgICAgIG5vID0gdHJhaW4kRW1iYXJrZWQpCnRlc3QkRW1iYXJrZWQ8LWlmZWxzZShpcy5uYSh0ZXN0JEVtYmFya2VkKSwKICAgICAgICAgICAgICAgICAgICAgIHllcyA9ICJTIiwKICAgICAgICAgICAgICAgICAgICAgIG5vID0gdGVzdCRFbWJhcmtlZCkKbWlzc21hcCh0cmFpbikKCiMgRnVuY2nDs24gcGFyYSBleHRyYWVyIGVsIHRpdHVsbyBkZWwgbm9tYnJlCmV4dHJhY3RfdGl0bGUgPC0gZnVuY3Rpb24obmFtZSl7CiAgeTwtc3Ryc3BsaXQobmFtZSxzcGxpdCA9ICJbLixdIikKICB5PC15W1sxXV1bMl0KICByZXR1cm4odHJpbXdzKHkpKQp9CgojIEFncmVnYW1vcyBsYSBjb2x1bW5hIGRlbCB0aXR1bG8KdHJhaW4kVGl0bGU8LXNhcHBseSh0cmFpbiROYW1lLCBGVU4gPSBleHRyYWN0X3RpdGxlLFVTRS5OQU1FUyA9IEZBTFNFKQp0ZXN0JFRpdGxlPC1zYXBwbHkodGVzdCROYW1lLCBGVU4gPSBleHRyYWN0X3RpdGxlLFVTRS5OQU1FUyA9IEZBTFNFKQoKdGFibGUodHJhaW4kVGl0bGUsIHRyYWluJFNleCkKCnRyYWluJFRpdGxlPC1pZmVsc2UodHJhaW4kVGl0bGUgJWluJSBjKCJNaXNzIiwiTWxsZSIsIkxhZHkiLCJNcyIpLAogICAgICAgICAgICAgICAgICAgIHllcyA9ICJNaXNzIiwKICAgICAgICAgICAgICAgICAgICBubyA9ICB0cmFpbiRUaXRsZSkKCnRlc3QkVGl0bGU8LWlmZWxzZSh0ZXN0JFRpdGxlICVpbiUgYygiTWlzcyIsIk1sbGUiLCAiTGFkeSIsICJNcyIpLAogICAgICAgICAgICAgICAgICAgeWVzID0gIk1pc3MiLAogICAgICAgICAgICAgICAgICAgbm8gPSAgdGVzdCRUaXRsZSkKCnRyYWluJFRpdGxlPC1pZmVsc2UodHJhaW4kVGl0bGUgJWluJSBjKCJNcnMiLCJNbWUiLCAiRG9uYSIpLAogICAgICAgICAgICAgICAgICAgIHllcyA9ICJNcnMiLAogICAgICAgICAgICAgICAgICAgIG5vID0gIHRyYWluJFRpdGxlKQoKdGVzdCRUaXRsZTwtaWZlbHNlKHRlc3QkVGl0bGUgJWluJSBjKCJNcnMiLCJNbWUiLCAiRG9uYSIpLAogICAgICAgICAgICAgICAgICAgeWVzID0gIk1ycyIsCiAgICAgICAgICAgICAgICAgICBubyA9ICB0ZXN0JFRpdGxlKQoKCgp0
cmFpbiRBZHVsdDwtaWZlbHNlKHRyYWluJEFnZSA+IDEyLHllcyA9IDEsIG5vID0gMCkKdGVzdCRBZHVsdDwtaWZlbHNlKHRlc3QkQWdlID4gMTIseWVzID0gMSwgbm8gPSAwKQoKdHJhaW4kQWR1bHQ8LWlmZWxzZShpcy5uYSh0cmFpbiRBZ2UpICYgdHJpbXdzKHRyYWluJFRpdGxlKSA9PSAiTWFzdGVyIiAsCiAgICAgICAgICAgICAgICAgICAgeWVzID0gMCwKICAgICAgICAgICAgICAgICAgICBubyA9IGlmZWxzZShpcy5uYSh0cmFpbiRBZ2UpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHllcyA9IDEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm8gPSB0cmFpbiRBZHVsdCkgICAKKQp0ZXN0JEFkdWx0PC1pZmVsc2UoaXMubmEodGVzdCRBZ2UpICYgdHJpbXdzKHRlc3QkVGl0bGUpID09ICJNYXN0ZXIiICwKICAgICAgICAgICAgICAgICAgIHllcyA9IDAsCiAgICAgICAgICAgICAgICAgICBubyA9IGlmZWxzZShpcy5uYSh0ZXN0JEFnZSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB5ZXMgPSAxLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm8gPSB0ZXN0JEFkdWx0KSAgIAopCgoKc3VtbWFyeSh0cmFpbiRGYXJlKQoKdGVzdCRGYXJlIDwtIGlmZWxzZShpcy5uYSh0ZXN0JEZhcmUpLAogICAgICAgICAgICAgICAgICAgIHllcyA9IG1lZGlhbih0cmFpbiRGYXJlKSwKICAgICAgICAgICAgICAgICAgICBubz10ZXN0JEZhcmUpCgptb2RlbG8xPC1sbShkYXRhID0gdHJhaW4sIAogICAgICAgICAgICBmb3JtdWxhID0gU3Vydml2ZWQgfiBBZHVsdCArCiAgICAgICAgICAgICAgVGl0bGUgKyBFbWJhcmtlZCArIEZhcmUgKwogICAgICAgICAgICAgIFNleCArIFBjbGFzcykKCnByZWRpY2Npb248LXByZWRpY3Qob2JqZWN0ID0gbW9kZWxvMSxuZXdkYXRhID0gdGVzdCkKcHJlZGljY2lvbiA8LSBpZmVsc2UocHJlZGljY2lvbj4wLjUsIHllcyA9IDEsIG5vID0gMCkKcHJlZGljY2lvbiA8LSBkYXRhLmZyYW1lKFBhc3NlbmdlcklkID0gdGVzdCRQYXNzZW5nZXJJZCwKICAgICAgICAgICAgICAgICAgICAgICAgICBTdXJ2aXZlZCA9IHByZWRpY2Npb24pCgp3cml0ZS5jc3YocHJlZGljY2lvbiwgZmlsZT0iTW9kQWR1bHRUaXRsZS5jc3YiLHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCgoKCgo=