Spaceship Titanic, 1997 yılında çıkan bir macera oyunudur. Lüks bir uzay gemisi olan Titanic’in içinde geçer. Oyuncunun bulmacaları çözmesi ve karakterlerle etkileşimde bulunarak Spaceship Titanic’in hikâyesini keşfetmesi gerekiyor. Gerçekten eğlenceli bir oyun!
# Load the training and test sets once; readr only needs to be attached once.
library(readr)
train <- read_csv("train.csv")
test <- read_csv("test.csv")
str(train)
## spc_tbl_ [8,693 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ PassengerId : chr [1:8693] "0001_01" "0002_01" "0003_01" "0003_02" ...
## $ HomePlanet : chr [1:8693] "Europa" "Earth" "Europa" "Europa" ...
## $ CryoSleep : logi [1:8693] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Cabin : chr [1:8693] "B/0/P" "F/0/S" "A/0/S" "A/0/S" ...
## $ Destination : chr [1:8693] "TRAPPIST-1e" "TRAPPIST-1e" "TRAPPIST-1e" "TRAPPIST-1e" ...
## $ Age : num [1:8693] 39 24 58 33 16 44 26 28 35 14 ...
## $ VIP : logi [1:8693] FALSE FALSE TRUE FALSE FALSE FALSE ...
## $ RoomService : num [1:8693] 0 109 43 0 303 0 42 0 0 0 ...
## $ FoodCourt : num [1:8693] 0 9 3576 1283 70 ...
## $ ShoppingMall: num [1:8693] 0 25 0 371 151 0 3 0 17 0 ...
## $ Spa : num [1:8693] 0 549 6715 3329 565 ...
## $ VRDeck : num [1:8693] 0 44 49 193 2 0 0 NA 0 0 ...
## $ Name : chr [1:8693] "Maham Ofracculy" "Juanna Vines" "Altark Susent" "Solam Susent" ...
## $ Transported : logi [1:8693] FALSE TRUE FALSE FALSE TRUE TRUE ...
## - attr(*, "spec")=
## .. cols(
## .. PassengerId = col_character(),
## .. HomePlanet = col_character(),
## .. CryoSleep = col_logical(),
## .. Cabin = col_character(),
## .. Destination = col_character(),
## .. Age = col_double(),
## .. VIP = col_logical(),
## .. RoomService = col_double(),
## .. FoodCourt = col_double(),
## .. ShoppingMall = col_double(),
## .. Spa = col_double(),
## .. VRDeck = col_double(),
## .. Name = col_character(),
## .. Transported = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
Bazı boşluklar (eksik değerler) var; boşlukları kapatmadan önce str() ile veri yapısını inceliyoruz. PassengerId, yolcunun aile (grup) numarasını ve grup içindeki sıra numarasını verir.
str(test)
## spc_tbl_ [4,277 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ PassengerId : chr [1:4277] "0013_01" "0018_01" "0019_01" "0021_01" ...
## $ HomePlanet : chr [1:4277] "Earth" "Earth" "Europa" "Europa" ...
## $ CryoSleep : logi [1:4277] TRUE FALSE TRUE FALSE FALSE FALSE ...
## $ Cabin : chr [1:4277] "G/3/S" "F/4/S" "C/0/S" "C/1/S" ...
## $ Destination : chr [1:4277] "TRAPPIST-1e" "TRAPPIST-1e" "55 Cancri e" "TRAPPIST-1e" ...
## $ Age : num [1:4277] 27 19 31 38 20 31 21 20 23 24 ...
## $ VIP : logi [1:4277] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ RoomService : num [1:4277] 0 0 0 0 10 0 0 0 0 0 ...
## $ FoodCourt : num [1:4277] 0 9 0 6652 0 ...
## $ ShoppingMall: num [1:4277] 0 0 0 0 635 263 0 0 0 0 ...
## $ Spa : num [1:4277] 0 2823 0 181 0 ...
## $ VRDeck : num [1:4277] 0 0 0 585 0 60 0 0 0 0 ...
## $ Name : chr [1:4277] "Nelly Carsoning" "Lerome Peckers" "Sabih Unhearfus" "Meratz Caltilter" ...
## - attr(*, "spec")=
## .. cols(
## .. PassengerId = col_character(),
## .. HomePlanet = col_character(),
## .. CryoSleep = col_logical(),
## .. Cabin = col_character(),
## .. Destination = col_character(),
## .. Age = col_double(),
## .. VIP = col_logical(),
## .. RoomService = col_double(),
## .. FoodCourt = col_double(),
## .. ShoppingMall = col_double(),
## .. Spa = col_double(),
## .. VRDeck = col_double(),
## .. Name = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Build DataExplorer's automated profiling report for the training data.
# The call runs a series of report sections and renders the result to HTML
# through pandoc (see the progress log and pandoc command printed below).
library(DataExplorer)
create_report(train)
##
|
| | 0%
|
|. | 2%
|
|.. | 5% [global_options]
|
|... | 7%
|
|.... | 10% [introduce]
|
|.... | 12%
|
|..... | 14% [plot_intro]
|
|...... | 17%
|
|....... | 19% [data_structure]
|
|........ | 21%
|
|......... | 24% [missing_profile]
|
|.......... | 26%
|
|........... | 29% [univariate_distribution_header]
|
|........... | 31%
|
|............ | 33% [plot_histogram]
|
|............. | 36%
|
|.............. | 38% [plot_density]
|
|............... | 40%
|
|................ | 43% [plot_frequency_bar]
|
|................. | 45%
|
|.................. | 48% [plot_response_bar]
|
|.................. | 50%
|
|................... | 52% [plot_with_bar]
|
|.................... | 55%
|
|..................... | 57% [plot_normal_qq]
|
|...................... | 60%
|
|....................... | 62% [plot_response_qq]
|
|........................ | 64%
|
|......................... | 67% [plot_by_qq]
|
|.......................... | 69%
|
|.......................... | 71% [correlation_analysis]
|
|........................... | 74%
|
|............................ | 76% [principal_component_analysis]
|
|............................. | 79%
|
|.............................. | 81% [bivariate_distribution_header]
|
|............................... | 83%
|
|................................ | 86% [plot_response_boxplot]
|
|................................. | 88%
|
|................................. | 90% [plot_by_boxplot]
|
|.................................. | 93%
|
|................................... | 95% [plot_response_scatterplot]
|
|.................................... | 98%
|
|.....................................| 100% [plot_by_scatterplot]
## "C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS "C:\Users\NDJELA~1\ONEDRI~1\Desktop\finalpro\REPORT~1.MD" --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc22c44cae1e44.html --lua-filter "C:\Users\NDJELASSEM\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\NDJELASSEM\AppData\Local\R\win-library\4.2\rmarkdown\rmarkdown\lua\latex-div.lua" --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template "C:\Users\NDJELASSEM\AppData\Local\R\win-library\4.2\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable "mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --include-in-header "C:\Users\NDJELA~1\AppData\Local\Temp\Rtmpym5eyC\rmarkdown-str22c44bc02d0.html"
library(tidyverse)
library(explore)
## Warning: package 'explore' was built under R version 4.2.3
# tidyverse and explore are already attached above, so they are not reloaded.
# Per-column overview: type, NA count/percentage, unique values, min/mean/max.
train %>% describe_all()
## # A tibble: 14 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 PassengerId chr 0 0 8693 NA NA NA
## 2 HomePlanet chr 201 2.3 4 NA NA NA
## 3 CryoSleep lgl 217 2.5 3 0 0.36 1
## 4 Cabin chr 199 2.3 6561 NA NA NA
## 5 Destination chr 182 2.1 4 NA NA NA
## 6 Age dbl 179 2.1 81 0 28.8 79
## 7 VIP lgl 203 2.3 3 0 0.02 1
## 8 RoomService dbl 181 2.1 1274 0 225. 14327
## 9 FoodCourt dbl 183 2.1 1508 0 458. 29813
## 10 ShoppingMall dbl 208 2.4 1116 0 174. 23492
## 11 Spa dbl 183 2.1 1328 0 311. 22408
## 12 VRDeck dbl 188 2.2 1307 0 305. 24133
## 13 Name chr 200 2.3 8474 NA NA NA
## 14 Transported lgl 0 0 2 0 0.5 1
head(train)
## # A tibble: 6 × 14
## PassengerId HomePlanet CryoSleep Cabin Destination Age VIP RoomService
## <chr> <chr> <lgl> <chr> <chr> <dbl> <lgl> <dbl>
## 1 0001_01 Europa FALSE B/0/P TRAPPIST-1e 39 FALSE 0
## 2 0002_01 Earth FALSE F/0/S TRAPPIST-1e 24 FALSE 109
## 3 0003_01 Europa FALSE A/0/S TRAPPIST-1e 58 TRUE 43
## 4 0003_02 Europa FALSE A/0/S TRAPPIST-1e 33 FALSE 0
## 5 0004_01 Earth FALSE F/1/S TRAPPIST-1e 16 FALSE 303
## 6 0005_01 Earth FALSE F/0/P PSO J318.5-22 44 FALSE 0
## # ℹ 6 more variables: FoodCourt <dbl>, ShoppingMall <dbl>, Spa <dbl>,
## # VRDeck <dbl>, Name <chr>, Transported <lgl>
unique(train$CryoSleep)
## [1] FALSE TRUE NA
# Turn CryoSleep into a factor that keeps NA as an explicit level.
# The source column must be CryoSleep itself: reading from Destination here
# would silently replace the cryo-sleep flag with destination names.
# NOTE: the rendered summaries further down were produced before this fix
# (they report 4 unique CryoSleep values, i.e. the Destination levels);
# re-knit the document to refresh them.
train$CryoSleep <- addNA(train$CryoSleep)
test$CryoSleep <- addNA(test$CryoSleep)
train %>% describe_all()
## # A tibble: 14 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 PassengerId chr 0 0 8693 NA NA NA
## 2 HomePlanet chr 201 2.3 4 NA NA NA
## 3 CryoSleep fct 0 0 4 NA NA NA
## 4 Cabin chr 199 2.3 6561 NA NA NA
## 5 Destination chr 182 2.1 4 NA NA NA
## 6 Age dbl 179 2.1 81 0 28.8 79
## 7 VIP lgl 203 2.3 3 0 0.02 1
## 8 RoomService dbl 181 2.1 1274 0 225. 14327
## 9 FoodCourt dbl 183 2.1 1508 0 458. 29813
## 10 ShoppingMall dbl 208 2.4 1116 0 174. 23492
## 11 Spa dbl 183 2.1 1328 0 311. 22408
## 12 VRDeck dbl 188 2.2 1307 0 305. 24133
## 13 Name chr 200 2.3 8474 NA NA NA
## 14 Transported lgl 0 0 2 0 0.5 1
# Impute missing ages with the mean age of the passenger's home planet.
# The join key must match the column name exactly ("HomePlanet") and use
# plain ASCII quotes, otherwise the code neither parses nor joins.
age_means <- train %>%
  group_by(HomePlanet) %>%
  summarize(mean_age = mean(Age, na.rm = TRUE))

train_data <- train %>%
  left_join(age_means, by = "HomePlanet")

# The imputed values go into a new lowercase `age` column; the original
# `Age` column is left untouched.
train_data <- train_data %>%
  mutate(age = coalesce(Age, mean_age))
train %>% describe_all()
## # A tibble: 14 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 PassengerId chr 0 0 8693 NA NA NA
## 2 HomePlanet chr 201 2.3 4 NA NA NA
## 3 CryoSleep fct 0 0 4 NA NA NA
## 4 Cabin chr 199 2.3 6561 NA NA NA
## 5 Destination chr 182 2.1 4 NA NA NA
## 6 Age dbl 179 2.1 81 0 28.8 79
## 7 VIP lgl 203 2.3 3 0 0.02 1
## 8 RoomService dbl 181 2.1 1274 0 225. 14327
## 9 FoodCourt dbl 183 2.1 1508 0 458. 29813
## 10 ShoppingMall dbl 208 2.4 1116 0 174. 23492
## 11 Spa dbl 183 2.1 1328 0 311. 22408
## 12 VRDeck dbl 188 2.2 1307 0 305. 24133
## 13 Name chr 200 2.3 8474 NA NA NA
## 14 Transported lgl 0 0 2 0 0.5 1
unique(train$Destination)
## [1] "TRAPPIST-1e" "PSO J318.5-22" "55 Cancri e" NA
# Visual check of the FoodCourt spending distribution before imputing.
hist(train$FoodCourt)

# Keep missing VIP values as an explicit factor level.
train$VIP <- addNA(train$VIP)
test$VIP <- addNA(test$VIP)

# `train` is deliberately left ungrouped: nothing in this section aggregates
# by Destination, and a lingering group_by() would silently change how later
# dplyr verbs behave on the data.
hist(train$RoomService)

# Treat a missing FoodCourt bill as "nothing spent".
train <- train %>% mutate(FoodCourt = coalesce(FoodCourt, 0))
test <- test %>% mutate(FoodCourt = coalesce(FoodCourt, 0))
train %>% describe_all()
## # A tibble: 14 × 8
## variable type na na_pct unique min mean max
## <chr> <chr> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 PassengerId chr 0 0 8693 NA NA NA
## 2 HomePlanet chr 201 2.3 4 NA NA NA
## 3 CryoSleep fct 0 0 4 NA NA NA
## 4 Cabin chr 199 2.3 6561 NA NA NA
## 5 Destination chr 182 2.1 4 NA NA NA
## 6 Age dbl 179 2.1 81 0 28.8 79
## 7 VIP fct 0 0 3 NA NA NA
## 8 RoomService dbl 181 2.1 1274 0 225. 14327
## 9 FoodCourt dbl 0 0 1507 0 448. 29813
## 10 ShoppingMall dbl 208 2.4 1116 0 174. 23492
## 11 Spa dbl 183 2.1 1328 0 311. 22408
## 12 VRDeck dbl 188 2.2 1307 0 305. 24133
## 13 Name chr 200 2.3 8474 NA NA NA
## 14 Transported lgl 0 0 2 0 0.5 1
# Histogram of RoomService spending, one panel row per Destination.
train %>%
  ggplot(mapping = aes(x = RoomService)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(rows = vars(Destination))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 181 rows containing non-finite values (`stat_bin()`).
SVM, “Support Vector Machine” (Destek Vektör Makinesi) ifadesinin kısaltmasıdır. Bu, makine öğrenimi alanında kullanılan bir sınıflandırma ve regresyon yöntemidir. SVM, verileri iki veya daha fazla sınıfa ayırmak için kullanılır. Veri noktalarını en iyi şekilde ayıran bir hiperdüzlem bulmaya çalışır. SVM, birçok uygulama alanında başarılı sonuçlar veren güçlü bir algoritmadır.
# Linear-kernel SVM classifier for Transported, scored on the hold-out set.
# NOTE(review): training_set / testing_set are created outside this section;
# confirm they are the preprocessed split of `train`.
library(e1071)
fit_svm <- svm(
  Transported ~ .,
  data = training_set,
  type = "C-classification",
  kernel = "linear"
)

# NOTE(review): predict.svm() has no documented `type` argument; it is kept
# here as in the original call -- confirm whether it can be dropped.
preds <- predict(fit_svm, newdata = testing_set, type = "raw") %>%
  data.frame()

# Piping a bare vector into data.frame() yields a single column named `.`.
y_pred <- ifelse(preds$. == TRUE, 1, 0)
Decision trees (karar ağaçları), makine öğrenimi alanında kullanılan bir sınıflandırma ve regresyon yöntemidir. Veri setini ağaç yapısı şeklinde temsil eder ve her bir düğümde bir karar noktası bulunur. Bu karar noktasına göre veri seti bölünür ve ağacın dallarında ilerlenir.
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.2.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Convert the response to a factor in every modelling data set.
# NOTE(review): rpart() is expected to pick a classification tree only for a
# factor response -- confirm against the rpart documentation.
training_set$Transported <- as.factor(training_set$Transported)
testing_set$Transported <- as.factor(testing_set$Transported)
train_set$Transported <- as.factor(train_set$Transported)

# Fit a decision tree on the training split and draw it.
fit_tree <- rpart(Transported ~ ., data = training_set)
rpart.plot(fit_tree)
Naive Bayes, makine öğrenmesi alanında sınıflandırma problemlerini çözmek için kullanılan bir algoritmadır. Bu algoritma, veri setindeki özelliklerin birbirinden bağımsız olduğunu varsayar ve bu varsayıma dayanarak sınıflandırma yapar. Örneğin, bir e-postanın spam veya spam olmayan bir e-posta olduğunu belirlemek için kullanılabilir. Bu algoritma, e-postanın içerdiği kelimelerin olasılıklarını hesaplar ve bu olasılıkları kullanarak e-postayı bir sınıfa atar.
# Naive Bayes classifier for Transported, evaluated on the hold-out set.
library(e1071)
fit_nb <- naiveBayes(Transported ~ ., data = training_set)

# Column 11 is dropped from the hold-out data before predicting
# (presumably the Transported response -- TODO confirm the column index).
# type = "raw" requests class probabilities instead of class labels.
preds <- predict(fit_nb, newdata = testing_set[-11], type = "raw") %>%
  data.frame()

# data.frame() stores the probability of class TRUE in a column named `TRUE.`;
# classify as 1 when that probability exceeds 0.5.
y_pred <- ifelse(preds$TRUE. > 0.5, 1, 0)

# Confusion matrix: true labels in rows, predictions in columns.
# NOTE(review): y_true is defined outside this section -- verify it is the
# 0/1 encoding of testing_set$Transported.
cm <- table(y_true, y_pred)
cm
##       y_pred
## y_true    0    1
##      0  510  569
##      1   78 1016
# Accuracy = correctly classified cases (the diagonal of the confusion matrix)
# divided by all cases. Computing it from `cm` keeps the figure in sync with
# the model instead of relying on counts copied by hand from printed output.
sum(diag(cm)) / sum(cm)
## [1] 0.7022549
# stringr must be attached before str_to_title() is used below.
library(stringr)

# Refit Naive Bayes on the full training data and score the competition test
# set.
# NOTE(review): train_set / test_set are created outside this section; confirm
# they are the preprocessed versions of `train` / `test` with matching rows.
nb_son <- naiveBayes(Transported ~ ., data = train_set)

# The predictions must be stored in `preds`, the object read on the next
# line; storing them under another name would make the submission reuse the
# earlier hold-out predictions.
preds <- predict(nb_son, newdata = test_set, type = "raw") %>%
  data.frame()
y_pred <- preds$TRUE. > 0.5

# Submission table: one row per test passenger, with the prediction written
# in title case ("True" / "False").
submission <- data.frame(
  PassengerId = test$PassengerId,
  Transported = str_to_title(as.character(y_pred)),
  stringsAsFactors = FALSE
)