DATA 607 – Project 2
HAZAL GUNDUZ
The goal of this assignment is to give you practice in preparing different datasets for downstream analysis work. Your task is to:
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
Dataset 1
For dataset 1, I used the original file from Kaggle, which contains the data behind the article “Should Travelers Avoid Flying Airplanes That Have Had Crashes in the Past?” I compared the number of incidents, fatal accidents, and fatalities.
# Load the raw airline safety CSV. The first line of the file is NOT treated as
# a header here, so the real column names arrive as the first data row and the
# columns get placeholder names (V1, V2, ...).
airline_safety <- read.csv(
  file = "airline-safety.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
head(airline_safety)
## V1 V2 V3
## 1 airline avail_seat_km_per_week incidents_85_99
## 2 Aer Lingus 320906734 2
## 3 Aeroflot* 1197672318 76
## 4 Aerolineas Argentinas 385803648 6
## 5 Aeromexico* 596871813 3
## 6 Air Canada 1865253802 2
## V4 V5 V6 V7
## 1 fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 2 0 0 0 0
## 3 14 128 6 1
## 4 0 0 1 0
## 5 1 64 5 0
## 6 0 0 2 0
## V8
## 1 fatalities_00_14
## 2 0
## 3 88
## 4 0
## 5 0
## 6 0
# Inspect the column types: every column is still character at this point
# because the header line was read in as an ordinary data row.
summary(airline_safety)
## V1 V2 V3 V4
## Length:57 Length:57 Length:57 Length:57
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V5 V6 V7 V8
## Length:57 Length:57 Length:57 Length:57
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
Data Cleaning
Rename the column headers using the first row
# The first row holds the real column names; flatten it to a plain character
# vector and install it as the header.
colnames(airline_safety) <- as.character(
  unlist(airline_safety[1, ], use.names = FALSE)
)
head(airline_safety)
## airline avail_seat_km_per_week incidents_85_99
## 1 airline avail_seat_km_per_week incidents_85_99
## 2 Aer Lingus 320906734 2
## 3 Aeroflot* 1197672318 76
## 4 Aerolineas Argentinas 385803648 6
## 5 Aeromexico* 596871813 3
## 6 Air Canada 1865253802 2
## fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 1 fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 2 0 0 0 0
## 3 14 128 6 1
## 4 0 0 1 0
## 5 1 64 5 0
## 6 0 0 2 0
## fatalities_00_14
## 1 fatalities_00_14
## 2 0
## 3 88
## 4 0
## 5 0
## 6 0
Remove the first row
# Row 1 now duplicates the header, so drop it; the remaining rows keep their
# original row labels (2, 3, ...).
airline_safety <- airline_safety[-1, , drop = FALSE]
head(airline_safety)
## airline avail_seat_km_per_week incidents_85_99
## 2 Aer Lingus 320906734 2
## 3 Aeroflot* 1197672318 76
## 4 Aerolineas Argentinas 385803648 6
## 5 Aeromexico* 596871813 3
## 6 Air Canada 1865253802 2
## 7 Air France 3004002661 14
## fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 2 0 0 0 0
## 3 14 128 6 1
## 4 0 0 1 0
## 5 1 64 5 0
## 6 0 0 2 0
## 7 4 79 6 2
## fatalities_00_14
## 2 0
## 3 88
## 4 0
## 5 0
## 6 0
## 7 337
Convert the count columns from character to numeric
# Every column except `airline` holds numbers that were read as text. Convert
# them in a single pass rather than one assignment per column.
count_cols <- c(
  "avail_seat_km_per_week",
  "incidents_85_99", "fatal_accidents_85_99", "fatalities_85_99",
  "incidents_00_14", "fatal_accidents_00_14", "fatalities_00_14"
)
airline_safety[count_cols] <- lapply(
  airline_safety[count_cols],
  function(col) as.numeric(as.character(col))
)
head(airline_safety)
## airline avail_seat_km_per_week incidents_85_99
## 2 Aer Lingus 320906734 2
## 3 Aeroflot* 1197672318 76
## 4 Aerolineas Argentinas 385803648 6
## 5 Aeromexico* 596871813 3
## 6 Air Canada 1865253802 2
## 7 Air France 3004002661 14
## fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 2 0 0 0 0
## 3 14 128 6 1
## 4 0 0 1 0
## 5 1 64 5 0
## 6 0 0 2 0
## 7 4 79 6 2
## fatalities_00_14
## 2 0
## 3 88
## 4 0
## 5 0
## 6 0
## 7 337
Create total columns that add up the number of incidents, fatal accidents, and fatalities for 1985–1999 and for 2000–2014:
# Combined per-period totals: incidents + fatal accidents + fatalities.
airline_safety$total_85_99 <- with(
  airline_safety,
  incidents_85_99 + fatal_accidents_85_99 + fatalities_85_99
)
airline_safety$total_00_14 <- with(
  airline_safety,
  incidents_00_14 + fatal_accidents_00_14 + fatalities_00_14
)
head(airline_safety)
## airline avail_seat_km_per_week incidents_85_99
## 2 Aer Lingus 320906734 2
## 3 Aeroflot* 1197672318 76
## 4 Aerolineas Argentinas 385803648 6
## 5 Aeromexico* 596871813 3
## 6 Air Canada 1865253802 2
## 7 Air France 3004002661 14
## fatal_accidents_85_99 fatalities_85_99 incidents_00_14 fatal_accidents_00_14
## 2 0 0 0 0
## 3 14 128 6 1
## 4 0 0 1 0
## 5 1 64 5 0
## 6 0 0 2 0
## 7 4 79 6 2
## fatalities_00_14 total_85_99 total_00_14
## 2 0 2 0
## 3 88 218 95
## 4 0 6 1
## 5 0 68 5
## 6 0 2 2
## 7 337 97 345
Create a new dataset that focuses only on the airline and its totals from 85-99 and 00-14.
# Keep only the airline name and the two combined totals.
total_cols <- c("airline", "total_85_99", "total_00_14")
airline_total <- airline_safety[, total_cols]
head(airline_total)
## airline total_85_99 total_00_14
## 2 Aer Lingus 2 0
## 3 Aeroflot* 218 95
## 4 Aerolineas Argentinas 6 1
## 5 Aeromexico* 68 5
## 6 Air Canada 2 2
## 7 Air France 97 345
# Distribution of the combined totals across airlines for each period.
summary(airline_total)
## airline total_85_99 total_00_14
## Length:56 Min. : 0.00 Min. : 0.0
## Class :character 1st Qu.: 4.75 1st Qu.: 1.0
## Mode :character Median : 55.00 Median : 5.0
## Mean :121.77 Mean : 60.3
## 3rd Qu.:221.50 3rd Qu.: 88.0
## Max. :553.00 Max. :542.0
I used the spread function on airline_total to display the airline totals in a wide dataset.
# Reshape to one column per airline holding its 1985-1999 total.
# NOTE(review): total_00_14 is left in the data, so spread() uses it as the row
# key and each airline's value sits in a single row with NA elsewhere (visible
# in the output below). Dropping total_00_14 first would give one dense row;
# spread() is also superseded by tidyr::pivot_wider() -- confirm intent.
airline_wide85 <-spread(airline_total, airline, total_85_99)
head(airline_wide85)
## total_00_14 Aer Lingus Aeroflot* Aerolineas Argentinas Aeromexico* Air Canada
## 1 0 2 NA NA NA NA
## 2 1 NA NA 6 NA NA
## 3 2 NA NA NA NA 2
## 4 3 NA NA NA NA NA
## 5 4 NA NA NA NA NA
## 6 5 NA NA NA 68 NA
## Air France Air India* Air New Zealand* Alaska Airlines* Alitalia
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA 59
## 6 NA NA NA NA NA
## All Nippon Airways American* Austrian Airlines Avianca British Airways*
## 1 NA NA NA 331 NA
## 2 NA NA 1 NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## Cathay Pacific* China Airlines Condor COPA Delta / Northwest* Egyptair El Al
## 1 NA NA 19 51 NA NA NA
## 2 NA NA NA NA NA NA 6
## 3 0 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## Ethiopian Airlines Finnair Garuda Indonesia Gulf Air Hawaiian Airlines Iberia
## 1 NA 1 NA NA NA NA
## 2 NA NA NA NA 0 NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA 153
## Japan Airlines Kenya Airways KLM* Korean Air LAN Airlines Lufthansa*
## 1 524 NA NA NA 26 NA
## 2 NA NA 11 442 NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA 9
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## Malaysia Airlines Pakistan International Philippine Airlines Qantas*
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA 85 NA
## 6 NA NA NA 1
## Royal Air Maroc SAS* Saudi Arabian Singapore Airlines South African
## 1 NA NA NA NA NA
## 2 NA NA NA NA 162
## 3 NA NA NA NA NA
## 4 59 NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## Southwest Airlines Sri Lankan / AirLanka SWISS* TACA TAM TAP - Air Portugal
## 1 NA NA NA NA NA 0
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA 232 NA NA NA
## 5 NA 17 NA NA NA NA
## 6 NA NA NA 7 NA NA
## Thai Airways Turkish Airlines United / Continental*
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 320 NA NA
## 6 NA NA NA
## US Airways / America West* Vietnam Airlines Virgin Atlantic Xiamen Airlines
## 1 NA NA 1 NA
## 2 NA 181 NA NA
## 3 NA NA NA 92
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
# Per-column summary of the wide table. In the output below each airline
# column's min and max coincide and the remaining rows are counted as NA.
summary(airline_wide85)
## total_00_14 Aer Lingus Aeroflot* Aerolineas Argentinas Aeromexico*
## Min. : 0.00 Min. :2 Min. :218 Min. :6 Min. :68
## 1st Qu.: 7.25 1st Qu.:2 1st Qu.:218 1st Qu.:6 1st Qu.:68
## Median : 67.50 Median :2 Median :218 Median :6 Median :68
## Mean :107.97 Mean :2 Mean :218 Mean :6 Mean :68
## 3rd Qu.:141.50 3rd Qu.:2 3rd Qu.:218 3rd Qu.:6 3rd Qu.:68
## Max. :542.00 Max. :2 Max. :218 Max. :6 Max. :68
## NA's :29 NA's :29 NA's :29 NA's :29
## Air Canada Air France Air India* Air New Zealand* Alaska Airlines*
## Min. :2 Min. :97 Min. :332 Min. :3 Min. :5
## 1st Qu.:2 1st Qu.:97 1st Qu.:332 1st Qu.:3 1st Qu.:5
## Median :2 Median :97 Median :332 Median :3 Median :5
## Mean :2 Mean :97 Mean :332 Mean :3 Mean :5
## 3rd Qu.:2 3rd Qu.:97 3rd Qu.:332 3rd Qu.:3 3rd Qu.:5
## Max. :2 Max. :97 Max. :332 Max. :3 Max. :5
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Alitalia All Nippon Airways American* Austrian Airlines Avianca
## Min. :59 Min. :5 Min. :127 Min. :1 Min. :331
## 1st Qu.:59 1st Qu.:5 1st Qu.:127 1st Qu.:1 1st Qu.:331
## Median :59 Median :5 Median :127 Median :1 Median :331
## Mean :59 Mean :5 Mean :127 Mean :1 Mean :331
## 3rd Qu.:59 3rd Qu.:5 3rd Qu.:127 3rd Qu.:1 3rd Qu.:331
## Max. :59 Max. :5 Max. :127 Max. :1 Max. :331
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## British Airways* Cathay Pacific* China Airlines Condor COPA
## Min. :4 Min. :0 Min. :553 Min. :19 Min. :51
## 1st Qu.:4 1st Qu.:0 1st Qu.:553 1st Qu.:19 1st Qu.:51
## Median :4 Median :0 Median :553 Median :19 Median :51
## Mean :4 Mean :0 Mean :553 Mean :19 Mean :51
## 3rd Qu.:4 3rd Qu.:0 3rd Qu.:553 3rd Qu.:19 3rd Qu.:51
## Max. :4 Max. :0 Max. :553 Max. :19 Max. :51
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Delta / Northwest* Egyptair El Al Ethiopian Airlines Finnair
## Min. :443 Min. :293 Min. :6 Min. :197 Min. :1
## 1st Qu.:443 1st Qu.:293 1st Qu.:6 1st Qu.:197 1st Qu.:1
## Median :443 Median :293 Median :6 Median :197 Median :1
## Mean :443 Mean :293 Mean :6 Mean :197 Mean :1
## 3rd Qu.:443 3rd Qu.:293 3rd Qu.:6 3rd Qu.:197 3rd Qu.:1
## Max. :443 Max. :293 Max. :6 Max. :197 Max. :1
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Garuda Indonesia Gulf Air Hawaiian Airlines Iberia Japan Airlines
## Min. :273 Min. :1 Min. :0 Min. :153 Min. :524
## 1st Qu.:273 1st Qu.:1 1st Qu.:0 1st Qu.:153 1st Qu.:524
## Median :273 Median :1 Median :0 Median :153 Median :524
## Mean :273 Mean :1 Mean :0 Mean :153 Mean :524
## 3rd Qu.:273 3rd Qu.:1 3rd Qu.:0 3rd Qu.:153 3rd Qu.:524
## Max. :273 Max. :1 Max. :0 Max. :153 Max. :524
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Kenya Airways KLM* Korean Air LAN Airlines Lufthansa*
## Min. :2 Min. :11 Min. :442 Min. :26 Min. :9
## 1st Qu.:2 1st Qu.:11 1st Qu.:442 1st Qu.:26 1st Qu.:9
## Median :2 Median :11 Median :442 Median :26 Median :9
## Mean :2 Mean :11 Mean :442 Mean :26 Mean :9
## 3rd Qu.:2 3rd Qu.:11 3rd Qu.:442 3rd Qu.:26 3rd Qu.:9
## Max. :2 Max. :11 Max. :442 Max. :26 Max. :9
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Malaysia Airlines Pakistan International Philippine Airlines Qantas*
## Min. :38 Min. :245 Min. :85 Min. :1
## 1st Qu.:38 1st Qu.:245 1st Qu.:85 1st Qu.:1
## Median :38 Median :245 Median :85 Median :1
## Mean :38 Mean :245 Mean :85 Mean :1
## 3rd Qu.:38 3rd Qu.:245 3rd Qu.:85 3rd Qu.:1
## Max. :38 Max. :245 Max. :85 Max. :1
## NA's :29 NA's :29 NA's :29 NA's :29
## Royal Air Maroc SAS* Saudi Arabian Singapore Airlines South African
## Min. :59 Min. :5 Min. :322 Min. :10 Min. :162
## 1st Qu.:59 1st Qu.:5 1st Qu.:322 1st Qu.:10 1st Qu.:162
## Median :59 Median :5 Median :322 Median :10 Median :162
## Mean :59 Mean :5 Mean :322 Mean :10 Mean :162
## 3rd Qu.:59 3rd Qu.:5 3rd Qu.:322 3rd Qu.:10 3rd Qu.:162
## Max. :59 Max. :5 Max. :322 Max. :10 Max. :162
## NA's :29 NA's :29 NA's :29 NA's :29 NA's :29
## Southwest Airlines Sri Lankan / AirLanka SWISS* TACA
## Min. :1 Min. :17 Min. :232 Min. :7
## 1st Qu.:1 1st Qu.:17 1st Qu.:232 1st Qu.:7
## Median :1 Median :17 Median :232 Median :7
## Mean :1 Mean :17 Mean :232 Mean :7
## 3rd Qu.:1 3rd Qu.:17 3rd Qu.:232 3rd Qu.:7
## Max. :1 Max. :17 Max. :232 Max. :7
## NA's :29 NA's :29 NA's :29 NA's :29
## TAM TAP - Air Portugal Thai Airways Turkish Airlines
## Min. :109 Min. :0 Min. :320 Min. :75
## 1st Qu.:109 1st Qu.:0 1st Qu.:320 1st Qu.:75
## Median :109 Median :0 Median :320 Median :75
## Mean :109 Mean :0 Mean :320 Mean :75
## 3rd Qu.:109 3rd Qu.:0 3rd Qu.:320 3rd Qu.:75
## Max. :109 Max. :0 Max. :320 Max. :75
## NA's :29 NA's :29 NA's :29 NA's :29
## United / Continental* US Airways / America West* Vietnam Airlines
## Min. :346 Min. :247 Min. :181
## 1st Qu.:346 1st Qu.:247 1st Qu.:181
## Median :346 Median :247 Median :181
## Mean :346 Mean :247 Mean :181
## 3rd Qu.:346 3rd Qu.:247 3rd Qu.:181
## Max. :346 Max. :247 Max. :181
## NA's :29 NA's :29 NA's :29
## Virgin Atlantic Xiamen Airlines
## Min. :1 Min. :92
## 1st Qu.:1 1st Qu.:92
## Median :1 Median :92
## Mean :1 Mean :92
## 3rd Qu.:1 3rd Qu.:92
## Max. :1 Max. :92
## NA's :29 NA's :29
# Reshape to one column per airline holding its 2000-2014 total.
# NOTE(review): total_85_99 is left in the data, so spread() uses it as the row
# key and each airline's value sits in a single row with NA elsewhere (visible
# in the output below). Dropping total_85_99 first would give one dense row;
# spread() is also superseded by tidyr::pivot_wider() -- confirm intent.
airline_wide00 <-spread(airline_total, airline, total_00_14)
head(airline_wide00)
## total_85_99 Aer Lingus Aeroflot* Aerolineas Argentinas Aeromexico* Air Canada
## 1 0 NA NA NA NA NA
## 2 1 NA NA NA NA NA
## 3 2 0 NA NA NA 2
## 4 3 NA NA NA NA NA
## 5 4 NA NA NA NA NA
## 6 5 NA NA NA NA NA
## Air France Air India* Air New Zealand* Alaska Airlines* Alitalia
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA 13 NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA 94 NA
## All Nippon Airways American* Austrian Airlines Avianca British Airways*
## 1 NA NA NA NA NA
## 2 NA NA 1 NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA 6
## 6 7 NA NA NA NA
## Cathay Pacific* China Airlines Condor COPA Delta / Northwest* Egyptair El Al
## 1 2 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## Ethiopian Airlines Finnair Garuda Indonesia Gulf Air Hawaiian Airlines Iberia
## 1 NA NA NA NA 1 NA
## 2 NA 0 NA 147 NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## Japan Airlines Kenya Airways KLM* Korean Air LAN Airlines Lufthansa*
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA 287 NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## Malaysia Airlines Pakistan International Philippine Airlines Qantas*
## 1 NA NA NA NA
## 2 NA NA NA 5
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## Royal Air Maroc SAS* Saudi Arabian Singapore Airlines South African
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA 117 NA NA NA
## Southwest Airlines Sri Lankan / AirLanka SWISS* TACA TAM TAP - Air Portugal
## 1 NA NA NA NA NA 0
## 2 8 NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## Thai Airways Turkish Airlines United / Continental*
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## US Airways / America West* Vietnam Airlines Virgin Atlantic Xiamen Airlines
## 1 NA NA NA NA
## 2 NA NA 0 NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
# Per-column summary of the wide table. In the output below each airline
# column's min and max coincide and the remaining rows are counted as NA.
summary(airline_wide00)
## total_85_99 Aer Lingus Aeroflot* Aerolineas Argentinas Aeromexico*
## Min. : 0.0 Min. :0 Min. :95 Min. :1 Min. :5
## 1st Qu.: 14.0 1st Qu.:0 1st Qu.:95 1st Qu.:1 1st Qu.:5
## Median : 97.0 Median :0 Median :95 Median :1 Median :5
## Mean :156.6 Mean :0 Mean :95 Mean :1 Mean :5
## 3rd Qu.:260.0 3rd Qu.:0 3rd Qu.:95 3rd Qu.:1 3rd Qu.:5
## Max. :553.0 Max. :0 Max. :95 Max. :1 Max. :5
## NA's :42 NA's :42 NA's :42 NA's :42
## Air Canada Air France Air India* Air New Zealand* Alaska Airlines*
## Min. :2 Min. :345 Min. :163 Min. :13 Min. :94
## 1st Qu.:2 1st Qu.:345 1st Qu.:163 1st Qu.:13 1st Qu.:94
## Median :2 Median :345 Median :163 Median :13 Median :94
## Mean :2 Mean :345 Mean :163 Mean :13 Mean :94
## 3rd Qu.:2 3rd Qu.:345 3rd Qu.:163 3rd Qu.:13 3rd Qu.:94
## Max. :2 Max. :345 Max. :163 Max. :13 Max. :94
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Alitalia All Nippon Airways American* Austrian Airlines Avianca
## Min. :4 Min. :7 Min. :436 Min. :1 Min. :0
## 1st Qu.:4 1st Qu.:7 1st Qu.:436 1st Qu.:1 1st Qu.:0
## Median :4 Median :7 Median :436 Median :1 Median :0
## Mean :4 Mean :7 Mean :436 Mean :1 Mean :0
## 3rd Qu.:4 3rd Qu.:7 3rd Qu.:436 3rd Qu.:1 3rd Qu.:0
## Max. :4 Max. :7 Max. :436 Max. :1 Max. :0
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## British Airways* Cathay Pacific* China Airlines Condor COPA
## Min. :6 Min. :2 Min. :228 Min. :0 Min. :0
## 1st Qu.:6 1st Qu.:2 1st Qu.:228 1st Qu.:0 1st Qu.:0
## Median :6 Median :2 Median :228 Median :0 Median :0
## Mean :6 Mean :2 Mean :228 Mean :0 Mean :0
## 3rd Qu.:6 3rd Qu.:2 3rd Qu.:228 3rd Qu.:0 3rd Qu.:0
## Max. :6 Max. :2 Max. :228 Max. :0 Max. :0
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Delta / Northwest* Egyptair El Al Ethiopian Airlines Finnair
## Min. :77 Min. :19 Min. :1 Min. :99 Min. :0
## 1st Qu.:77 1st Qu.:19 1st Qu.:1 1st Qu.:99 1st Qu.:0
## Median :77 Median :19 Median :1 Median :99 Median :0
## Mean :77 Mean :19 Mean :1 Mean :99 Mean :0
## 3rd Qu.:77 3rd Qu.:19 3rd Qu.:1 3rd Qu.:99 3rd Qu.:0
## Max. :77 Max. :19 Max. :1 Max. :99 Max. :0
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Garuda Indonesia Gulf Air Hawaiian Airlines Iberia Japan Airlines
## Min. :28 Min. :147 Min. :1 Min. :5 Min. :0
## 1st Qu.:28 1st Qu.:147 1st Qu.:1 1st Qu.:5 1st Qu.:0
## Median :28 Median :147 Median :1 Median :5 Median :0
## Mean :28 Mean :147 Mean :1 Mean :5 Mean :0
## 3rd Qu.:28 3rd Qu.:147 3rd Qu.:1 3rd Qu.:5 3rd Qu.:0
## Max. :28 Max. :147 Max. :1 Max. :5 Max. :0
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Kenya Airways KLM* Korean Air LAN Airlines Lufthansa*
## Min. :287 Min. :1 Min. :1 Min. :0 Min. :3
## 1st Qu.:287 1st Qu.:1 1st Qu.:1 1st Qu.:0 1st Qu.:3
## Median :287 Median :1 Median :1 Median :0 Median :3
## Mean :287 Mean :1 Mean :1 Mean :0 Mean :3
## 3rd Qu.:287 3rd Qu.:1 3rd Qu.:1 3rd Qu.:0 3rd Qu.:3
## Max. :287 Max. :1 Max. :1 Max. :0 Max. :3
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Malaysia Airlines Pakistan International Philippine Airlines Qantas*
## Min. :542 Min. :58 Min. :4 Min. :5
## 1st Qu.:542 1st Qu.:58 1st Qu.:4 1st Qu.:5
## Median :542 Median :58 Median :4 Median :5
## Mean :542 Mean :58 Mean :4 Mean :5
## 3rd Qu.:542 3rd Qu.:58 3rd Qu.:4 3rd Qu.:5
## Max. :542 Max. :58 Max. :4 Max. :5
## NA's :42 NA's :42 NA's :42 NA's :42
## Royal Air Maroc SAS* Saudi Arabian Singapore Airlines South African
## Min. :3 Min. :117 Min. :11 Min. :86 Min. :1
## 1st Qu.:3 1st Qu.:117 1st Qu.:11 1st Qu.:86 1st Qu.:1
## Median :3 Median :117 Median :11 Median :86 Median :1
## Mean :3 Mean :117 Mean :11 Mean :86 Mean :1
## 3rd Qu.:3 3rd Qu.:117 3rd Qu.:11 3rd Qu.:86 3rd Qu.:1
## Max. :3 Max. :117 Max. :11 Max. :86 Max. :1
## NA's :42 NA's :42 NA's :42 NA's :42 NA's :42
## Southwest Airlines Sri Lankan / AirLanka SWISS* TACA
## Min. :8 Min. :4 Min. :3 Min. :5
## 1st Qu.:8 1st Qu.:4 1st Qu.:3 1st Qu.:5
## Median :8 Median :4 Median :3 Median :5
## Mean :8 Mean :4 Mean :3 Mean :5
## 3rd Qu.:8 3rd Qu.:4 3rd Qu.:3 3rd Qu.:5
## Max. :8 Max. :4 Max. :3 Max. :5
## NA's :42 NA's :42 NA's :42 NA's :42
## TAM TAP - Air Portugal Thai Airways Turkish Airlines
## Min. :197 Min. :0 Min. :4 Min. :94
## 1st Qu.:197 1st Qu.:0 1st Qu.:4 1st Qu.:94
## Median :197 Median :0 Median :4 Median :94
## Mean :197 Mean :0 Mean :4 Mean :94
## 3rd Qu.:197 3rd Qu.:0 3rd Qu.:4 3rd Qu.:94
## Max. :197 Max. :0 Max. :4 Max. :94
## NA's :42 NA's :42 NA's :42 NA's :42
## United / Continental* US Airways / America West* Vietnam Airlines
## Min. :125 Min. :36 Min. :1
## 1st Qu.:125 1st Qu.:36 1st Qu.:1
## Median :125 Median :36 Median :1
## Mean :125 Mean :36 Mean :1
## 3rd Qu.:125 3rd Qu.:36 3rd Qu.:1
## Max. :125 Max. :36 Max. :1
## NA's :42 NA's :42 NA's :42
## Virgin Atlantic Xiamen Airlines
## Min. :0 Min. :2
## 1st Qu.:0 1st Qu.:2
## Median :0 Median :2
## Mean :0 Mean :2
## 3rd Qu.:0 3rd Qu.:2
## Max. :0 Max. :2
## NA's :42 NA's :42
List the airlines with the highest combined number of incidents, fatal accidents, and fatalities between 1985 and 1999.
# Rank airlines from the largest to the smallest 1985-1999 combined total.
rank_85_99 <- order(airline_total[["total_85_99"]], decreasing = TRUE)
airline_max85 <- airline_total[rank_85_99, ]
head(airline_max85)
## airline total_85_99 total_00_14
## 18 China Airlines 553 228
## 30 Japan Airlines 524 0
## 21 Delta / Northwest* 443 77
## 33 Korean Air 442 1
## 53 United / Continental* 346 125
## 8 Air India* 332 163
# Sorting the rows does not change the column distributions, so this output
# matches summary(airline_total).
summary(airline_max85)
## airline total_85_99 total_00_14
## Length:56 Min. : 0.00 Min. : 0.0
## Class :character 1st Qu.: 4.75 1st Qu.: 1.0
## Mode :character Median : 55.00 Median : 5.0
## Mean :121.77 Mean : 60.3
## 3rd Qu.:221.50 3rd Qu.: 88.0
## Max. :553.00 Max. :542.0
According to the data, China Airlines has the highest combined total of incidents, fatal accidents, and fatalities from 1985 to 1999.
# Bar chart of each airline's combined 1985-1999 total.
# Airlines are mapped to the axis (ordered by their total) instead of to a
# 56-colour fill legend at a single x position, so every bar is labelled
# directly. coord_flip() turns the bars horizontal so the airline names stay
# readable.
ggplot(
  airline_total,
  aes(x = reorder(airline, total_85_99), y = total_85_99)
) +
  geom_col() +
  coord_flip() +
  xlab("Airlines") +
  ylab("Incidents, Fatal Incidents, & Fatalities 1985 to 1999")
# Bar chart of each airline's combined 2000-2014 total.
# Airlines are mapped to the axis (ordered by their total) instead of to a
# 56-colour fill legend at a single x position, so every bar is labelled
# directly. coord_flip() turns the bars horizontal so the airline names stay
# readable. The axis label says 2014 because the source columns are *_00_14.
ggplot(
  airline_total,
  aes(x = reorder(airline, total_00_14), y = total_00_14)
) +
  geom_col() +
  coord_flip() +
  xlab("Airlines") +
  ylab("Incidents, Fatal Incidents, & Fatalities 2000 to 2014")
Dataset 2
# Load the student performance data; the first line of the file supplies the
# column names and text columns stay as character vectors.
students <- read.csv(
  file = "StudentsPerformance.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
head(students)
## gender race.ethnicity parental.level.of.education lunch
## 1 female group B bachelor's degree standard
## 2 female group C some college standard
## 3 female group B master's degree standard
## 4 male group A associate's degree free/reduced
## 5 male group C some college standard
## 6 female group B associate's degree standard
## test.preparation.course math.score reading.score writing.score
## 1 none 72 72 74
## 2 completed 69 90 88
## 3 none 90 95 93
## 4 none 47 57 44
## 5 none 76 78 75
## 6 none 71 83 78
# Overview of the column types and the range of each score column.
summary(students)
## gender race.ethnicity parental.level.of.education
## Length:1000 Length:1000 Length:1000
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## lunch test.preparation.course math.score reading.score
## Length:1000 Length:1000 Min. : 0.00 Min. : 17.00
## Class :character Class :character 1st Qu.: 57.00 1st Qu.: 59.00
## Mode :character Mode :character Median : 66.00 Median : 70.00
## Mean : 66.09 Mean : 69.17
## 3rd Qu.: 77.00 3rd Qu.: 79.00
## Max. :100.00 Max. :100.00
## writing.score
## Min. : 10.00
## 1st Qu.: 57.75
## Median : 69.00
## Mean : 68.05
## 3rd Qu.: 79.00
## Max. :100.00
Data Cleaning
Rename the column headers
# Give the eight columns shorter, consistently capitalised names in one
# assignment (order matches the columns as read from the CSV).
names(students)[1:8] <- c(
  "Gender", "Race.Ethnicity", "Parental.LOE", "Lunch",
  "Test.Prep", "Math", "Reading", "Writing"
)
head(students)
## Gender Race.Ethnicity Parental.LOE Lunch Test.Prep Math Reading
## 1 female group B bachelor's degree standard none 72 72
## 2 female group C some college standard completed 69 90
## 3 female group B master's degree standard none 90 95
## 4 male group A associate's degree free/reduced none 47 57
## 5 male group C some college standard none 76 78
## 6 female group B associate's degree standard none 71 83
## Writing
## 1 74
## 2 88
## 3 93
## 4 44
## 5 75
## 6 78
# Spread the Test.Prep values into one column per parental education level.
# NOTE(review): every remaining column (Gender ... Writing) acts as the row
# key, so in the rows shown below each row fills only one of the new columns
# and the rest are NA. spread() is also superseded by tidyr::pivot_wider() --
# confirm this wide layout is what the analysis needs.
students_wide<-spread(students, Parental.LOE, Test.Prep)
head(students_wide)
## Gender Race.Ethnicity Lunch Math Reading Writing associate's degree
## 1 female group A free/reduced 34 48 41 <NA>
## 2 female group A free/reduced 37 57 56 none
## 3 female group A free/reduced 38 43 43 <NA>
## 4 female group A free/reduced 41 51 48 none
## 5 female group A free/reduced 44 45 45 <NA>
## 6 female group A free/reduced 44 64 58 <NA>
## bachelor's degree high school master's degree some college some high school
## 1 <NA> completed <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA> none
## 4 <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> none
## 6 <NA> <NA> <NA> <NA> none
List the students with the highest Math, Reading, and Writing scores:
# Rank students from the highest to the lowest Math score.
math_rank <- order(students[["Math"]], decreasing = TRUE)
students_M <- students[math_rank, ]
head(students_M)
## Gender Race.Ethnicity Parental.LOE Lunch Test.Prep Math
## 150 male group E associate's degree free/reduced completed 100
## 452 female group E some college standard none 100
## 459 female group E bachelor's degree standard none 100
## 624 male group A some college standard completed 100
## 626 male group D some college standard completed 100
## 917 male group E bachelor's degree standard completed 100
## Reading Writing
## 150 100 93
## 452 92 97
## 459 100 100
## 624 96 86
## 626 97 99
## 917 100 100
# Rank students from the highest to the lowest Reading score.
reading_rank <- order(students[["Reading"]], decreasing = TRUE)
students_R <- students[reading_rank, ]
head(students_R)
## Gender Race.Ethnicity Parental.LOE Lunch Test.Prep Math
## 107 female group D master's degree standard none 87
## 115 female group E bachelor's degree standard completed 99
## 150 male group E associate's degree free/reduced completed 100
## 166 female group C bachelor's degree standard completed 96
## 180 female group D some high school standard completed 97
## 382 male group C associate's degree standard completed 87
## Reading Writing
## 107 100 100
## 115 100 100
## 150 100 93
## 166 100 100
## 180 100 100
## 382 100 95
# Rows of `students` ranked from the highest Writing score to the lowest;
# the original row names are kept so students can be traced back.
students_W <- students[with(students, order(Writing, decreasing = TRUE)), ]
students_W %>% head()
## Gender Race.Ethnicity Parental.LOE Lunch Test.Prep Math Reading
## 107 female group D master's degree standard none 87 100
## 115 female group E bachelor's degree standard completed 99 100
## 166 female group C bachelor's degree standard completed 96 100
## 180 female group D some high school standard completed 97 100
## 378 female group D master's degree free/reduced completed 85 95
## 404 female group D high school standard completed 88 99
## Writing
## 107 100
## 115 100
## 166 100
## 180 100
## 378 100
## 404 100
To average the three subjects, I’ll create a new column and add it to the data frame.
# Append MRW.avg: the unweighted mean of each student's Math, Reading and
# Writing scores, stored as the last column of `students`.
students[["MRW.avg"]] <- with(students, (Math + Reading + Writing) / 3)
students %>% head()
## Gender Race.Ethnicity Parental.LOE Lunch Test.Prep Math Reading
## 1 female group B bachelor's degree standard none 72 72
## 2 female group C some college standard completed 69 90
## 3 female group B master's degree standard none 90 95
## 4 male group A associate's degree free/reduced none 47 57
## 5 male group C some college standard none 76 78
## 6 female group B associate's degree standard none 71 83
## Writing MRW.avg
## 1 74 72.66667
## 2 88 82.33333
## 3 93 92.66667
## 4 44 49.33333
## 5 75 76.33333
## 6 78 77.33333
# Keep only the columns needed to compare average scores by gender and
# parental education.
students_avg <- students[
  c("Gender", "Parental.LOE", "Math", "Reading", "Writing", "MRW.avg")
]
students_avg %>% head()
## Gender Parental.LOE Math Reading Writing MRW.avg
## 1 female bachelor's degree 72 72 74 72.66667
## 2 female some college 69 90 88 82.33333
## 3 female master's degree 90 95 93 92.66667
## 4 male associate's degree 47 57 44 49.33333
## 5 male some college 76 78 75 76.33333
## 6 female associate's degree 71 83 78 77.33333
# Rank students from the highest three-subject average to the lowest.
students_avg <- students_avg[
  with(students_avg, order(MRW.avg, decreasing = TRUE)),
]
students_avg %>% head()
## Gender Parental.LOE Math Reading Writing MRW.avg
## 459 female bachelor's degree 100 100 100 100.00000
## 917 male bachelor's degree 100 100 100 100.00000
## 963 female associate's degree 100 100 100 100.00000
## 115 female bachelor's degree 99 100 100 99.66667
## 180 female some high school 97 100 100 99.00000
## 713 female some college 98 100 99 99.00000
# Bar chart of the mean three-subject average for each parental education
# level.
#
# The per-student MRW.avg values are first averaged within each Parental.LOE
# group. Plotting the raw rows with stat = "identity" would stack one bar per
# student on top of each other, so the visible bar height would be the group
# maximum rather than the group average that the y-axis label promises.
# Parental.LOE is also mapped to x so the x-axis label matches what is shown.
students_avg %>%
  group_by(Parental.LOE) %>%
  summarise(MRW.avg = mean(MRW.avg), .groups = "drop") %>%
  ggplot(aes(x = Parental.LOE, y = MRW.avg, fill = Parental.LOE)) +
  geom_col() +
  xlab("Parental Level of Education") + ylab("Average Scores")
There is a correlation between students’ performance and their parents’ level of education.
Dataset 3
# Load the movies metadata; the first line of the file supplies the column
# names (read.csv's default) and text columns stay as character vectors.
movies <- read.csv(file = "movies_metadata.csv", stringsAsFactors = FALSE)
movies %>% head()
## adult
## 1 False
## 2 False
## 3 False
## 4 False
## 5 False
## 6 False
## belongs_to_collection
## 1 {'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}
## 2
## 3 {'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}
## 4
## 5 {'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}
## 6
## budget
## 1 30000000
## 2 65000000
## 3 0
## 4 16000000
## 5 0
## 6 60000000
## genres
## 1 [{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]
## 2 [{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]
## 3 [{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]
## 4 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]
## 5 [{'id': 35, 'name': 'Comedy'}]
## 6 [{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}]
## homepage id imdb_id original_language
## 1 http://toystory.disney.com/toy-story 862 tt0114709 en
## 2 8844 tt0113497 en
## 3 15602 tt0113228 en
## 4 31357 tt0114885 en
## 5 11862 tt0113041 en
## 6 949 tt0113277 en
## original_title
## 1 Toy Story
## 2 Jumanji
## 3 Grumpier Old Men
## 4 Waiting to Exhale
## 5 Father of the Bride Part II
## 6 Heat
## overview
## 1 Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.
## 2 When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.
## 3 A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.
## 4 Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive "good man" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.
## 5 Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own.
## 6 Obsessive master thief, Neil McCauley leads a top-notch crew on various insane heists throughout Los Angeles while a mentally unstable detective, Vincent Hanna pursues him without rest. Each man recognizes and respects the ability and the dedication of the other even though they are aware their cat-and-mouse game may end in violence.
## popularity poster_path
## 1 21.946943 /rhIRbceoE9lR4veEXuwCC2wARtG.jpg
## 2 17.015539 /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
## 3 11.7129 /6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
## 4 3.859495 /16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
## 5 8.387519 /e64sOI48hQXyru7naBFyssKFxVd.jpg
## 6 17.924927 /zMyfPUelumio3tiDKPffaUpsQTD.jpg
## production_companies
## 1 [{'name': 'Pixar Animation Studios', 'id': 3}]
## 2 [{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]
## 3 [{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]
## 4 [{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]
## 5 [{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]
## 6 [{'name': 'Regency Enterprises', 'id': 508}, {'name': 'Forward Pass', 'id': 675}, {'name': 'Warner Bros.', 'id': 6194}]
## production_countries release_date
## 1 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-10-30
## 2 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-15
## 3 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-22
## 4 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-22
## 5 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-02-10
## 6 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-15
## revenue runtime
## 1 373554033 81
## 2 262797249 104
## 3 0 101
## 4 81452156 127
## 5 76578911 106
## 6 187436818 170
## spoken_languages
## 1 [{'iso_639_1': 'en', 'name': 'English'}]
## 2 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]
## 3 [{'iso_639_1': 'en', 'name': 'English'}]
## 4 [{'iso_639_1': 'en', 'name': 'English'}]
## 5 [{'iso_639_1': 'en', 'name': 'English'}]
## 6 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'es', 'name': 'Español'}]
## status
## 1 Released
## 2 Released
## 3 Released
## 4 Released
## 5 Released
## 6 Released
## tagline
## 1
## 2 Roll the dice and unleash the excitement!
## 3 Still Yelling. Still Fighting. Still Ready for Love.
## 4 Friends are the people who let you be yourself... and never let you forget it.
## 5 Just When His World Is Back To Normal... He's In For The Surprise Of His Life!
## 6 A Los Angeles Crime Saga
## title video vote_average vote_count
## 1 Toy Story False 7.7 5415
## 2 Jumanji False 6.9 2413
## 3 Grumpier Old Men False 6.5 92
## 4 Waiting to Exhale False 6.1 34
## 5 Father of the Bride Part II False 5.7 173
## 6 Heat False 7.7 1886
# Number of records (rows) loaded from the movies file.
dim(movies)[1L]
## [1] 45466
# Column names as read from the file header.
colnames(movies)
## [1] "adult" "belongs_to_collection" "budget"
## [4] "genres" "homepage" "id"
## [7] "imdb_id" "original_language" "original_title"
## [10] "overview" "popularity" "poster_path"
## [13] "production_companies" "production_countries" "release_date"
## [16] "revenue" "runtime" "spoken_languages"
## [19] "status" "tagline" "title"
## [22] "video" "vote_average" "vote_count"
# Per-column overview: type/length for character columns, quartiles and NA
# counts for numeric ones.
movies %>% summary()
## adult belongs_to_collection budget genres
## Length:45466 Length:45466 Length:45466 Length:45466
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## homepage id imdb_id original_language
## Length:45466 Length:45466 Length:45466 Length:45466
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## original_title overview popularity poster_path
## Length:45466 Length:45466 Length:45466 Length:45466
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## production_companies production_countries release_date
## Length:45466 Length:45466 Length:45466
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## revenue runtime spoken_languages status
## Min. :0.000e+00 Min. : 0.00 Length:45466 Length:45466
## 1st Qu.:0.000e+00 1st Qu.: 85.00 Class :character Class :character
## Median :0.000e+00 Median : 95.00 Mode :character Mode :character
## Mean :1.121e+07 Mean : 94.13
## 3rd Qu.:0.000e+00 3rd Qu.: 107.00
## Max. :2.788e+09 Max. :1256.00
## NA's :6 NA's :263
## tagline title video vote_average
## Length:45466 Length:45466 Length:45466 Min. : 0.000
## Class :character Class :character Class :character 1st Qu.: 5.000
## Mode :character Mode :character Mode :character Median : 6.000
## Mean : 5.618
## 3rd Qu.: 6.800
## Max. :10.000
## NA's :6
## vote_count
## Min. : 0.0
## 1st Qu.: 3.0
## Median : 10.0
## Mean : 109.9
## 3rd Qu.: 34.0
## Max. :14075.0
## NA's :6
Data Cleaning
Rename column headers
# Give the first ten columns title-case display names in one assignment;
# columns 11-24 keep their original snake_case names.
# Names containing spaces must be quoted with backticks when used with `$`
# or inside dplyr verbs (e.g. movies$`Imdb Id`).
names(movies)[seq_len(10)] <- c(
  "Adult", "Belongs To Collection", "Budget", "Genre", "Homepage",
  "Id", "Imdb Id", "Original Language", "Original Title", "Overview"
)
movies %>% head()
## Adult
## 1 False
## 2 False
## 3 False
## 4 False
## 5 False
## 6 False
## Belongs To Collection
## 1 {'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}
## 2
## 3 {'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}
## 4
## 5 {'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}
## 6
## Budget
## 1 30000000
## 2 65000000
## 3 0
## 4 16000000
## 5 0
## 6 60000000
## Genre
## 1 [{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]
## 2 [{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]
## 3 [{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]
## 4 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]
## 5 [{'id': 35, 'name': 'Comedy'}]
## 6 [{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}]
## Homepage Id Imdb Id Original Language
## 1 http://toystory.disney.com/toy-story 862 tt0114709 en
## 2 8844 tt0113497 en
## 3 15602 tt0113228 en
## 4 31357 tt0114885 en
## 5 11862 tt0113041 en
## 6 949 tt0113277 en
## Original Title
## 1 Toy Story
## 2 Jumanji
## 3 Grumpier Old Men
## 4 Waiting to Exhale
## 5 Father of the Bride Part II
## 6 Heat
## Overview
## 1 Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.
## 2 When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.
## 3 A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.
## 4 Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive "good man" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.
## 5 Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own.
## 6 Obsessive master thief, Neil McCauley leads a top-notch crew on various insane heists throughout Los Angeles while a mentally unstable detective, Vincent Hanna pursues him without rest. Each man recognizes and respects the ability and the dedication of the other even though they are aware their cat-and-mouse game may end in violence.
## popularity poster_path
## 1 21.946943 /rhIRbceoE9lR4veEXuwCC2wARtG.jpg
## 2 17.015539 /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
## 3 11.7129 /6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
## 4 3.859495 /16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
## 5 8.387519 /e64sOI48hQXyru7naBFyssKFxVd.jpg
## 6 17.924927 /zMyfPUelumio3tiDKPffaUpsQTD.jpg
## production_companies
## 1 [{'name': 'Pixar Animation Studios', 'id': 3}]
## 2 [{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]
## 3 [{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]
## 4 [{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]
## 5 [{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]
## 6 [{'name': 'Regency Enterprises', 'id': 508}, {'name': 'Forward Pass', 'id': 675}, {'name': 'Warner Bros.', 'id': 6194}]
## production_countries release_date
## 1 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-10-30
## 2 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-15
## 3 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-22
## 4 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-22
## 5 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-02-10
## 6 [{'iso_3166_1': 'US', 'name': 'United States of America'}] 1995-12-15
## revenue runtime
## 1 373554033 81
## 2 262797249 104
## 3 0 101
## 4 81452156 127
## 5 76578911 106
## 6 187436818 170
## spoken_languages
## 1 [{'iso_639_1': 'en', 'name': 'English'}]
## 2 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]
## 3 [{'iso_639_1': 'en', 'name': 'English'}]
## 4 [{'iso_639_1': 'en', 'name': 'English'}]
## 5 [{'iso_639_1': 'en', 'name': 'English'}]
## 6 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'es', 'name': 'Español'}]
## status
## 1 Released
## 2 Released
## 3 Released
## 4 Released
## 5 Released
## 6 Released
## tagline
## 1
## 2 Roll the dice and unleash the excitement!
## 3 Still Yelling. Still Fighting. Still Ready for Love.
## 4 Friends are the people who let you be yourself... and never let you forget it.
## 5 Just When His World Is Back To Normal... He's In For The Surprise Of His Life!
## 6 A Los Angeles Crime Saga
## title video vote_average vote_count
## 1 Toy Story False 7.7 5415
## 2 Jumanji False 6.9 2413
## 3 Grumpier Old Men False 6.5 92
## 4 Waiting to Exhale False 6.1 34
## 5 Father of the Bride Part II False 5.7 173
## 6 Heat False 7.7 1886
Remove the first two rows
# Drop the first two rows of `movies`; the remaining rows keep their original
# row names (3, 4, ...).
# NOTE(review): the file was read with header = TRUE, so rows 1-2 are real
# records (Toy Story and Jumanji), not leftover header lines - confirm that
# discarding them is intended.
movies <- movies[-seq_len(2), ]
movies %>% head()
## Adult
## 3 False
## 4 False
## 5 False
## 6 False
## 7 False
## 8 False
## Belongs To Collection
## 3 {'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}
## 4
## 5 {'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}
## 6
## 7
## 8
## Budget
## 3 0
## 4 16000000
## 5 0
## 6 60000000
## 7 58000000
## 8 0
## Genre
## 3 [{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]
## 4 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]
## 5 [{'id': 35, 'name': 'Comedy'}]
## 6 [{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}]
## 7 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, 'name': 'Romance'}]
## 8 [{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 18, 'name': 'Drama'}, {'id': 10751, 'name': 'Family'}]
## Homepage Id Imdb Id Original Language Original Title
## 3 15602 tt0113228 en Grumpier Old Men
## 4 31357 tt0114885 en Waiting to Exhale
## 5 11862 tt0113041 en Father of the Bride Part II
## 6 949 tt0113277 en Heat
## 7 11860 tt0114319 en Sabrina
## 8 45325 tt0112302 en Tom and Huck
## Overview
## 3 A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.
## 4 Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive "good man" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.
## 5 Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own.
## 6 Obsessive master thief, Neil McCauley leads a top-notch crew on various insane heists throughout Los Angeles while a mentally unstable detective, Vincent Hanna pursues him without rest. Each man recognizes and respects the ability and the dedication of the other even though they are aware their cat-and-mouse game may end in violence.
## 7 An ugly duckling having undergone a remarkable change, still harbors feelings for her crush: a carefree playboy, but not before his business-focused brother has something to say about it.
## 8 A mischievous young boy, Tom Sawyer, witnesses a murder by the deadly Injun Joe. Tom becomes friends with Huckleberry Finn, a boy with no future and no family. Tom has to choose between honoring a friendship or honoring an oath because the town alcoholic is accused of the murder. Tom and Huck go through several adventures trying to retrieve evidence.
## popularity poster_path
## 3 11.7129 /6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
## 4 3.859495 /16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
## 5 8.387519 /e64sOI48hQXyru7naBFyssKFxVd.jpg
## 6 17.924927 /zMyfPUelumio3tiDKPffaUpsQTD.jpg
## 7 6.677277 /jQh15y5YB7bWz1NtffNZmRw0s9D.jpg
## 8 2.561161 /sGO5Qa55p7wTu7FJcX4H4xIVKvS.jpg
## production_companies
## 3 [{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]
## 4 [{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]
## 5 [{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]
## 6 [{'name': 'Regency Enterprises', 'id': 508}, {'name': 'Forward Pass', 'id': 675}, {'name': 'Warner Bros.', 'id': 6194}]
## 7 [{'name': 'Paramount Pictures', 'id': 4}, {'name': 'Scott Rudin Productions', 'id': 258}, {'name': 'Mirage Enterprises', 'id': 932}, {'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Constellation Entertainment', 'id': 14941}, {'name': 'Worldwide', 'id': 55873}, {'name': 'Mont Blanc Entertainment GmbH', 'id': 58079}]
## 8 [{'name': 'Walt Disney Pictures', 'id': 2}]
## production_countries
## 3 [{'iso_3166_1': 'US', 'name': 'United States of America'}]
## 4 [{'iso_3166_1': 'US', 'name': 'United States of America'}]
## 5 [{'iso_3166_1': 'US', 'name': 'United States of America'}]
## 6 [{'iso_3166_1': 'US', 'name': 'United States of America'}]
## 7 [{'iso_3166_1': 'DE', 'name': 'Germany'}, {'iso_3166_1': 'US', 'name': 'United States of America'}]
## 8 [{'iso_3166_1': 'US', 'name': 'United States of America'}]
## release_date revenue runtime
## 3 1995-12-22 0 101
## 4 1995-12-22 81452156 127
## 5 1995-02-10 76578911 106
## 6 1995-12-15 187436818 170
## 7 1995-12-15 0 127
## 8 1995-12-22 0 97
## spoken_languages
## 3 [{'iso_639_1': 'en', 'name': 'English'}]
## 4 [{'iso_639_1': 'en', 'name': 'English'}]
## 5 [{'iso_639_1': 'en', 'name': 'English'}]
## 6 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'es', 'name': 'Español'}]
## 7 [{'iso_639_1': 'fr', 'name': 'Français'}, {'iso_639_1': 'en', 'name': 'English'}]
## 8 [{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'de', 'name': 'Deutsch'}]
## status
## 3 Released
## 4 Released
## 5 Released
## 6 Released
## 7 Released
## 8 Released
## tagline
## 3 Still Yelling. Still Fighting. Still Ready for Love.
## 4 Friends are the people who let you be yourself... and never let you forget it.
## 5 Just When His World Is Back To Normal... He's In For The Surprise Of His Life!
## 6 A Los Angeles Crime Saga
## 7 You are cordially invited to the most surprising merger of the year.
## 8 The Original Bad Boys.
## title video vote_average vote_count
## 3 Grumpier Old Men False 6.5 92
## 4 Waiting to Exhale False 6.1 34
## 5 Father of the Bride Part II False 5.7 173
## 6 Heat False 7.7 1886
## 7 Sabrina False 6.2 141
## 8 Tom and Huck False 5.4 45
Github => https://github.com/Gunduzhazal/project-2
Rpubs => https://rpubs.com/gunduzhazal/839070