library(readr)
# Load the video game sales data. Empty strings, "N/A" and "tbd" are all read
# in as missing values (NA).
# NOTE: the path is relative, so it resolves against the current working
# directory.
Egame <- "Downloads/Video_Games_Sales_as_at_22_Dec_2016.csv"
Egame_sale <- read.csv(Egame, na.strings = c("", "N/A", "tbd"))

# View() opens a GUI data viewer, which only makes sense in an interactive
# session; skip it when the script is run via Rscript or knitted.
if (interactive()) {
  View(Egame_sale)
}

# Print the structure (column names and types) of the loaded data.
str(Egame_sale)
## 'data.frame': 16719 obs. of 16 variables:
## $ Name : chr "Wii Sports" "Super Mario Bros." "Mario Kart Wii" "Wii Sports Resort" ...
## $ Platform : chr "Wii" "NES" "Wii" "Wii" ...
## $ Year_of_Release: int 2006 1985 2008 2009 1996 1989 2006 2006 2009 1984 ...
## $ Genre : chr "Sports" "Platform" "Racing" "Sports" ...
## $ Publisher : chr "Nintendo" "Nintendo" "Nintendo" "Nintendo" ...
## $ NA_Sales : num 41.4 29.1 15.7 15.6 11.3 ...
## $ EU_Sales : num 28.96 3.58 12.76 10.93 8.89 ...
## $ JP_Sales : num 3.77 6.81 3.79 3.28 10.22 ...
## $ Other_Sales : num 8.45 0.77 3.29 2.95 1 0.58 2.88 2.84 2.24 0.47 ...
## $ Global_Sales : num 82.5 40.2 35.5 32.8 31.4 ...
## $ Critic_Score : int 76 NA 82 80 NA NA 89 58 87 NA ...
## $ Critic_Count : int 51 NA 73 73 NA NA 65 41 80 NA ...
## $ User_Score : num 8 NA 8.3 8 NA NA 8.5 6.6 8.4 NA ...
## $ User_Count : int 322 NA 709 192 NA NA 431 129 594 NA ...
## $ Developer : chr "Nintendo" NA "Nintendo" "Nintendo" ...
## $ Rating : chr "E" NA "E" "E" ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# First seven games, in file order, whose genre is "Platform".
Platform_egames <- head(
  filter(Egame_sale, Genre == "Platform"),
  n = 7
)
Platform_egames
## Name Platform Year_of_Release Genre Publisher
## 1 Super Mario Bros. NES 1985 Platform Nintendo
## 2 New Super Mario Bros. DS 2006 Platform Nintendo
## 3 New Super Mario Bros. Wii Wii 2009 Platform Nintendo
## 4 Super Mario World SNES 1990 Platform Nintendo
## 5 Super Mario Land GB 1989 Platform Nintendo
## 6 Super Mario Bros. 3 NES 1988 Platform Nintendo
## 7 Super Mario 64 N64 1996 Platform Nintendo
## NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score Critic_Count
## 1 29.08 3.58 6.81 0.77 40.24 NA NA
## 2 11.28 9.14 6.50 2.88 29.80 89 65
## 3 14.44 6.94 4.70 2.24 28.32 87 80
## 4 12.78 3.75 3.54 0.55 20.61 NA NA
## 5 10.83 2.71 4.18 0.42 18.14 NA NA
## 6 9.54 3.44 3.84 0.46 17.28 NA NA
## 7 6.91 2.85 1.91 0.23 11.89 NA NA
## User_Score User_Count Developer Rating
## 1 NA NA <NA> <NA>
## 2 8.5 431 Nintendo E
## 3 8.4 594 Nintendo E
## 4 NA NA <NA> <NA>
## 5 NA NA <NA> <NA>
## 6 NA NA <NA> <NA>
## 7 NA NA <NA> <NA>
# Name, platform, release year and genre for the first six rows of the data.
First_six_games <- head(
  select(Egame_sale, Name, Platform, Year_of_Release, Genre),
  n = 6
)
First_six_games
## Name Platform Year_of_Release Genre
## 1 Wii Sports Wii 2006 Sports
## 2 Super Mario Bros. NES 1985 Platform
## 3 Mario Kart Wii Wii 2008 Racing
## 4 Wii Sports Resort Wii 2009 Sports
## 5 Pokemon Red/Pokemon Blue GB 1996 Role-Playing
## 6 Tetris GB 1989 Puzzle
# Ten rows with the highest global sales: sort descending, keep the top ten.
Top_ten <- head(
  arrange(Egame_sale, desc(Global_Sales)),
  n = 10
)
Top_ten
## Name Platform Year_of_Release Genre Publisher
## 1 Wii Sports Wii 2006 Sports Nintendo
## 2 Super Mario Bros. NES 1985 Platform Nintendo
## 3 Mario Kart Wii Wii 2008 Racing Nintendo
## 4 Wii Sports Resort Wii 2009 Sports Nintendo
## 5 Pokemon Red/Pokemon Blue GB 1996 Role-Playing Nintendo
## 6 Tetris GB 1989 Puzzle Nintendo
## 7 New Super Mario Bros. DS 2006 Platform Nintendo
## 8 Wii Play Wii 2006 Misc Nintendo
## 9 New Super Mario Bros. Wii Wii 2009 Platform Nintendo
## 10 Duck Hunt NES 1984 Shooter Nintendo
## NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score
## 1 41.36 28.96 3.77 8.45 82.53 76
## 2 29.08 3.58 6.81 0.77 40.24 NA
## 3 15.68 12.76 3.79 3.29 35.52 82
## 4 15.61 10.93 3.28 2.95 32.77 80
## 5 11.27 8.89 10.22 1.00 31.37 NA
## 6 23.20 2.26 4.22 0.58 30.26 NA
## 7 11.28 9.14 6.50 2.88 29.80 89
## 8 13.96 9.18 2.93 2.84 28.92 58
## 9 14.44 6.94 4.70 2.24 28.32 87
## 10 26.93 0.63 0.28 0.47 28.31 NA
## Critic_Count User_Score User_Count Developer Rating
## 1 51 8.0 322 Nintendo E
## 2 NA NA NA <NA> <NA>
## 3 73 8.3 709 Nintendo E
## 4 73 8.0 192 Nintendo E
## 5 NA NA NA <NA> <NA>
## 6 NA NA NA <NA> <NA>
## 7 65 8.5 431 Nintendo E
## 8 41 6.6 129 Nintendo E
## 9 80 8.4 594 Nintendo E
## 10 NA NA NA <NA> <NA>
# Shorten the release-year column name, then list all column names to confirm.
Egame_sale <- rename(Egame_sale, Release_Yr = Year_of_Release)
colnames(Egame_sale)
## [1] "Name" "Platform" "Release_Yr" "Genre" "Publisher"
## [6] "NA_Sales" "EU_Sales" "JP_Sales" "Other_Sales" "Global_Sales"
## [11] "Critic_Score" "Critic_Count" "User_Score" "User_Count" "Developer"
## [16] "Rating"
# First five N64 rows whose NA_Sales value is greater than 1.
N64_1M <- head(
  filter(Egame_sale, Platform == "N64" & NA_Sales > 1),
  n = 5
)
N64_1M
## Name Platform Release_Yr Genre Publisher
## 1 Super Mario 64 N64 1996 Platform Nintendo
## 2 Mario Kart 64 N64 1996 Racing Nintendo
## 3 GoldenEye 007 N64 1997 Shooter Nintendo
## 4 The Legend of Zelda: Ocarina of Time N64 1998 Action Nintendo
## 5 Super Smash Bros. N64 1999 Fighting Nintendo
## NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score Critic_Count
## 1 6.91 2.85 1.91 0.23 11.89 NA NA
## 2 5.55 1.94 2.23 0.15 9.87 NA NA
## 3 5.80 2.01 0.13 0.15 8.09 NA NA
## 4 4.10 1.89 1.45 0.16 7.60 NA NA
## 5 2.95 0.60 1.97 0.04 5.55 NA NA
## User_Score User_Count Developer Rating
## 1 NA NA <NA> <NA>
## 2 NA NA <NA> <NA>
## 3 NA NA <NA> <NA>
## 4 NA NA <NA> <NA>
## 5 NA NA <NA> <NA>
# Add the combined NA + EU sales column and place it directly after EU_Sales.
Egame_sale <- mutate(
  Egame_sale,
  NA_EU_sales = NA_Sales + EU_Sales,
  .after = EU_Sales
)
head(Egame_sale, n = 5)
## Name Platform Release_Yr Genre Publisher NA_Sales
## 1 Wii Sports Wii 2006 Sports Nintendo 41.36
## 2 Super Mario Bros. NES 1985 Platform Nintendo 29.08
## 3 Mario Kart Wii Wii 2008 Racing Nintendo 15.68
## 4 Wii Sports Resort Wii 2009 Sports Nintendo 15.61
## 5 Pokemon Red/Pokemon Blue GB 1996 Role-Playing Nintendo 11.27
## EU_Sales NA_EU_sales JP_Sales Other_Sales Global_Sales Critic_Score
## 1 28.96 70.32 3.77 8.45 82.53 76
## 2 3.58 32.66 6.81 0.77 40.24 NA
## 3 12.76 28.44 3.79 3.29 35.52 82
## 4 10.93 26.54 3.28 2.95 32.77 80
## 5 8.89 20.16 10.22 1.00 31.37 NA
## Critic_Count User_Score User_Count Developer Rating
## 1 51 8.0 322 Nintendo E
## 2 NA NA NA <NA> <NA>
## 3 73 8.3 709 Nintendo E
## 4 73 8.0 192 Nintendo E
## 5 NA NA NA <NA> <NA>
# Mean and standard deviation of the combined NA + EU sales column.
# The backticked names spell out the labels dplyr would otherwise derive from
# the unnamed expressions, so the printed column headers stay the same.
NA_EU_Sales_avg_sd <- summarise(
  Egame_sale,
  `mean(NA_EU_sales)` = mean(NA_EU_sales),
  `sd(NA_EU_sales)` = sd(NA_EU_sales)
)
round(NA_EU_Sales_avg_sd, digits = 2)
## mean(NA_EU_sales) sd(NA_EU_sales)
## 1 0.41 1.24
# Draw six rows at random, with replacement (the same row may appear twice).
# No seed is set before this draw, so the rows differ from run to run.
Sample_egame <- sample_n(Egame_sale, size = 6, replace = TRUE)
Sample_egame
## Name Platform Release_Yr Genre
## 1 FIFA Soccer 09 PS2 2008 Sports
## 2 Ratchet & Clank: Up Your Arsenal PS2 2004 Platform
## 3 Dakar 2: The World's Ultimate Rally XB 2003 Racing
## 4 Momotarou Dentetsu World DS 2010 Misc
## 5 Blood of Bahamut DS 2009 Role-Playing
## 6 Intelligent Qube PS 1997 Puzzle
## Publisher NA_Sales EU_Sales NA_EU_sales JP_Sales
## 1 Electronic Arts 0.38 0.07 0.45 0.01
## 2 Sony Computer Entertainment 1.31 0.74 2.05 0.31
## 3 Acclaim Entertainment 0.02 0.00 0.02 0.00
## 4 Hudson Soft 0.00 0.00 0.00 0.19
## 5 Square Enix 0.00 0.00 0.00 0.09
## 6 Sony Computer Entertainment 0.13 0.07 0.20 1.00
## Other_Sales Global_Sales Critic_Score Critic_Count User_Score User_Count
## 1 1.82 2.28 82 8 6.9 20
## 2 0.22 2.57 NA NA NA NA
## 3 0.00 0.02 NA NA NA NA
## 4 0.00 0.19 NA NA NA NA
## 5 0.00 0.09 NA NA NA NA
## 6 0.02 1.22 NA NA NA NA
## Developer Rating
## 1 EA Canada E
## 2 <NA> <NA>
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
# Fix the RNG seed so the 5% random sample is reproducible, then show the
# first four sampled rows.
set.seed(1234)
Sample_5percent <- sample_frac(Egame_sale, size = 0.05)
head(Sample_5percent, n = 4)
## Name Platform Release_Yr Genre
## 1 ESPN Winter X Games: Snowboarding 2002 PS2 2000 Sports
## 2 Happy Cooking DS 2006 Simulation
## 3 NCAA Football 2005 GC 2004 Sports
## 4 Mega Man X: Command Mission PS2 2004 Role-Playing
## Publisher NA_Sales EU_Sales NA_EU_sales JP_Sales
## 1 Konami Digital Entertainment 0.10 0.08 0.18 0
## 2 Ubisoft 0.17 0.00 0.17 0
## 3 Electronic Arts 0.17 0.04 0.21 0
## 4 Capcom 0.09 0.07 0.16 0
## Other_Sales Global_Sales Critic_Score Critic_Count User_Score User_Count
## 1 0.03 0.21 64 14 7.9 8
## 2 0.01 0.18 NA NA NA NA
## 3 0.01 0.22 88 18 9.0 5
## 4 0.02 0.18 69 27 7.1 18
## Developer Rating
## 1 Konami T
## 2 Ubisoft E
## 3 EA Sports E
## 4 Valuewave Co.,Ltd. E
# Nintendo-published Wii games released after 2009, reduced to the identifying
# columns plus global sales and ordered from best to worst seller.
Subset_WII_Nintendo <- Egame_sale %>%
  filter(Platform == "Wii" & Publisher == "Nintendo" & Release_Yr > 2009) %>%
  select(Name, Platform, Release_Yr, Genre, Publisher, Global_Sales) %>%
  arrange(desc(Global_Sales))
head(Subset_WII_Nintendo, n = 7)
## Name Platform Release_Yr Genre Publisher
## 1 Wii Party Wii 2010 Misc Nintendo
## 2 Super Mario Galaxy 2 Wii 2010 Platform Nintendo
## 3 Donkey Kong Country Returns Wii 2010 Platform Nintendo
## 4 The Legend of Zelda: Skyward Sword Wii 2011 Action Nintendo
## 5 Mario Party 9 Wii 2012 Misc Nintendo
## 6 Super Mario All-Stars: Limited Edition Wii 2010 Platform Nintendo
## 7 Mario Sports Mix Wii 2010 Sports Nintendo
## Global_Sales
## 1 8.38
## 2 7.51
## 3 6.44
## 4 3.95
## 5 3.13
## 6 2.56
## 7 2.08
# Show the 26th row of the sorted subset (kept as a one-row data frame).
print(Subset_WII_Nintendo[26, , drop = FALSE])
## Name Platform Release_Yr Genre Publisher Global_Sales
## 26 Fatal Frame 2: Wii Edition Wii 2012 Action Nintendo 0.1
# Keep only the Sports and Role-Playing genres.
Subset_Sports_RolePlaying <- filter(
  Egame_sale,
  Genre %in% c("Sports", "Role-Playing")
)

# Per-platform summary of NA_Sales for those two genres.
Stats_Sports_RP <- summarise(
  group_by(Subset_Sports_RolePlaying, Platform),
  Mean_NA_sale = mean(NA_Sales, na.rm = TRUE),
  Median_NA_sale = median(NA_Sales, na.rm = TRUE),
  Min_NA_sale = min(NA_Sales, na.rm = TRUE),
  Max_NA_sale = max(NA_Sales, na.rm = TRUE)
)
Stats_Sports_RP
## # A tibble: 28 × 5
## Platform Mean_NA_sale Median_NA_sale Min_NA_sale Max_NA_sale
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2600 0.268 0.22 0.07 0.52
## 2 3DS 0.223 0.005 0 5.28
## 3 DC 0.153 0 0 1.12
## 4 DS 0.177 0.05 0 6.38
## 5 GB 1.08 0 0 11.3
## 6 GBA 0.240 0.06 0 6.06
## 7 GC 0.192 0.1 0 1.48
## 8 GEN 0.45 0 0 1.75
## 9 N64 0.266 0.15 0 1.68
## 10 NES 0.326 0.14 0 1.92
## # ℹ 18 more rows
# Final table: fix the column order, then rank platforms by median NA sales
# from highest to lowest.
Stats_Sports_RP_final <- Stats_Sports_RP %>%
  select(Platform, Mean_NA_sale, Median_NA_sale, Min_NA_sale, Max_NA_sale) %>%
  arrange(desc(Median_NA_sale))
Stats_Sports_RP_final
## # A tibble: 28 × 5
## Platform Mean_NA_sale Median_NA_sale Min_NA_sale Max_NA_sale
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2600 0.268 0.22 0.07 0.52
## 2 XOne 0.418 0.22 0 2.51
## 3 X360 0.462 0.185 0 5.05
## 4 WiiU 0.171 0.17 0 0.37
## 5 N64 0.266 0.15 0 1.68
## 6 XB 0.264 0.15 0 2.09
## 7 NES 0.326 0.14 0 1.92
## 8 Wii 0.526 0.14 0 41.4
## 9 PS3 0.273 0.12 0 2.55
## 10 GC 0.192 0.1 0 1.48
## # ℹ 18 more rows
# Part 2 ----
library(tidyr)
library(readr)
# Read the exam scores; the relative path resolves against the working
# directory.
ExamScores <- read_csv(file = "Downloads/ExamScores.csv")
## Rows: 24 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Exam
## dbl (2): Student, Score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a GUI data viewer, which only makes sense in an interactive
# session; skip it when the script is run via Rscript or knitted.
if (interactive()) {
  View(ExamScores)
}

# Reshape to wide format: each distinct value of Exam becomes its own column,
# filled with the matching Score.
Examtidy <- ExamScores %>%
  pivot_wider(
    names_from = Exam,
    values_from = Score
  )
Examtidy
## # A tibble: 8 × 4
## Student `Exam 1` `Exam 2` `Exam 3`
## <dbl> <dbl> <dbl> <dbl>
## 1 1 90 91 85
## 2 2 79 80 62
## 3 3 98 92 96
## 4 4 50 60 75
## 5 5 79 83 68
## 6 6 72 75 77
## 7 7 92 93 94
## 8 8 99 77 84
# Number of rows and columns in the widened table.
c(nrow(Examtidy), ncol(Examtidy))
## [1] 8 4
library(tidyr)
library(dplyr)
library(readr)
# Read the salary data; the relative path resolves against the working
# directory.
BusinessSalaries <- read_csv(file = "Downloads/BusinessSalaries.csv")
## Rows: 75 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Fairfield, Rival
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a GUI data viewer, which only makes sense in an interactive
# session; skip it when the script is run via Rscript or knitted.
if (interactive()) {
  View(BusinessSalaries)
}

# Reshape to long format: one row per salary, with the source column name
# stored in University. Columns are selected by name rather than by position
# so the reshape does not silently pick the wrong columns if the file layout
# changes. Missing salaries are dropped.
Money_Out_of_College <- BusinessSalaries %>%
  pivot_longer(
    cols = c(Fairfield, Rival),
    names_to = "University",
    values_to = "Salary",
    values_drop_na = TRUE
  )
Money_Out_of_College
## # A tibble: 125 × 2
## University Salary
## <chr> <dbl>
## 1 Fairfield 104000
## 2 Rival 67700
## 3 Fairfield 92900
## 4 Rival 82400
## 5 Fairfield 92200
## 6 Rival 98400
## 7 Fairfield 109500
## 8 Rival 90600
## 9 Fairfield 92200
## 10 Rival 106000
## # ℹ 115 more rows
# Number of rows and columns in the long-format salary table.
c(nrow(Money_Out_of_College), ncol(Money_Out_of_College))
## [1] 125 2