## 1.1 For our analysis, the subjects are not the cricketers themselves, but each batting innings they participated in. In order to make the data tidy:
### a. Each subject needs its own row. Rearrange the data into a long format so that there is a row for each batter in each innings. Your new tibble should have 270 rows.
### Loading the file ashes.csv from the computer
# Read the raw Ashes data (one row per batter, one column per innings).
# NOTE(review): setwd() with an absolute path only works on this machine;
# it is kept because later steps may rely on the working directory, but a
# project-relative path would be more portable.
setwd("C:\\Users\\hoaan\\OneDrive\\Data Science\\Data taming, visualisation")
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ashes <- read.csv("ashes.csv", header = TRUE)
colnames(ashes)
## [1] "batter" "team" "role"
## [4] "Test.1..Innings.1" "Test.1..Innings.2" "Test.2..Innings.1"
## [7] "Test.2..Innings.2" "Test.3..Innings.1" "Test.3..Innings.2"
## [10] "Test.4..Innings.1" "Test.4..Innings.2" "Test.5..Innings.1"
## [13] "Test.5..Innings.2"
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.4.3
# Shorten the innings column names: "Test.<t>..Innings.<i>" becomes
# "test<t>_innings<i>". Names that do not match the pattern are unchanged.
names(ashes) <- str_replace_all(
  names(ashes),
  "Test\\.(\\d+)\\.\\.Innings\\.(\\d+)",
  "test\\1_innings\\2"
)
colnames(ashes)
## [1] "batter" "team" "role" "test1_innings1"
## [5] "test1_innings2" "test2_innings1" "test2_innings2" "test3_innings1"
## [9] "test3_innings2" "test4_innings1" "test4_innings2" "test5_innings1"
## [13] "test5_innings2"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Reshape from wide (one column per innings) to long (one row per batter per
# innings). pivot_longer() replaces the superseded gather() and returns a
# tibble. It emits rows batter by batter, so arrange() puts them back in the
# innings-by-innings order gather() produced (batters keep their original
# order within each innings).
ashes_long <- ashes %>%
  pivot_longer(
    cols = test1_innings1:test5_innings2,
    names_to = "test_innings",
    values_to = "performance"
  ) %>%
  arrange(test_innings)
dim(ashes_long)
## [1] 270 5
Extract each of the following for each player innings: • the player’s batting number, • their score, and • the number of balls they faced.
# Packages are installed once from the console, not on every run of the
# analysis: calling install.packages() here only produced "package is in use"
# warnings. The library() calls are kept so this step still states what it
# needs.
library(dplyr)
library(stringr)
# Pull the batting position, runs and balls faced out of the free-text
# performance column. str_match() returns the whole match in column 1 and the
# three capture groups in columns 2 to 4; rows that do not match give NA.
innings_parts <- str_match(
  ashes_long$performance,
  "Batting at number ([0-9NA]+), scored ([0-9NA]+) runs from ([0-9NA]+) balls"
)
ashes_long <- ashes_long %>%
  mutate(
    player_batting_number = innings_parts[, 2],
    scored = innings_parts[, 3],
    number_of_balls_faced = innings_parts[, 4]
  ) %>%
  select(-performance)
ashes_long
## batter team role test_innings player_batting_number
## 1 Ali England allrounder test1_innings1 6
## 2 Anderson English bowl test1_innings1 11
## 3 Bairstow England wicketkeeper test1_innings1 7
## 4 Ball England bowl test1_innings1 10
## 5 Bancroft Australia bat test1_innings1 1
## 6 Bird Australia bowl test1_innings1 <NA>
## 7 Broad England bowler test1_innings1 9
## 8 Cook England bat test1_innings1 1
## 9 Crane England bowl test1_innings1 <NA>
## 10 Cummins Australia bowl test1_innings1 9
## 11 Curran England bowl test1_innings1 <NA>
## 12 Handscomb Australia batting test1_innings1 5
## 13 Hazlewood Australia bowl test1_innings1 10
## 14 Khawaja Australia batsman test1_innings1 3
## 15 Lyon Australia bowl test1_innings1 11
## 16 Malan England bat test1_innings1 5
## 17 MMarsh Australia all rounder test1_innings1 <NA>
## 18 Overton England bowl test1_innings1 <NA>
## 19 Paine Australia wicketkeeper test1_innings1 7
## 20 Root England bat test1_innings1 4
## 21 SMarsh Australia bat test1_innings1 6
## 22 Smith Australia bat test1_innings1 4
## 23 Starc Australia bowl test1_innings1 8
## 24 Stoneman England bat test1_innings1 2
## 25 Vince England bat test1_innings1 3
## 26 Warner Australia bat test1_innings1 2
## 27 Woakes England all-rounder test1_innings1 8
## 28 Ali England allrounder test1_innings2 6
## 29 Anderson English bowl test1_innings2 11
## 30 Bairstow England wicketkeeper test1_innings2 7
## 31 Ball England bowl test1_innings2 10
## 32 Bancroft Australia bat test1_innings2 1
## 33 Bird Australia bowl test1_innings2 <NA>
## 34 Broad England bowler test1_innings2 9
## 35 Cook England bat test1_innings2 1
## 36 Crane England bowl test1_innings2 <NA>
## 37 Cummins Australia bowl test1_innings2 <NA>
## 38 Curran England bowl test1_innings2 <NA>
## 39 Handscomb Australia batting test1_innings2 <NA>
## 40 Hazlewood Australia bowl test1_innings2 <NA>
## 41 Khawaja Australia batsman test1_innings2 <NA>
## 42 Lyon Australia bowl test1_innings2 <NA>
## 43 Malan England bat test1_innings2 5
## 44 MMarsh Australia all rounder test1_innings2 <NA>
## 45 Overton England bowl test1_innings2 <NA>
## 46 Paine Australia wicketkeeper test1_innings2 <NA>
## 47 Root England bat test1_innings2 4
## 48 SMarsh Australia bat test1_innings2 <NA>
## 49 Smith Australia bat test1_innings2 <NA>
## 50 Starc Australia bowl test1_innings2 <NA>
## 51 Stoneman England bat test1_innings2 2
## 52 Vince England bat test1_innings2 3
## 53 Warner Australia bat test1_innings2 2
## 54 Woakes England all-rounder test1_innings2 8
## 55 Ali England allrounder test2_innings1 6
## 56 Anderson English bowl test2_innings1 11
## 57 Bairstow England wicketkeeper test2_innings1 7
## 58 Ball England bowl test2_innings1 <NA>
## 59 Bancroft Australia bat test2_innings1 1
## 60 Bird Australia bowl test2_innings1 <NA>
## 61 Broad England bowler test2_innings1 10
## 62 Cook England bat test2_innings1 1
## 63 Crane England bowl test2_innings1 <NA>
## 64 Cummins Australia bowl test2_innings1 9
## 65 Curran England bowl test2_innings1 <NA>
## 66 Handscomb Australia batting test2_innings1 5
## 67 Hazlewood Australia bowl test2_innings1 <NA>
## 68 Khawaja Australia batsman test2_innings1 3
## 69 Lyon Australia bowl test2_innings1 10
## 70 Malan England bat test2_innings1 5
## 71 MMarsh Australia all rounder test2_innings1 <NA>
## 72 Overton England bowl test2_innings1 9
## 73 Paine Australia wicketkeeper test2_innings1 7
## 74 Root England bat test2_innings1 4
## 75 SMarsh Australia bat test2_innings1 6
## 76 Smith Australia bat test2_innings1 4
## 77 Starc Australia bowl test2_innings1 8
## 78 Stoneman England bat test2_innings1 2
## 79 Vince England bat test2_innings1 3
## 80 Warner Australia bat test2_innings1 2
## 81 Woakes England all-rounder test2_innings1 8
## 82 Ali England allrounder test2_innings2 7
## 83 Anderson English bowl test2_innings2 11
## 84 Bairstow England wicketkeeper test2_innings2 8
## 85 Ball England bowl test2_innings2 <NA>
## 86 Bancroft Australia bat test2_innings2 1
## 87 Bird Australia bowl test2_innings2 <NA>
## 88 Broad England bowler test2_innings2 10
## 89 Cook England bat test2_innings2 1
## 90 Crane England bowl test2_innings2 <NA>
## 91 Cummins Australia bowl test2_innings2 10
## 92 Curran England bowl test2_innings2 <NA>
## 93 Handscomb Australia batting test2_innings2 5
## 94 Hazlewood Australia bowl test2_innings2 11
## 95 Khawaja Australia batsman test2_innings2 3
## 96 Lyon Australia bowl test2_innings2 6
## 97 Malan England bat test2_innings2 5
## 98 MMarsh Australia all rounder test2_innings2 <NA>
## 99 Overton England bowl test2_innings2 9
## 100 Paine Australia wicketkeeper test2_innings2 8
## 101 Root England bat test2_innings2 4
## 102 SMarsh Australia bat test2_innings2 7
## 103 Smith Australia bat test2_innings2 4
## 104 Starc Australia bowl test2_innings2 9
## 105 Stoneman England bat test2_innings2 2
## 106 Vince England bat test2_innings2 3
## 107 Warner Australia bat test2_innings2 2
## 108 Woakes England all-rounder test2_innings2 6
## 109 Ali England allrounder test3_innings1 7
## 110 Anderson English bowl test3_innings1 11
## 111 Bairstow England wicketkeeper test3_innings1 6
## 112 Ball England bowl test3_innings1 <NA>
## 113 Bancroft Australia bat test3_innings1 1
## 114 Bird Australia bowl test3_innings1 <NA>
## 115 Broad England bowler test3_innings1 10
## 116 Cook England bat test3_innings1 1
## 117 Crane England bowl test3_innings1 <NA>
## 118 Cummins Australia bowl test3_innings1 9
## 119 Curran England bowl test3_innings1 <NA>
## 120 Handscomb Australia batting test3_innings1 <NA>
## 121 Hazlewood Australia bowl test3_innings1 <NA>
## 122 Khawaja Australia batsman test3_innings1 3
## 123 Lyon Australia bowl test3_innings1 10
## 124 Malan England bat test3_innings1 5
## 125 MMarsh Australia all rounder test3_innings1 6
## 126 Overton England bowl test3_innings1 9
## 127 Paine Australia wicketkeeper test3_innings1 7
## 128 Root England bat test3_innings1 4
## 129 SMarsh Australia bat test3_innings1 5
## 130 Smith Australia bat test3_innings1 4
## 131 Starc Australia bowl test3_innings1 8
## 132 Stoneman England bat test3_innings1 2
## 133 Vince England bat test3_innings1 3
## 134 Warner Australia bat test3_innings1 2
## 135 Woakes England all-rounder test3_innings1 8
## 136 Ali England allrounder test3_innings2 7
## 137 Anderson English bowl test3_innings2 11
## 138 Bairstow England wicketkeeper test3_innings2 6
## 139 Ball England bowl test3_innings2 <NA>
## 140 Bancroft Australia bat test3_innings2 <NA>
## 141 Bird Australia bowl test3_innings2 <NA>
## 142 Broad England bowler test3_innings2 10
## 143 Cook England bat test3_innings2 1
## 144 Crane England bowl test3_innings2 <NA>
## 145 Cummins Australia bowl test3_innings2 <NA>
## 146 Curran England bowl test3_innings2 <NA>
## 147 Handscomb Australia batting test3_innings2 <NA>
## 148 Hazlewood Australia bowl test3_innings2 <NA>
## 149 Khawaja Australia batsman test3_innings2 <NA>
## 150 Lyon Australia bowl test3_innings2 <NA>
## 151 Malan England bat test3_innings2 5
## 152 MMarsh Australia all rounder test3_innings2 <NA>
## 153 Overton England bowl test3_innings2 9
## 154 Paine Australia wicketkeeper test3_innings2 <NA>
## 155 Root England bat test3_innings2 4
## 156 SMarsh Australia bat test3_innings2 <NA>
## 157 Smith Australia bat test3_innings2 <NA>
## 158 Starc Australia bowl test3_innings2 <NA>
## 159 Stoneman England bat test3_innings2 2
## 160 Vince England bat test3_innings2 3
## 161 Warner Australia bat test3_innings2 <NA>
## 162 Woakes England all-rounder test3_innings2 8
## 163 Ali England allrounder test4_innings1 7
## 164 Anderson English bowl test4_innings1 11
## 165 Bairstow England wicketkeeper test4_innings1 6
## 166 Ball England bowl test4_innings1 <NA>
## 167 Bancroft Australia bat test4_innings1 1
## 168 Bird Australia bowl test4_innings1 9
## 169 Broad England bowler test4_innings1 10
## 170 Cook England bat test4_innings1 1
## 171 Crane England bowl test4_innings1 <NA>
## 172 Cummins Australia bowl test4_innings1 8
## 173 Curran England bowl test4_innings1 9
## 174 Handscomb Australia batting test4_innings1 <NA>
## 175 Hazlewood Australia bowl test4_innings1 10
## 176 Khawaja Australia batsman test4_innings1 3
## 177 Lyon Australia bowl test4_innings1 11
## 178 Malan England bat test4_innings1 5
## 179 MMarsh Australia all rounder test4_innings1 6
## 180 Overton England bowl test4_innings1 <NA>
## 181 Paine Australia wicketkeeper test4_innings1 7
## 182 Root England bat test4_innings1 4
## 183 SMarsh Australia bat test4_innings1 5
## 184 Smith Australia bat test4_innings1 4
## 185 Starc Australia bowl test4_innings1 <NA>
## 186 Stoneman England bat test4_innings1 2
## 187 Vince England bat test4_innings1 3
## 188 Warner Australia bat test4_innings1 2
## 189 Woakes England all-rounder test4_innings1 8
## 190 Ali England allrounder test4_innings2 <NA>
## 191 Anderson English bowl test4_innings2 <NA>
## 192 Bairstow England wicketkeeper test4_innings2 <NA>
## 193 Ball England bowl test4_innings2 <NA>
## 194 Bancroft Australia bat test4_innings2 1
## 195 Bird Australia bowl test4_innings2 <NA>
## 196 Broad England bowler test4_innings2 <NA>
## 197 Cook England bat test4_innings2 <NA>
## 198 Crane England bowl test4_innings2 <NA>
## 199 Cummins Australia bowl test4_innings2 <NA>
## 200 Curran England bowl test4_innings2 <NA>
## 201 Handscomb Australia batting test4_innings2 <NA>
## 202 Hazlewood Australia bowl test4_innings2 <NA>
## 203 Khawaja Australia batsman test4_innings2 3
## 204 Lyon Australia bowl test4_innings2 <NA>
## 205 Malan England bat test4_innings2 <NA>
## 206 MMarsh Australia all rounder test4_innings2 6
## 207 Overton England bowl test4_innings2 <NA>
## 208 Paine Australia wicketkeeper test4_innings2 <NA>
## 209 Root England bat test4_innings2 <NA>
## 210 SMarsh Australia bat test4_innings2 5
## 211 Smith Australia bat test4_innings2 4
## 212 Starc Australia bowl test4_innings2 <NA>
## 213 Stoneman England bat test4_innings2 <NA>
## 214 Vince England bat test4_innings2 <NA>
## 215 Warner Australia bat test4_innings2 2
## 216 Woakes England all-rounder test4_innings2 <NA>
## 217 Ali England allrounder test5_innings1 7
## 218 Anderson English bowl test5_innings1 11
## 219 Bairstow England wicketkeeper test5_innings1 6
## 220 Ball England bowl test5_innings1 <NA>
## 221 Bancroft Australia bat test5_innings1 1
## 222 Bird Australia bowl test5_innings1 <NA>
## 223 Broad England bowler test5_innings1 9
## 224 Cook England bat test5_innings1 1
## 225 Crane England bowl test5_innings1 10
## 226 Cummins Australia bowl test5_innings1 9
## 227 Curran England bowl test5_innings1 8
## 228 Handscomb Australia batting test5_innings1 <NA>
## 229 Hazlewood Australia bowl test5_innings1 <NA>
## 230 Khawaja Australia batsman test5_innings1 3
## 231 Lyon Australia bowl test5_innings1 <NA>
## 232 Malan England bat test5_innings1 5
## 233 MMarsh Australia all rounder test5_innings1 6
## 234 Overton England bowl test5_innings1 <NA>
## 235 Paine Australia wicketkeeper test5_innings1 7
## 236 Root England bat test5_innings1 4
## 237 SMarsh Australia bat test5_innings1 5
## 238 Smith Australia bat test5_innings1 4
## 239 Starc Australia bowl test5_innings1 8
## 240 Stoneman England bat test5_innings1 2
## 241 Vince England bat test5_innings1 3
## 242 Warner Australia bat test5_innings1 2
## 243 Woakes England all-rounder test5_innings1 <NA>
## 244 Ali England allrounder test5_innings2 7
## 245 Anderson English bowl test5_innings2 11
## 246 Bairstow England wicketkeeper test5_innings2 6
## 247 Ball England bowl test5_innings2 <NA>
## 248 Bancroft Australia bat test5_innings2 <NA>
## 249 Bird Australia bowl test5_innings2 <NA>
## 250 Broad England bowler test5_innings2 9
## 251 Cook England bat test5_innings2 1
## 252 Crane England bowl test5_innings2 10
## 253 Cummins Australia bowl test5_innings2 <NA>
## 254 Curran England bowl test5_innings2 8
## 255 Handscomb Australia batting test5_innings2 <NA>
## 256 Hazlewood Australia bowl test5_innings2 <NA>
## 257 Khawaja Australia batsman test5_innings2 <NA>
## 258 Lyon Australia bowl test5_innings2 <NA>
## 259 Malan England bat test5_innings2 5
## 260 MMarsh Australia all rounder test5_innings2 <NA>
## 261 Overton England bowl test5_innings2 <NA>
## 262 Paine Australia wicketkeeper test5_innings2 <NA>
## 263 Root England bat test5_innings2 4
## 264 SMarsh Australia bat test5_innings2 <NA>
## 265 Smith Australia bat test5_innings2 <NA>
## 266 Starc Australia bowl test5_innings2 <NA>
## 267 Stoneman England bat test5_innings2 2
## 268 Vince England bat test5_innings2 3
## 269 Warner Australia bat test5_innings2 <NA>
## 270 Woakes England all-rounder test5_innings2 <NA>
## scored number_of_balls_faced
## 1 38 102
## 2 5 9
## 3 9 24
## 4 14 11
## 5 5 19
## 6 <NA> <NA>
## 7 20 32
## 8 2 10
## 9 <NA> <NA>
## 10 42 120
## 11 <NA> <NA>
## 12 14 17
## 13 6 25
## 14 11 24
## 15 9 22
## 16 56 130
## 17 <NA> <NA>
## 18 <NA> <NA>
## 19 13 42
## 20 15 50
## 21 51 141
## 22 141 326
## 23 6 5
## 24 53 159
## 25 83 170
## 26 26 43
## 27 0 4
## 28 40 64
## 29 0 1
## 30 42 75
## 31 1 5
## 32 82 182
## 33 <NA> <NA>
## 34 2 9
## 35 7 13
## 36 <NA> <NA>
## 37 <NA> <NA>
## 38 <NA> <NA>
## 39 <NA> <NA>
## 40 <NA> <NA>
## 41 <NA> <NA>
## 42 <NA> <NA>
## 43 4 17
## 44 <NA> <NA>
## 45 <NA> <NA>
## 46 <NA> <NA>
## 47 51 104
## 48 <NA> <NA>
## 49 <NA> <NA>
## 50 <NA> <NA>
## 51 27 81
## 52 2 6
## 53 87 119
## 54 17 57
## 55 25 57
## 56 0 3
## 57 21 50
## 58 <NA> <NA>
## 59 10 41
## 60 <NA> <NA>
## 61 3 17
## 62 37 90
## 63 <NA> <NA>
## 64 44 90
## 65 <NA> <NA>
## 66 36 86
## 67 <NA> <NA>
## 68 53 112
## 69 10 11
## 70 19 58
## 71 <NA> <NA>
## 72 41 79
## 73 57 102
## 74 9 10
## 75 126 231
## 76 40 90
## 77 6 29
## 78 18 21
## 79 2 10
## 80 47 102
## 81 36 62
## 82 2 20
## 83 0 0
## 84 36 57
## 85 <NA> <NA>
## 86 4 8
## 87 <NA> <NA>
## 88 8 12
## 89 16 66
## 90 <NA> <NA>
## 91 11 28
## 92 <NA> <NA>
## 93 12 42
## 94 3 7
## 95 20 49
## 96 14 30
## 97 29 80
## 98 <NA> <NA>
## 99 7 34
## 100 11 27
## 101 67 123
## 102 19 60
## 103 6 12
## 104 20 25
## 105 36 65
## 106 15 39
## 107 14 60
## 108 5 10
## 109 0 2
## 110 0 7
## 111 119 215
## 112 <NA> <NA>
## 113 25 55
## 114 <NA> <NA>
## 115 12 10
## 116 7 16
## 117 <NA> <NA>
## 118 41 63
## 119 <NA> <NA>
## 120 <NA> <NA>
## 121 <NA> <NA>
## 122 50 123
## 123 4 3
## 124 140 227
## 125 181 236
## 126 2 7
## 127 49 85
## 128 20 23
## 129 28 75
## 130 239 399
## 131 1 3
## 132 56 110
## 133 25 63
## 134 22 36
## 135 8 12
## 136 11 56
## 137 1 7
## 138 14 26
## 139 <NA> <NA>
## 140 <NA> <NA>
## 141 <NA> <NA>
## 142 0 2
## 143 14 20
## 144 <NA> <NA>
## 145 <NA> <NA>
## 146 <NA> <NA>
## 147 <NA> <NA>
## 148 <NA> <NA>
## 149 <NA> <NA>
## 150 <NA> <NA>
## 151 54 135
## 152 <NA> <NA>
## 153 12 21
## 154 <NA> <NA>
## 155 14 20
## 156 <NA> <NA>
## 157 <NA> <NA>
## 158 <NA> <NA>
## 159 3 8
## 160 55 95
## 161 <NA> <NA>
## 162 22 48
## 163 20 14
## 164 0 16
## 165 22 39
## 166 <NA> <NA>
## 167 26 95
## 168 4 6
## 169 56 63
## 170 244 409
## 171 <NA> <NA>
## 172 4 18
## 173 4 15
## 174 <NA> <NA>
## 175 1 12
## 176 17 65
## 177 0 10
## 178 14 43
## 179 9 18
## 180 <NA> <NA>
## 181 24 36
## 182 61 133
## 183 61 148
## 184 76 156
## 185 <NA> <NA>
## 186 15 37
## 187 17 37
## 188 103 151
## 189 26 62
## 190 <NA> <NA>
## 191 <NA> <NA>
## 192 <NA> <NA>
## 193 <NA> <NA>
## 194 27 42
## 195 <NA> <NA>
## 196 <NA> <NA>
## 197 <NA> <NA>
## 198 <NA> <NA>
## 199 <NA> <NA>
## 200 <NA> <NA>
## 201 <NA> <NA>
## 202 <NA> <NA>
## 203 11 14
## 204 <NA> <NA>
## 205 <NA> <NA>
## 206 29 166
## 207 <NA> <NA>
## 208 <NA> <NA>
## 209 <NA> <NA>
## 210 4 22
## 211 102 275
## 212 <NA> <NA>
## 213 <NA> <NA>
## 214 <NA> <NA>
## 215 86 227
## 216 <NA> <NA>
## 217 30 58
## 218 0 3
## 219 5 7
## 220 <NA> <NA>
## 221 0 7
## 222 <NA> <NA>
## 223 31 32
## 224 39 104
## 225 4 7
## 226 24 16
## 227 39 65
## 228 <NA> <NA>
## 229 <NA> <NA>
## 230 171 381
## 231 <NA> <NA>
## 232 62 180
## 233 101 141
## 234 <NA> <NA>
## 235 38 52
## 236 83 141
## 237 156 291
## 238 83 158
## 239 11 10
## 240 24 24
## 241 25 54
## 242 56 104
## 243 <NA> <NA>
## 244 13 43
## 245 2 23
## 246 38 143
## 247 <NA> <NA>
## 248 <NA> <NA>
## 249 <NA> <NA>
## 250 4 2
## 251 10 20
## 252 2 4
## 253 <NA> <NA>
## 254 23 40
## 255 <NA> <NA>
## 256 <NA> <NA>
## 257 <NA> <NA>
## 258 <NA> <NA>
## 259 5 27
## 260 <NA> <NA>
## 261 <NA> <NA>
## 262 <NA> <NA>
## 263 58 167
## 264 <NA> <NA>
## 265 <NA> <NA>
## 266 <NA> <NA>
## 267 0 9
## 268 18 51
## 269 <NA> <NA>
## 270 <NA> <NA>
# The three extracted columns are still text; convert them to integers.
# Missing entries stay NA.
ashes_long <- ashes_long %>%
  mutate(
    across(
      c(player_batting_number, scored, number_of_balls_faced),
      as.integer
    )
  )
head(ashes_long)
## batter team role test_innings player_batting_number scored
## 1 Ali England allrounder test1_innings1 6 38
## 2 Anderson English bowl test1_innings1 11 5
## 3 Bairstow England wicketkeeper test1_innings1 7 9
## 4 Ball England bowl test1_innings1 10 14
## 5 Bancroft Australia bat test1_innings1 1 5
## 6 Bird Australia bowl test1_innings1 NA NA
## number_of_balls_faced
## 1 102
## 2 9
## 3 24
## 4 11
## 5 19
## 6 NA
• ensure all categorical variables with a small number of levels are coded as factors, • ensure all categorical variables with a large number of levels are coded as characters, and • ensure all quantitative variables are coded as integers or numeric, as appropriate.
# Give every variable the type this step asks for, instead of only repeating
# the integer conversion:
#   - batter has many distinct values, so it is kept as character;
#   - team, role and test_innings have few levels, so they become factors;
#   - the three counts are integers (a no-op if they were already converted).
ashes_long <- ashes_long %>%
  mutate(
    batter = as.character(batter),
    across(c(team, role, test_innings), as.factor),
    across(
      c(player_batting_number, scored, number_of_balls_faced),
      as.integer
    )
  )
head(ashes_long)
## batter team role test_innings player_batting_number scored
## 1 Ali England allrounder test1_innings1 6 38
## 2 Anderson English bowl test1_innings1 11 5
## 3 Bairstow England wicketkeeper test1_innings1 7 9
## 4 Ball England bowl test1_innings1 10 14
## 5 Bancroft Australia bat test1_innings1 1 5
## 6 Bird Australia bowl test1_innings1 NA NA
## number_of_balls_faced
## 1 102
## 2 9
## 3 24
## 4 11
## 5 19
## 6 NA
# Packages are installed once from the console, not on every run of the
# analysis: calling install.packages() here only produced "package is in use"
# warnings. The library() calls are kept so this step still states what it
# needs.
library(forcats)
library(dplyr)
# Merge the inconsistent spellings of team and role into one label each.
# In role_variants the names are the labels to keep and the values are the
# spellings they replace; !!! splices the vector into fct_recode().
role_variants <- c(
  "all-rounder" = "allrounder",
  "all-rounder" = "all rounder",
  "batter" = "bat",
  "batter" = "batting",
  "batter" = "batsman",
  "bowler" = "bowl"
)
ashes_long$team <- fct_recode(ashes_long$team, "England" = "English")
ashes_long$role <- fct_recode(ashes_long$role, !!!role_variants)
head(ashes_long)
## batter team role test_innings player_batting_number scored
## 1 Ali England all-rounder test1_innings1 6 38
## 2 Anderson England bowler test1_innings1 11 5
## 3 Bairstow England wicketkeeper test1_innings1 7 9
## 4 Ball England bowler test1_innings1 10 14
## 5 Bancroft Australia batter test1_innings1 1 5
## 6 Bird Australia bowler test1_innings1 NA NA
## number_of_balls_faced
## 1 102
## 2 9
## 3 24
## 4 11
## 5 19
## 6 NA
# ggplot2 draws the plots below; dplyr supplies filter() and the %>% pipe.
library(ggplot2)
library(dplyr)
# List the columns of the tidied data before plotting.
colnames(ashes_long)
## [1] "batter" "team" "role"
## [4] "test_innings" "player_batting_number" "scored"
## [7] "number_of_balls_faced"
# Keep a copy of the complete long table before filtering. The filter below
# drops every row whose score is missing, so from here on ashes_long no longer
# has 10 rows per player. Use ashes_all_innings for any step that relies on
# one row per player per innings.
ashes_all_innings <- ashes_long
ashes_long <- ashes_long %>%
  filter(!is.na(scored))

# Histogram of the runs scored, in bins 10 runs wide.
ggplot(ashes_long, aes(x = scored)) +
  geom_histogram(binwidth = 10, fill = "blue", color = "black", alpha = 0.7) +
  labs(
    title = "Histogram of Scores in 2017-18 Ashes Series",
    x = "Score",
    y = "Frequency"
  ) +
  theme_minimal()
## 2.2 Describe the distribution of scores, considering shape, location,
spread and outliers. Summary statistics
# Location and spread of the runs scored: the five-number summary with the
# mean, then the standard deviation and the interquartile range.
score_values <- ashes_long$scored
summary(score_values)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 6.00 18.00 32.09 41.00 244.00
score_sd <- sd(score_values)
print(paste("SD = ", score_sd))
## [1] "SD = 41.3080521263174"
score_iqr <- IQR(score_values)
print(paste("IQR = ", score_iqr))
## [1] "IQR = 35"
Shape of the Distribution
# e1071 provides skewness().
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
# Sample skewness of the runs scored; a positive value means the right tail
# is the longer one.
skewness (ashes_long$scored)
## [1] 2.601694
Spread of the Scores: using boxplot
# Boxplot of the runs scored; points flagged as outliers are drawn in red.
ggplot(data = ashes_long, mapping = aes(y = scored)) +
  labs(title = "Boxplot of Scores in the 2017-18 Ashes Series") +
  geom_boxplot(fill = "blue", outlier.color = "red")
Outliers
# Flag outliers with the 1.5 * IQR rule: any score below Q1 - 1.5 * IQR or
# above Q3 + 1.5 * IQR.
runs <- ashes_long$scored
quartiles <- quantile(runs, c(0.25, 0.75))
Q1 <- quartiles[1]
Q3 <- quartiles[2]
IQR_value <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_value
upper <- Q3 + 1.5 * IQR_value
is_outlier <- runs < lower | runs > upper
outliers <- runs[is_outlier]
outlier_count <- length(outliers)
print(outliers)
## [1] 141 126 119 140 181 239 244 103 102 171 101 156
print(paste("Number of outliers: ", outlier_count))
## [1] "Number of outliers: 12"
Summary of the findings: 1. Shape of the distribution: the skewness is 2.60, so the distribution is right-skewed. 2. Centre: mean = 32 and median = 18; the mean being larger than the median confirms the right skew. 3. Spread of the scores: SD = 41 and IQR = 35, which suggests that scores vary widely. 4. Outliers: there are 12 outliers, all of them high scores (101 or more).
Noting that each player is represented by 10 rows in the data frame, how many players were used by each team in the series?