This is the code I used for the edX Data Wrangling course offered by Harvard University. Feel free to use it as an example or reference, and to replicate the code if needed. As always, comments are welcome.
# Load the breast-cancer-wisconsin data file directly from the UCI repository.
library(readr)
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
# col_names = FALSE: treat the first line as data rather than a header.
# (FALSE is spelled out because the shorthand F can be reassigned.)
data <- read_csv(url, col_names = FALSE)
nrow(data) # number of rows
## [1] 569
ncol(data) # number of columns
## [1] 32
library(tidyverse)
library(dslabs)
# Print co2 to inspect its layout; the output below shows one row per year
# (1959-1997) and one column per month.
co2
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 1959 315.42 316.31 316.50 317.56 318.13 318.00 316.39 314.65 313.68 313.18
## 1960 316.27 316.81 317.42 318.87 319.87 319.43 318.01 315.74 314.00 313.68
## 1961 316.73 317.54 318.38 319.31 320.42 319.61 318.42 316.63 314.83 315.16
## 1962 317.78 318.40 319.53 320.42 320.85 320.45 319.45 317.25 316.11 315.27
## 1963 318.58 318.92 319.70 321.22 322.08 321.31 319.58 317.61 316.05 315.83
## 1964 319.41 320.07 320.74 321.40 322.06 321.73 320.27 318.54 316.54 316.71
## 1965 319.27 320.28 320.73 321.97 322.00 321.71 321.05 318.71 317.66 317.14
## 1966 320.46 321.43 322.23 323.54 323.91 323.59 322.24 320.20 318.48 317.94
## 1967 322.17 322.34 322.88 324.25 324.83 323.93 322.38 320.76 319.10 319.24
## 1968 322.40 322.99 323.73 324.86 325.40 325.20 323.98 321.95 320.18 320.09
## 1969 323.83 324.26 325.47 326.50 327.21 326.54 325.72 323.50 322.22 321.62
## 1970 324.89 325.82 326.77 327.97 327.91 327.50 326.18 324.53 322.93 322.90
## 1971 326.01 326.51 327.01 327.62 328.76 328.40 327.20 325.27 323.20 323.40
## 1972 326.60 327.47 327.58 329.56 329.90 328.92 327.88 326.16 324.68 325.04
## 1973 328.37 329.40 330.14 331.33 332.31 331.90 330.70 329.15 327.35 327.02
## 1974 329.18 330.55 331.32 332.48 332.92 332.08 331.01 329.23 327.27 327.21
## 1975 330.23 331.25 331.87 333.14 333.80 333.43 331.73 329.90 328.40 328.17
## 1976 331.58 332.39 333.33 334.41 334.71 334.17 332.89 330.77 329.14 328.78
## 1977 332.75 333.24 334.53 335.90 336.57 336.10 334.76 332.59 331.42 330.98
## 1978 334.80 335.22 336.47 337.59 337.84 337.72 336.37 334.51 332.60 332.38
## 1979 336.05 336.59 337.79 338.71 339.30 339.12 337.56 335.92 333.75 333.70
## 1980 337.84 338.19 339.91 340.60 341.29 341.00 339.39 337.43 335.72 335.84
## 1981 339.06 340.30 341.21 342.33 342.74 342.08 340.32 338.26 336.52 336.68
## 1982 340.57 341.44 342.53 343.39 343.96 343.18 341.88 339.65 337.81 337.69
## 1983 341.20 342.35 342.93 344.77 345.58 345.14 343.81 342.21 339.69 339.82
## 1984 343.52 344.33 345.11 346.88 347.25 346.62 345.22 343.11 340.90 341.18
## 1985 344.79 345.82 347.25 348.17 348.74 348.07 346.38 344.51 342.92 342.62
## 1986 346.11 346.78 347.68 349.37 350.03 349.37 347.76 345.73 344.68 343.99
## 1987 347.84 348.29 349.23 350.80 351.66 351.07 349.33 347.92 346.27 346.18
## 1988 350.25 351.54 352.05 353.41 354.04 353.62 352.22 350.27 348.55 348.72
## 1989 352.60 352.92 353.53 355.26 355.52 354.97 353.75 351.52 349.64 349.83
## 1990 353.50 354.55 355.23 356.04 357.00 356.07 354.67 352.76 350.82 351.04
## 1991 354.59 355.63 357.03 358.48 359.22 358.12 356.06 353.92 352.05 352.11
## 1992 355.88 356.63 357.72 359.07 359.58 359.17 356.94 354.92 352.94 353.23
## 1993 356.63 357.10 358.32 359.41 360.23 359.55 357.53 355.48 353.67 353.95
## 1994 358.34 358.89 359.95 361.25 361.67 360.94 359.55 357.49 355.84 356.00
## 1995 359.98 361.03 361.66 363.48 363.82 363.30 361.94 359.50 358.11 357.80
## 1996 362.09 363.29 364.06 364.76 365.45 365.01 363.70 361.54 359.51 359.65
## 1997 363.23 364.06 364.61 366.40 366.84 365.68 364.52 362.57 360.24 360.83
## Nov Dec
## 1959 314.66 315.43
## 1960 314.84 316.03
## 1961 315.94 316.85
## 1962 316.53 317.53
## 1963 316.91 318.20
## 1964 317.53 318.55
## 1965 318.70 319.25
## 1966 319.63 320.87
## 1967 320.56 321.80
## 1968 321.16 322.74
## 1969 322.69 323.95
## 1970 323.85 324.96
## 1971 324.63 325.85
## 1972 326.34 327.39
## 1973 327.99 328.48
## 1974 328.29 329.41
## 1975 329.32 330.59
## 1976 330.14 331.52
## 1977 332.24 333.68
## 1978 333.75 334.78
## 1979 335.12 336.56
## 1980 336.93 338.04
## 1981 338.19 339.44
## 1982 339.09 340.32
## 1983 340.98 342.82
## 1984 342.80 344.04
## 1985 344.06 345.38
## 1986 345.48 346.72
## 1987 347.64 348.78
## 1988 349.91 351.18
## 1989 351.14 352.37
## 1990 352.69 354.07
## 1991 353.64 354.89
## 1992 354.09 355.33
## 1993 355.30 356.78
## 1994 357.59 359.05
## 1995 359.61 360.74
## 1996 360.80 362.38
## 1997 362.49 364.34
# Lay the co2 values out as one row per year with twelve monthly columns
# named 1-12, then append the year as a character column.
co2_wide <- co2 %>%
  matrix(ncol = 12, byrow = TRUE) %>%
  data.frame() %>%
  setNames(1:12) %>%
  mutate(year = as.character(1959:1997))
co2_wide
## 1 2 3 4 5 6 7 8 9 10 11
## 1 315.42 316.31 316.50 317.56 318.13 318.00 316.39 314.65 313.68 313.18 314.66
## 2 316.27 316.81 317.42 318.87 319.87 319.43 318.01 315.74 314.00 313.68 314.84
## 3 316.73 317.54 318.38 319.31 320.42 319.61 318.42 316.63 314.83 315.16 315.94
## 4 317.78 318.40 319.53 320.42 320.85 320.45 319.45 317.25 316.11 315.27 316.53
## 5 318.58 318.92 319.70 321.22 322.08 321.31 319.58 317.61 316.05 315.83 316.91
## 6 319.41 320.07 320.74 321.40 322.06 321.73 320.27 318.54 316.54 316.71 317.53
## 7 319.27 320.28 320.73 321.97 322.00 321.71 321.05 318.71 317.66 317.14 318.70
## 8 320.46 321.43 322.23 323.54 323.91 323.59 322.24 320.20 318.48 317.94 319.63
## 9 322.17 322.34 322.88 324.25 324.83 323.93 322.38 320.76 319.10 319.24 320.56
## 10 322.40 322.99 323.73 324.86 325.40 325.20 323.98 321.95 320.18 320.09 321.16
## 11 323.83 324.26 325.47 326.50 327.21 326.54 325.72 323.50 322.22 321.62 322.69
## 12 324.89 325.82 326.77 327.97 327.91 327.50 326.18 324.53 322.93 322.90 323.85
## 13 326.01 326.51 327.01 327.62 328.76 328.40 327.20 325.27 323.20 323.40 324.63
## 14 326.60 327.47 327.58 329.56 329.90 328.92 327.88 326.16 324.68 325.04 326.34
## 15 328.37 329.40 330.14 331.33 332.31 331.90 330.70 329.15 327.35 327.02 327.99
## 16 329.18 330.55 331.32 332.48 332.92 332.08 331.01 329.23 327.27 327.21 328.29
## 17 330.23 331.25 331.87 333.14 333.80 333.43 331.73 329.90 328.40 328.17 329.32
## 18 331.58 332.39 333.33 334.41 334.71 334.17 332.89 330.77 329.14 328.78 330.14
## 19 332.75 333.24 334.53 335.90 336.57 336.10 334.76 332.59 331.42 330.98 332.24
## 20 334.80 335.22 336.47 337.59 337.84 337.72 336.37 334.51 332.60 332.38 333.75
## 21 336.05 336.59 337.79 338.71 339.30 339.12 337.56 335.92 333.75 333.70 335.12
## 22 337.84 338.19 339.91 340.60 341.29 341.00 339.39 337.43 335.72 335.84 336.93
## 23 339.06 340.30 341.21 342.33 342.74 342.08 340.32 338.26 336.52 336.68 338.19
## 24 340.57 341.44 342.53 343.39 343.96 343.18 341.88 339.65 337.81 337.69 339.09
## 25 341.20 342.35 342.93 344.77 345.58 345.14 343.81 342.21 339.69 339.82 340.98
## 26 343.52 344.33 345.11 346.88 347.25 346.62 345.22 343.11 340.90 341.18 342.80
## 27 344.79 345.82 347.25 348.17 348.74 348.07 346.38 344.51 342.92 342.62 344.06
## 28 346.11 346.78 347.68 349.37 350.03 349.37 347.76 345.73 344.68 343.99 345.48
## 29 347.84 348.29 349.23 350.80 351.66 351.07 349.33 347.92 346.27 346.18 347.64
## 30 350.25 351.54 352.05 353.41 354.04 353.62 352.22 350.27 348.55 348.72 349.91
## 31 352.60 352.92 353.53 355.26 355.52 354.97 353.75 351.52 349.64 349.83 351.14
## 32 353.50 354.55 355.23 356.04 357.00 356.07 354.67 352.76 350.82 351.04 352.69
## 33 354.59 355.63 357.03 358.48 359.22 358.12 356.06 353.92 352.05 352.11 353.64
## 34 355.88 356.63 357.72 359.07 359.58 359.17 356.94 354.92 352.94 353.23 354.09
## 35 356.63 357.10 358.32 359.41 360.23 359.55 357.53 355.48 353.67 353.95 355.30
## 36 358.34 358.89 359.95 361.25 361.67 360.94 359.55 357.49 355.84 356.00 357.59
## 37 359.98 361.03 361.66 363.48 363.82 363.30 361.94 359.50 358.11 357.80 359.61
## 38 362.09 363.29 364.06 364.76 365.45 365.01 363.70 361.54 359.51 359.65 360.80
## 39 363.23 364.06 364.61 366.40 366.84 365.68 364.52 362.57 360.24 360.83 362.49
## 12 year
## 1 315.43 1959
## 2 316.03 1960
## 3 316.85 1961
## 4 317.53 1962
## 5 318.20 1963
## 6 318.55 1964
## 7 319.25 1965
## 8 320.87 1966
## 9 321.80 1967
## 10 322.74 1968
## 11 323.95 1969
## 12 324.96 1970
## 13 325.85 1971
## 14 327.39 1972
## 15 328.48 1973
## 16 329.41 1974
## 17 330.59 1975
## 18 331.52 1976
## 19 333.68 1977
## 20 334.78 1978
## 21 336.56 1979
## 22 338.04 1980
## 23 339.44 1981
## 24 340.32 1982
## 25 342.82 1983
## 26 344.04 1984
## 27 345.38 1985
## 28 346.72 1986
## 29 348.78 1987
## 30 351.18 1988
## 31 352.37 1989
## 32 354.07 1990
## 33 354.89 1991
## 34 355.33 1992
## 35 356.78 1993
## 36 359.05 1994
## 37 360.74 1995
## 38 362.38 1996
## 39 364.34 1997
# Stack the twelve monthly columns into month/co2 pairs, keeping year as the
# identifier column.
co2_tidy <- co2_wide %>%
  gather(key = "month", value = "co2", -year)
co2_tidy
## year month co2
## 1 1959 1 315.42
## 2 1960 1 316.27
## 3 1961 1 316.73
## 4 1962 1 317.78
## 5 1963 1 318.58
## 6 1964 1 319.41
## 7 1965 1 319.27
## 8 1966 1 320.46
## 9 1967 1 322.17
## 10 1968 1 322.40
## 11 1969 1 323.83
## 12 1970 1 324.89
## 13 1971 1 326.01
## 14 1972 1 326.60
## 15 1973 1 328.37
## 16 1974 1 329.18
## 17 1975 1 330.23
## 18 1976 1 331.58
## 19 1977 1 332.75
## 20 1978 1 334.80
## 21 1979 1 336.05
## 22 1980 1 337.84
## 23 1981 1 339.06
## 24 1982 1 340.57
## 25 1983 1 341.20
## 26 1984 1 343.52
## 27 1985 1 344.79
## 28 1986 1 346.11
## 29 1987 1 347.84
## 30 1988 1 350.25
## 31 1989 1 352.60
## 32 1990 1 353.50
## 33 1991 1 354.59
## 34 1992 1 355.88
## 35 1993 1 356.63
## 36 1994 1 358.34
## 37 1995 1 359.98
## 38 1996 1 362.09
## 39 1997 1 363.23
## 40 1959 2 316.31
## 41 1960 2 316.81
## 42 1961 2 317.54
## 43 1962 2 318.40
## 44 1963 2 318.92
## 45 1964 2 320.07
## 46 1965 2 320.28
## 47 1966 2 321.43
## 48 1967 2 322.34
## 49 1968 2 322.99
## 50 1969 2 324.26
## 51 1970 2 325.82
## 52 1971 2 326.51
## 53 1972 2 327.47
## 54 1973 2 329.40
## 55 1974 2 330.55
## 56 1975 2 331.25
## 57 1976 2 332.39
## 58 1977 2 333.24
## 59 1978 2 335.22
## 60 1979 2 336.59
## 61 1980 2 338.19
## 62 1981 2 340.30
## 63 1982 2 341.44
## 64 1983 2 342.35
## 65 1984 2 344.33
## 66 1985 2 345.82
## 67 1986 2 346.78
## 68 1987 2 348.29
## 69 1988 2 351.54
## 70 1989 2 352.92
## 71 1990 2 354.55
## 72 1991 2 355.63
## 73 1992 2 356.63
## 74 1993 2 357.10
## 75 1994 2 358.89
## 76 1995 2 361.03
## 77 1996 2 363.29
## 78 1997 2 364.06
## 79 1959 3 316.50
## 80 1960 3 317.42
## 81 1961 3 318.38
## 82 1962 3 319.53
## 83 1963 3 319.70
## 84 1964 3 320.74
## 85 1965 3 320.73
## 86 1966 3 322.23
## 87 1967 3 322.88
## 88 1968 3 323.73
## 89 1969 3 325.47
## 90 1970 3 326.77
## 91 1971 3 327.01
## 92 1972 3 327.58
## 93 1973 3 330.14
## 94 1974 3 331.32
## 95 1975 3 331.87
## 96 1976 3 333.33
## 97 1977 3 334.53
## 98 1978 3 336.47
## 99 1979 3 337.79
## 100 1980 3 339.91
## 101 1981 3 341.21
## 102 1982 3 342.53
## 103 1983 3 342.93
## 104 1984 3 345.11
## 105 1985 3 347.25
## 106 1986 3 347.68
## 107 1987 3 349.23
## 108 1988 3 352.05
## 109 1989 3 353.53
## 110 1990 3 355.23
## 111 1991 3 357.03
## 112 1992 3 357.72
## 113 1993 3 358.32
## 114 1994 3 359.95
## 115 1995 3 361.66
## 116 1996 3 364.06
## 117 1997 3 364.61
## 118 1959 4 317.56
## 119 1960 4 318.87
## 120 1961 4 319.31
## 121 1962 4 320.42
## 122 1963 4 321.22
## 123 1964 4 321.40
## 124 1965 4 321.97
## 125 1966 4 323.54
## 126 1967 4 324.25
## 127 1968 4 324.86
## 128 1969 4 326.50
## 129 1970 4 327.97
## 130 1971 4 327.62
## 131 1972 4 329.56
## 132 1973 4 331.33
## 133 1974 4 332.48
## 134 1975 4 333.14
## 135 1976 4 334.41
## 136 1977 4 335.90
## 137 1978 4 337.59
## 138 1979 4 338.71
## 139 1980 4 340.60
## 140 1981 4 342.33
## 141 1982 4 343.39
## 142 1983 4 344.77
## 143 1984 4 346.88
## 144 1985 4 348.17
## 145 1986 4 349.37
## 146 1987 4 350.80
## 147 1988 4 353.41
## 148 1989 4 355.26
## 149 1990 4 356.04
## 150 1991 4 358.48
## 151 1992 4 359.07
## 152 1993 4 359.41
## 153 1994 4 361.25
## 154 1995 4 363.48
## 155 1996 4 364.76
## 156 1997 4 366.40
## 157 1959 5 318.13
## 158 1960 5 319.87
## 159 1961 5 320.42
## 160 1962 5 320.85
## 161 1963 5 322.08
## 162 1964 5 322.06
## 163 1965 5 322.00
## 164 1966 5 323.91
## 165 1967 5 324.83
## 166 1968 5 325.40
## 167 1969 5 327.21
## 168 1970 5 327.91
## 169 1971 5 328.76
## 170 1972 5 329.90
## 171 1973 5 332.31
## 172 1974 5 332.92
## 173 1975 5 333.80
## 174 1976 5 334.71
## 175 1977 5 336.57
## 176 1978 5 337.84
## 177 1979 5 339.30
## 178 1980 5 341.29
## 179 1981 5 342.74
## 180 1982 5 343.96
## 181 1983 5 345.58
## 182 1984 5 347.25
## 183 1985 5 348.74
## 184 1986 5 350.03
## 185 1987 5 351.66
## 186 1988 5 354.04
## 187 1989 5 355.52
## 188 1990 5 357.00
## 189 1991 5 359.22
## 190 1992 5 359.58
## 191 1993 5 360.23
## 192 1994 5 361.67
## 193 1995 5 363.82
## 194 1996 5 365.45
## 195 1997 5 366.84
## 196 1959 6 318.00
## 197 1960 6 319.43
## 198 1961 6 319.61
## 199 1962 6 320.45
## 200 1963 6 321.31
## 201 1964 6 321.73
## 202 1965 6 321.71
## 203 1966 6 323.59
## 204 1967 6 323.93
## 205 1968 6 325.20
## 206 1969 6 326.54
## 207 1970 6 327.50
## 208 1971 6 328.40
## 209 1972 6 328.92
## 210 1973 6 331.90
## 211 1974 6 332.08
## 212 1975 6 333.43
## 213 1976 6 334.17
## 214 1977 6 336.10
## 215 1978 6 337.72
## 216 1979 6 339.12
## 217 1980 6 341.00
## 218 1981 6 342.08
## 219 1982 6 343.18
## 220 1983 6 345.14
## 221 1984 6 346.62
## 222 1985 6 348.07
## 223 1986 6 349.37
## 224 1987 6 351.07
## 225 1988 6 353.62
## 226 1989 6 354.97
## 227 1990 6 356.07
## 228 1991 6 358.12
## 229 1992 6 359.17
## 230 1993 6 359.55
## 231 1994 6 360.94
## 232 1995 6 363.30
## 233 1996 6 365.01
## 234 1997 6 365.68
## 235 1959 7 316.39
## 236 1960 7 318.01
## 237 1961 7 318.42
## 238 1962 7 319.45
## 239 1963 7 319.58
## 240 1964 7 320.27
## 241 1965 7 321.05
## 242 1966 7 322.24
## 243 1967 7 322.38
## 244 1968 7 323.98
## 245 1969 7 325.72
## 246 1970 7 326.18
## 247 1971 7 327.20
## 248 1972 7 327.88
## 249 1973 7 330.70
## 250 1974 7 331.01
## 251 1975 7 331.73
## 252 1976 7 332.89
## 253 1977 7 334.76
## 254 1978 7 336.37
## 255 1979 7 337.56
## 256 1980 7 339.39
## 257 1981 7 340.32
## 258 1982 7 341.88
## 259 1983 7 343.81
## 260 1984 7 345.22
## 261 1985 7 346.38
## 262 1986 7 347.76
## 263 1987 7 349.33
## 264 1988 7 352.22
## 265 1989 7 353.75
## 266 1990 7 354.67
## 267 1991 7 356.06
## 268 1992 7 356.94
## 269 1993 7 357.53
## 270 1994 7 359.55
## 271 1995 7 361.94
## 272 1996 7 363.70
## 273 1997 7 364.52
## 274 1959 8 314.65
## 275 1960 8 315.74
## 276 1961 8 316.63
## 277 1962 8 317.25
## 278 1963 8 317.61
## 279 1964 8 318.54
## 280 1965 8 318.71
## 281 1966 8 320.20
## 282 1967 8 320.76
## 283 1968 8 321.95
## 284 1969 8 323.50
## 285 1970 8 324.53
## 286 1971 8 325.27
## 287 1972 8 326.16
## 288 1973 8 329.15
## 289 1974 8 329.23
## 290 1975 8 329.90
## 291 1976 8 330.77
## 292 1977 8 332.59
## 293 1978 8 334.51
## 294 1979 8 335.92
## 295 1980 8 337.43
## 296 1981 8 338.26
## 297 1982 8 339.65
## 298 1983 8 342.21
## 299 1984 8 343.11
## 300 1985 8 344.51
## 301 1986 8 345.73
## 302 1987 8 347.92
## 303 1988 8 350.27
## 304 1989 8 351.52
## 305 1990 8 352.76
## 306 1991 8 353.92
## 307 1992 8 354.92
## 308 1993 8 355.48
## 309 1994 8 357.49
## 310 1995 8 359.50
## 311 1996 8 361.54
## 312 1997 8 362.57
## 313 1959 9 313.68
## 314 1960 9 314.00
## 315 1961 9 314.83
## 316 1962 9 316.11
## 317 1963 9 316.05
## 318 1964 9 316.54
## 319 1965 9 317.66
## 320 1966 9 318.48
## 321 1967 9 319.10
## 322 1968 9 320.18
## 323 1969 9 322.22
## 324 1970 9 322.93
## 325 1971 9 323.20
## 326 1972 9 324.68
## 327 1973 9 327.35
## 328 1974 9 327.27
## 329 1975 9 328.40
## 330 1976 9 329.14
## 331 1977 9 331.42
## 332 1978 9 332.60
## 333 1979 9 333.75
## 334 1980 9 335.72
## 335 1981 9 336.52
## 336 1982 9 337.81
## 337 1983 9 339.69
## 338 1984 9 340.90
## 339 1985 9 342.92
## 340 1986 9 344.68
## 341 1987 9 346.27
## 342 1988 9 348.55
## 343 1989 9 349.64
## 344 1990 9 350.82
## 345 1991 9 352.05
## 346 1992 9 352.94
## 347 1993 9 353.67
## 348 1994 9 355.84
## 349 1995 9 358.11
## 350 1996 9 359.51
## 351 1997 9 360.24
## 352 1959 10 313.18
## 353 1960 10 313.68
## 354 1961 10 315.16
## 355 1962 10 315.27
## 356 1963 10 315.83
## 357 1964 10 316.71
## 358 1965 10 317.14
## 359 1966 10 317.94
## 360 1967 10 319.24
## 361 1968 10 320.09
## 362 1969 10 321.62
## 363 1970 10 322.90
## 364 1971 10 323.40
## 365 1972 10 325.04
## 366 1973 10 327.02
## 367 1974 10 327.21
## 368 1975 10 328.17
## 369 1976 10 328.78
## 370 1977 10 330.98
## 371 1978 10 332.38
## 372 1979 10 333.70
## 373 1980 10 335.84
## 374 1981 10 336.68
## 375 1982 10 337.69
## 376 1983 10 339.82
## 377 1984 10 341.18
## 378 1985 10 342.62
## 379 1986 10 343.99
## 380 1987 10 346.18
## 381 1988 10 348.72
## 382 1989 10 349.83
## 383 1990 10 351.04
## 384 1991 10 352.11
## 385 1992 10 353.23
## 386 1993 10 353.95
## 387 1994 10 356.00
## 388 1995 10 357.80
## 389 1996 10 359.65
## 390 1997 10 360.83
## 391 1959 11 314.66
## 392 1960 11 314.84
## 393 1961 11 315.94
## 394 1962 11 316.53
## 395 1963 11 316.91
## 396 1964 11 317.53
## 397 1965 11 318.70
## 398 1966 11 319.63
## 399 1967 11 320.56
## 400 1968 11 321.16
## 401 1969 11 322.69
## 402 1970 11 323.85
## 403 1971 11 324.63
## 404 1972 11 326.34
## 405 1973 11 327.99
## 406 1974 11 328.29
## 407 1975 11 329.32
## 408 1976 11 330.14
## 409 1977 11 332.24
## 410 1978 11 333.75
## 411 1979 11 335.12
## 412 1980 11 336.93
## 413 1981 11 338.19
## 414 1982 11 339.09
## 415 1983 11 340.98
## 416 1984 11 342.80
## 417 1985 11 344.06
## 418 1986 11 345.48
## 419 1987 11 347.64
## 420 1988 11 349.91
## 421 1989 11 351.14
## 422 1990 11 352.69
## 423 1991 11 353.64
## 424 1992 11 354.09
## 425 1993 11 355.30
## 426 1994 11 357.59
## 427 1995 11 359.61
## 428 1996 11 360.80
## 429 1997 11 362.49
## 430 1959 12 315.43
## 431 1960 12 316.03
## 432 1961 12 316.85
## 433 1962 12 317.53
## 434 1963 12 318.20
## 435 1964 12 318.55
## 436 1965 12 319.25
## 437 1966 12 320.87
## 438 1967 12 321.80
## 439 1968 12 322.74
## 440 1969 12 323.95
## 441 1970 12 324.96
## 442 1971 12 325.85
## 443 1972 12 327.39
## 444 1973 12 328.48
## 445 1974 12 329.41
## 446 1975 12 330.59
## 447 1976 12 331.52
## 448 1977 12 333.68
## 449 1978 12 334.78
## 450 1979 12 336.56
## 451 1980 12 338.04
## 452 1981 12 339.44
## 453 1982 12 340.32
## 454 1983 12 342.82
## 455 1984 12 344.04
## 456 1985 12 345.38
## 457 1986 12 346.72
## 458 1987 12 348.78
## 459 1988 12 351.18
## 460 1989 12 352.37
## 461 1990 12 354.07
## 462 1991 12 354.89
## 463 1992 12 355.33
## 464 1993 12 356.78
## 465 1994 12 359.05
## 466 1995 12 360.74
## 467 1996 12 362.38
## 468 1997 12 364.34
# One line per year: co2 against month number (month is stored as text, so it
# is converted to numeric for the x axis).
ggplot(co2_tidy, aes(as.numeric(month), co2, color = year)) +
  geom_line()
library(dslabs)
data(admissions)
# Drop the applicants column, then widen so each gender gets its own column
# of admitted values.
dat <- select(admissions, -applicants)
dat_tidy <- dat %>% spread(key = gender, value = admitted)
# Stack admitted and applicants into key/value pairs.
tmp <- admissions %>%
  gather(key = "key", value = "value", admitted:applicants)
tmp
## major gender key value
## 1 A men admitted 62
## 2 B men admitted 63
## 3 C men admitted 37
## 4 D men admitted 33
## 5 E men admitted 28
## 6 F men admitted 6
## 7 A women admitted 82
## 8 B women admitted 68
## 9 C women admitted 34
## 10 D women admitted 35
## 11 E women admitted 24
## 12 F women admitted 7
## 13 A men applicants 825
## 14 B men applicants 560
## 15 C men applicants 325
## 16 D men applicants 417
## 17 E men applicants 191
## 18 F men applicants 373
## 19 A women applicants 108
## 20 B women applicants 25
## 21 C women applicants 593
## 22 D women applicants 375
## 23 E women applicants 393
## 24 F women applicants 341
# Merge key and gender into a single column called column_name.
tmp2 <- tmp %>% unite(col = "column_name", key, gender)
library(Lahman)
# The ten 2016 batting records with the most home runs.
top <- Batting %>%
  filter(yearID == 2016) %>%
  arrange(desc(HR)) %>% # highest HR count first
  slice(seq_len(10)) # keep rows 1-10
as_tibble(top)
## # A tibble: 10 x 22
## playerID yearID stint teamID lgID G AB R H X2B X3B HR
## <chr> <int> <int> <fct> <fct> <int> <int> <int> <int> <int> <int> <int>
## 1 trumbma01 2016 1 BAL AL 159 613 94 157 27 1 47
## 2 cruzne02 2016 1 SEA AL 155 589 96 169 27 1 43
## 3 daviskh01 2016 1 OAK AL 150 555 85 137 24 2 42
## 4 doziebr01 2016 1 MIN AL 155 615 104 165 35 5 42
## 5 encared01 2016 1 TOR AL 160 601 99 158 34 0 42
## 6 arenano01 2016 1 COL NL 160 618 116 182 35 6 41
## 7 cartech02 2016 1 MIL NL 160 549 84 122 27 1 41
## 8 frazito01 2016 1 CHA AL 158 590 89 133 21 0 40
## 9 bryankr01 2016 1 CHN NL 155 603 121 176 35 3 39
## 10 canoro01 2016 1 SEA AL 161 655 107 195 33 2 39
## # ... with 10 more variables: RBI <int>, SB <int>, CS <int>, BB <int>,
## # SO <int>, IBB <int>, HBP <int>, SH <int>, SF <int>, GIDP <int>
# Preview the People table as a tibble.
as_tibble(People)
## # A tibble: 20,370 x 26
## playerID birthYear birthMonth birthDay birthCountry birthState birthCity
## <chr> <int> <int> <int> <chr> <chr> <chr>
## 1 aardsda01 1981 12 27 USA CO Denver
## 2 aaronha01 1934 2 5 USA AL Mobile
## 3 aaronto01 1939 8 5 USA AL Mobile
## 4 aasedo01 1954 9 8 USA CA Orange
## 5 abadan01 1972 8 25 USA FL Palm Beach
## 6 abadfe01 1985 12 17 D.R. La Romana La Romana
## 7 abadijo01 1850 11 4 USA PA Philadelphia
## 8 abbated01 1877 4 15 USA PA Latrobe
## 9 abbeybe01 1869 11 11 USA VT Essex
## 10 abbeych01 1866 10 14 USA NE Falls City
## # ... with 20,360 more rows, and 19 more variables: deathYear <int>,
## # deathMonth <int>, deathDay <int>, deathCountry <chr>, deathState <chr>,
## # deathCity <chr>, nameFirst <chr>, nameLast <chr>, nameGiven <chr>,
## # weight <int>, height <int>, bats <fct>, throws <fct>, debut <chr>,
## # finalGame <chr>, retroID <chr>, bbrefID <chr>, deathDate <date>,
## # birthDate <date>
# Attach player names to the top-10 home-run hitters.
# Join keys are spelled out everywhere below so that matching is on player
# identity only; without `by`, dplyr keys on every column the two tables
# happen to share.
top_names <- top %>%
  left_join(People, by = "playerID") %>%
  select(playerID, nameFirst, nameLast, HR)

# 2016 salary rows for those players; right_join keeps every row of top_names
# even when no salary record matches.
top_salary <- Salaries %>%
  filter(yearID == 2016) %>%
  right_join(top_names, by = "playerID") %>%
  select(nameFirst, nameLast, teamID, HR, salary)

# Award rows from 2016.
award_players <- AwardsPlayers %>% filter(yearID == 2016)

# Top-10 hitters who also appear among the 2016 award rows.
q7_a <- semi_join(top, award_players, by = "playerID")

# Award rows for players outside the top 10. The same playerID can appear on
# several award rows, so count distinct players rather than rows.
q7_b <- anti_join(award_players, top, by = "playerID")
length(unique(q7_b$playerID))
## [1] 44
Question 1: Which of the first four nodes are tables of team payroll?
library(rvest)
# Download the archived payroll page and collect every <table> node.
url <- "https://web.archive.org/web/20181024132313/http://www.stevetheump.com/Payrolls.htm"
h <- read_html(url)
nodes <- h %>% html_nodes("table")
# Raw text of the sixth table.
nodes[[6]] %>% html_text()
## [1] "# Team\nPayroll1. Los Angeles Dodgers\n$235,295,2192. New York Yankees\n$203,812,5063. Philadelphia Phillies\n$180,052,7234. Boston Red Sox\n$162,817,4115. Detroit Tigers\n$162,228,5276. Los Angeles Angels\n$155,692,0007. San Francisco Giants\n$154,185,8788. Texas Rangers\n$136,036,1729. Washington Nationals\n$134,704,43710. Toronto Blue Jays\n$132,628,70011. Arizona Diamondbacks\n$112,688,66612. Cincinnati Reds\n$112,390,77213. St. Louis Cardinals\n$111,020,36014. Atlanta Braves\n$110,897,34115. Baltimore Orioles\n$107,406,62316. Milwaukee Brewers\n$103,844,80617. Colorado Rockies\n$95,832,07118. Seattle Mariners\n$92,081,94319. Kansas City Royals\n$92,034,34520. Chicago White Sox\n$91,159,25421. San Diego Padres\n$90,094,19622. New York Mets\n$89,051,75823. Chicago Cubs\n$89,007,85724. Minnesota Twins\n$85,776,50025. Oakland Athletics\n$83,401,40026. Cleveland Indians\n$82,534,80027. Pittsburgh Pirates\n$78,111,66728. Tampa Bay Rays\n$77,062,89129. Miami Marlins\n$47,565,40030. Houston Astros\n$44,544,174"
# Parse the first four table nodes into data frames.
nodes[1:4] %>% lapply(html_table)
## [[1]]
## # A tibble: 1 x 2
## X1 X2
## <lgl> <chr>
## 1 NA "Salary Stats 1967-2019\nTop ML Player Salaries / Baseball's Luxury Tax"
##
## [[2]]
## # A tibble: 30 x 3
## RANK TEAM Payroll
## <int> <chr> <chr>
## 1 1 Boston Red Sox $235.65M
## 2 2 San Francisco Giants $208.51M
## 3 3 Los Angeles Dodgers $186.14M
## 4 4 Chicago Cubs $183.46M
## 5 5 Washington Nationals $181.59M
## 6 6 Los Angeles Angels $175.1M
## 7 7 New York Yankees $168.54M
## 8 8 Seattle Mariners $162.48M
## 9 9 Toronto Blue Jays $162.316M
## 10 10 St. Louis Cardinals $161.01M
## # ... with 20 more rows
##
## [[3]]
## # A tibble: 31 x 5
## X1 X2 X3 X4 X5
## <chr> <chr> <chr> <chr> <chr>
## 1 Rank Team 25 Man Disabled List Total Payroll
## 2 1 Los Angeles Dodgers $155,887,854 $37,354,166 $242,065,828
## 3 2 New York Yankees $168,045,699 $5,644,000 $201,539,699
## 4 3 Boston Red Sox $136,780,500 $38,239,250 $199,805,178
## 5 4 Detroit Tigers $168,500,600 $11,750,000 $199,750,600
## 6 5 Toronto Blue Jays $159,175,968 $2,169,400 $177,795,368
## 7 6 Texas Rangers $115,162,703 $39,136,360 $175,909,063
## 8 7 San Francisco Giants $169,504,611 $2,500,000 $172,354,611
## 9 8 Chicago Cubs $170,189,880 $2,000,000 $172,189,880
## 10 9 Washington Nationals $163,111,918 $535,000 $167,846,918
## # ... with 21 more rows
##
## [[4]]
## # A tibble: 30 x 5
## Rank Team `Opening Day` `Avg Salary` Median
## <int> <chr> <chr> <chr> <chr>
## 1 1 Dodgers $ 223,352,402 $ 7,445,080 $ 5,166,666
## 2 2 Yankees $ 213,472,857 $ 7,361,133 $ 3,300,000
## 3 3 Red Sox $ 182,161,414 $ 6,072,047 $ 3,500,000
## 4 4 Tigers $ 172,282,250 $ 6,891,290 $ 3,000,000
## 5 5 Giants $ 166,495,942 $ 5,946,284 $ 4,000,000
## 6 6 Nationals $ 166,010,977 $ 5,724,516 $ 2,500,000
## 7 7 Angels $ 146,449,583 $ 5,049,986 $ 1,312,500
## 8 8 Rangers $ 144,307,373 $ 4,509,605 $ 937,500
## 9 9 Phillies $ 133,048,000 $ 4,434,933 $ 700,000
## 10 10 Blue Jays $ 126,369,628 $ 4,357,573 $ 1,650,000
## # ... with 20 more rows
Question 2: For the last 3 components of nodes, which of the following are true?
# Total number of table nodes, used to index from the end of the list.
node_length <- length(nodes)
nodes[[node_length]] %>% html_table() # last node
## # A tibble: 54 x 4
## X1 X2 X3 X4
## <chr> <chr> <chr> <chr>
## 1 Year Minimum "Average" "% Chg"
## 2 2019 $555,000 "" "-"
## 3 2018 $545,000 "$4,520,000" ""
## 4 2017 $535,000 "$4,470,000" "5.4"
## 5 2016 $507,500 "$4,400,000" "-"
## 6 2015 $507,500 "$4,250,000" "-"
## 7 2014 $507,500 "$3,820,000" "12.8"
## 8 2013 $480,000 "$3,386,212" "5.4"
## 9 2012 $480,000 "$3,440,000" "3.8"
## 10 2011 $414,500 "$3,305,393" "0.2"
## # ... with 44 more rows
nodes[[node_length - 1]] %>% html_table() # second-to-last node
## # A tibble: 31 x 3
## X1 X2 X3
## <chr> <chr> <chr>
## 1 Team Payroll Average
## 2 NY Yankees $92,538,260 $3,190,974
## 3 Los Angeles $88,124,286 $3,263,862
## 4 Atlanta $84,537,836 $2,817,928
## 5 Baltimore $81,447,435 $2,808,532
## 6 Arizona $81,027,833 $2,893,851
## 7 NY Mets $79,509,776 $3,180,391
## 8 Boston $77,940,333 $2,598,011
## 9 Cleveland $75,880,871 $2,918,495
## 10 Texas $70,795,921 $2,722,920
## # ... with 21 more rows
nodes[[node_length - 2]] %>% html_table() # third-to-last node
## # A tibble: 31 x 3
## X1 X2 X3
## <chr> <chr> <chr>
## 1 Team Payroll Average
## 2 NY Yankees $109,791,893 $3,541,674
## 3 Boston $109,558,908 $3,423,716
## 4 Los Angeles $108,980,952 $3,757,964
## 5 NY Mets $93,174,428 $3,327,658
## 6 Cleveland $91,974,979 $3,065,833
## 7 Atlanta $91,851,687 $2,962,958
## 8 Texas $88,504,421 $2,854,981
## 9 Arizona $81,206,513 $2,900,233
## 10 St. Louis $77,270,855 $2,664,512
## # ... with 21 more rows
Question 3:
# Parse the tenth table on the page and display it.
tab_1 <- nodes[[10]] %>% html_table()
tab_1
## # A tibble: 31 x 4
## X1 X2 X3 X4
## <chr> <chr> <chr> <chr>
## 1 No. Team Payroll Average
## 2 1. New York Yankees $206,333,389 $8,253,336
## 3 2. Boston Red Sox $162,747,333 $5,611,977
## 4 3. Chicago Cubs $146,859,000 $5,439,222
## 5 4. Philadelphia Phillies $141,927,381 $5,068,835
## 6 5. New York Mets $132,701,445 $5,103,902
## 7 6. Detroit Tigers $122,864,929 $4,550,553
## 8 7. Chicago White Sox $108,273,197 $4,164,354
## 9 8. Los Angeles Angels $105,013,667 $3,621,161
## 10 9. Seattle Mariners $98,376,667 $3,513,452
## # ... with 21 more rows
# Parse the nineteenth table on the page and display it.
tab_2 <- nodes[[19]] %>% html_table()
tab_2
## # A tibble: 31 x 3
## X1 X2 X3
## <chr> <chr> <chr>
## 1 Team Payroll Average
## 2 NY Yankees $109,791,893 $3,541,674
## 3 Boston $109,558,908 $3,423,716
## 4 Los Angeles $108,980,952 $3,757,964
## 5 NY Mets $93,174,428 $3,327,658
## 6 Cleveland $91,974,979 $3,065,833
## 7 Atlanta $91,851,687 $2,962,958
## 8 Texas $88,504,421 $2,854,981
## 9 Arizona $81,206,513 $2,900,233
## 10 St. Louis $77,270,855 $2,664,512
## # ... with 21 more rows
# Drop the header row from each table (and the leading "No." column from
# tab_1), then give both the same column names so they can be joined on Team.
tab1_new <- setNames(tab_1[-1, -1], c("Team", "Payroll", "Average"))
tab2_new <- setNames(tab_2[-1, ], c("Team", "Payroll", "Average"))
library(readr)
# Number of rows in the full join of the two tables by Team.
tab12 <- nrow(full_join(tab1_new, tab2_new, by = "Team"))
Question 4-5:
library(rvest)
library(tidyverse)
# Download the pinned revision of the Wikipedia polling page and collect every
# <table> node.
url <- "https://en.wikipedia.org/w/index.php?title=Opinion_polling_for_the_United_Kingdom_European_Union_membership_referendum&oldid=896735054"
h <- read_html(url)
tab <- h %>% html_nodes("table")
# Parse the first 42 table nodes into data frames.
tab[1:42] %>% lapply(html_table)
## [[1]]
## # A tibble: 27 x 21
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Par~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 "UK ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 "" <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 "Acc~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 "197~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 "Tre~ Sing~ (UK ~ Maas~ (UK ~ Trea~ (UK ~ Trea~ (UK ~ Trea~ (UK ~ <NA> <NA>
## 7 "Sin~ (UK ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 8 "Maa~ (UK ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 9 "Tre~ (UK ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 10 "Tre~ (UK ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # ... with 17 more rows, and 8 more variables: X14 <chr>, X15 <chr>, X16 <chr>,
## # X17 <chr>, X18 <chr>, X19 <chr>, X20 <chr>, X21 <chr>
##
## [[2]]
## # A tibble: 5 x 2
## X1 X2
## <chr> <chr>
## 1 Single European Act, 1986 (UK ratification)
## 2 Maastricht Treaty, 1992 (UK ratification)
## 3 Treaty of Amsterdam, 1997 (UK ratification)
## 4 Treaty of Nice, 2001 (UK ratification)
## 5 Treaty of Lisbon, 2007 (UK ratification)
##
## [[3]]
## # A tibble: 10 x 2
## X1 X2
## <chr> <chr>
## 1 Members 1973–1979 (elected by parliament)
## 2 Members 1979–1984 (1979 election)
## 3 Members 1984–1989 (1984 election)
## 4 Members 1989–1994 (1989 election)
## 5 Members 1994–1999 (1994 election)
## 6 Members 1999–2004 (1999 election)
## 7 Members 2004–2009 (2004 election)
## 8 Members 2009–2014 (2009 election)
## 9 Members 2014–2019 (2014 election)
## 10 Members 2019–2020 (2019 election)
##
## [[4]]
## # A tibble: 8 x 7
## `Conducted by` Date Remain Leave Undecided Lead Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Conducted by Date "" "" Undecided Lead "Notes"
## 2 What UK Thinks: EU[14] 23 June "52%" "48%" N/A 4% "Six mos~
## 3 Elections Etc.[15] 23 June "50.6%" "49.4%" N/A 1.2% "Twelve ~
## 4 HuffPost Pollster[16] 23 June "45.8%" "45.3%" 9% 0.5% ""
## 5 Number Cruncher Politics[17] 22 June "46%" "44%" 10% 2% "Equal w~
## 6 Financial Times[18] 13 June "48%" "46%" 6% 2% "Five mo~
## 7 The Telegraph[20] 21 June "51%" "49%" N/A 2% "Six mos~
## 8 The Economist[21] 6 June "44%" "44%" 9% 0% "Exclude~
##
## [[5]]
## # A tibble: 1 x 2
## X1 X2
## <lgl> <chr>
## 1 NA This list is incomplete; you can help by adding missing items. (Novemb~
##
## [[6]]
## # A tibble: 134 x 9
## `Date(s) conducted` Remain Leave Undecided Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Lead Sample Conducted by
## 2 23 June 2016 "48.1%" "51.9%" N/A 3.8% 33,577,342 Results of th~
## 3 23 June "52%" "48%" N/A 4% 4,772 YouGov
## 4 22 June "55%" "45%" N/A 10% 4,700 Populus
## 5 20–22 June "51%" "49%" N/A 2% 3,766 YouGov
## 6 20–22 June "49%" "46%" 1% 3% 1,592 Ipsos MORI
## 7 20–22 June "44%" "45%" 9% 1% 3,011 Opinium
## 8 17–22 June "54%" "46%" N/A 8% 1,032 ComRes
## 9 17–22 June "48%" "42%" 11% 6% 1,032 ComRes
## 10 16–22 June "41%" "43%" 16% 2% 2,320 TNS
## # ... with 124 more rows, and 2 more variables: Polling type <chr>, Notes <chr>
##
## [[7]]
## # A tibble: 73 x 9
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes ``
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by "Note~ <NA>
## 2 17–18 Dec "41%" "42%" 17% 1,598 YouGov "" <NA>
## 3 12–14 Dec "58%" "32%" 10% 529 Ipsos MORI "" <NA>
## 4 11–13 Dec "56%" "35%" 8% 1,001 ComRes "" <NA>
## 5 11–13 Dec "42%" "41%" 17% 2,053 ICM "" <NA>
## 6 4–6 Dec "43%" "39%" 17% 2,022 ICM "" <NA>
## 7 2–3 Dec "36%" "43%" 21% 1,001 ORB "" <NA>
## 8 30 Nov–3 Dec "40%" "42%" 18% 10,015 Survation "Incl~ <NA>
## 9 20–24 Nov "41%" "41%" 18% 4,317 YouGov "" <NA>
## 10 19–24 Nov "40%" "38%" 22% 1,699 YouGov "" <NA>
## # ... with 63 more rows, and 1 more variable: <chr>
##
## [[8]]
## # A tibble: 46 x 9
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes ``
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by "Note~ <NA>
## 2 14–15 Dec "40%" "39%" 21% 1,648 YouGov "" <NA>
## 3 30 Nov–1 Dec "42%" "39%" 20% 1,763 YouGov "" <NA>
## 4 20–26 Nov "38%" "43%" 19% 1,641 YouGov "" <NA>
## 5 21–23 Nov "32%" "48%" 20% 2,049 ComRes "" <NA>
## 6 20–21 Nov "40%" "41%" 19% 1,970 YouGov "" <NA>
## 7 19–21 Nov "40%" "41%" 19% 2,314 YouGov "" <NA>
## 8 16–17 Nov "39%" "39%" 21% 1,589 YouGov "" <NA>
## 9 7 Nov "31%" "54%" 15% 1,020 Survation "" <NA>
## 10 2–3 Nov "38%" "41%" 21% 1,652 YouGov "" <NA>
## # ... with 36 more rows, and 1 more variable: <chr>
##
## [[9]]
## # A tibble: 40 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by "Notes"
## 2 1–9 Dec "37%" "43%" 20% Unknown YouGov ""
## 3 10–11 Nov "39%" "39%" 22% Unknown YouGov[35] ""
## 4 13–14 Oct "42%" "37%" 20% Unknown YouGov[35] ""
## 5 23–27 Sep "36%" "44%" 20% 1,922 YouGov ""
## 6 15–16 Sep "42%" "39%" 20% Unknown YouGov[35] ""
## 7 18–19 Aug "46%" "34%" 20% Unknown YouGov[35] ""
## 8 6–8 Aug "32%" "53%" 15% 1,945 Opinium ""
## 9 4–5 Aug "43%" "35%" 22% Unknown YouGov[35] ""
## 10 18–24 Jul "35%" "45%" 21% 1,968 YouGov ""
## # ... with 30 more rows
##
## [[10]]
## # A tibble: 3 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by Notes
## 2 27–28 November "30%" "51%" 9% Unknown YouGov/The Sun Northern ~
## 3 13–15 November "30%" "56%" 14% 1,957 Opinium/Observer Northern ~
##
## [[11]]
## # A tibble: 4 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by Notes
## 2 15–16 December "41%" "41%" 19% Unknown YouGov/The Sun Northern Ir~
## 3 8–9 December "35%" "44%" 20% Unknown YouGov/The Sun Northern Ir~
## 4 7–8 August "30%" "52%" 19% Unknown YouGov/The Sun Northern Ir~
##
## [[12]]
## # A tibble: 2 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Conducted by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Conducted by Notes
## 2 8–9 September "33%" "47%" 19% Unknown YouGov/The Sun Northern Ir~
##
## [[13]]
## # A tibble: 3 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "46.6%" "53.4%" N/A – England Results
## 3 9–16 September 2015 "40%" "43%" 17% 1,712 YouGov
##
## [[14]]
## # A tibble: 3 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "46.7%" "53.3%" N/A – Results
## 3 26 June – 3 July 2015 "42%" "43%" 15% 956 Panelbase/Sunday Times
##
## [[15]]
## # A tibble: 7 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "59.9%" "40.1%" N/A – London Results
## 3 2–6 June 2016 "48%" "35%" 13% 1,179 YouGov
## 4 26 April – 1 May 2016 "51%" "34%" 14% 1,005 Opinium/Evening Standa~
## 5 4–6 January 2016 "39%" "34%" 27% 1,156 YouGov/LBC
## 6 17–19 November 2014 "45%" "37%" 14% 1,124 YouGov/Evening Standard
## 7 20–25 June 2013 "41%" "39%" 20% 1,269 YouGov/Evening Standard
##
## [[16]]
## # A tibble: 35 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "62.0%" "38.0%" N/A – Scotland Results
## 3 6–12 Jun 2016 "58%" "33%" 8% 1,000 Ipsos Mori/STV
## 4 4–22 May 2016 "53%" "24%" 23% 1,008 TNS[permanent dead link]
## 5 6–10 May 2016 "54%" "32%" 14% 1,000 ICM/The Scotsman
## 6 1–2 May 2016 "58%" "19%" 19% 1,024 Survation/Daily Record
## 7 23–28 April 2016 "57%" "33%" 11% 1,074 Panelbase/Sunday Times
## 8 18–25 April 2016 "66%" "29%" 5% 1,015 Ipsos MORI/STV
## 9 1–24 April 2016 "48%" "21%" 31% 1,012 TNS
## 10 15–20 April 2016 "54%" "28%" 17% 1,005 Survation/Daily Record
## # ... with 25 more rows
##
## [[17]]
## # A tibble: 16 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "47.5%" "52.5%" N/A – Wales Results
## 3 30 May – 2 June 2016 "41%" "41%" 18% 1,017 YouGov
## 4 7–11 April 2016 "38%" "39%" 16% 1,011 YouGov
## 5 9–11 February 2016 "37%" "45%" 18% 1,024 YouGov
## 6 21–24 September 2015 "42%" "38%" 21% 1,010 YouGov
## 7 4–6 May 2015 "47%" "33%" 16% 1,202 YouGov/ITV Wales
## 8 24–27 March 2015 "44%" "38%" 14% 1,189 YouGov/ITV Wales
## 9 5–9 March 2015 "43%" "36%" 17% 1,279 YouGov/ITV Wales
## 10 19–26 February 2015 "63%" "33%" 4% 1,000 ICM/BBC
## 11 19–21 January 2015 "44%" "36%" 16% 1,036 YouGov/ITV Wales
## 12 2–5 December 2014 "42%" "39%" 15% 1,131 YouGov/ITV Wales
## 13 8–11 September 2014 "43%" "37%" 15% 1,025 YouGov/ITV Wales
## 14 26 June – 1 July 2014 "41%" "36%" 18% 1,035 YouGov/ITV Wales
## 15 21–24 February 2014 "54%" "40%" 6% 1,000 ICM/BBC
## 16 14–25 June 2013 "29%" "37%" 35% 1,015 Beaufort Research
##
## [[18]]
## # A tibble: 1 x 2
## X1 X2
## <lgl> <chr>
## 1 NA This section needs additional citations for verification. Please help i~
##
## [[19]]
## # A tibble: 8 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Held by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by "Notes"
## 2 23 June 2016 "55.8%" "44.2%" N/A – Northern Ir~ ""
## 3 Late June 2016 "37%" "26%" <NA> Over 1,000 Belfast Tel~ ""
## 4 20 June 2016 "57%" "43%" Exc. DKs 2,090 The NI Sun/~ ""
## 5 17–19 May 2016 "57%" "35%" 9% 1,090 LucidTalk ""
## 6 May 2016 "44%" "20%" 35% 1,005 Ipsos MORI "Questi~
## 7 19–21 October 2015 "56.5%" "28.3%" 15.2% 2,517 LucidTalk ""
## 8 2–16 October 2015 "55%" "13%" 32% 1,012 BBC/RTÉ ""
##
## [[20]]
## # A tibble: 4 x 6
## `Date(s) conducted` Remain Leave Undecided Sample `Held by`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by
## 2 23 June 2016 "95.9%" "4.1%" N/A – Gibraltar Results
## 3 13–15 May 2016 "94%" "2%" 4% 596 Gibraltar Chronicle
## 4 11–15 April 2016 "88%" "8%" 3% 596 Gibraltar Chronicle
##
## [[21]]
## # A tibble: 33 x 7
## `Date(s) conducted` Remain Leave Undecided Sample `Held by` Notes
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" Undecided Sample Held by "Notes"
## 2 1–2 June 2015 "55%" "24%" 18% 1,063 YouGov/Prospe~ "Northern I~
## 3 8–9 May 2015 "58%" "24%" 16% 1,302 YouGov/Sunday~ "Northern I~
## 4 3–4 May 2015 "56%" "20%" 20% 1,664 YouGov/The Sun "Northern I~
## 5 19–20 April 2015 "57%" "22%" 17% 2,078 YouGov/The Sun "Northern I~
## 6 22–23 March 2015 "57%" "22%" 18% 1,641 YouGov/The Sun "Northern I~
## 7 22–23 February 2015 "57%" "21%" 17% 1,772 YouGov/The Sun "Northern I~
## 8 25–26 January 2015 "54%" "25%" 16% 1,656 YouGov/The Sun "Northern I~
## 9 18–19 January 2015 "57%" "21%" 19% 1,747 YouGov/Britis~ "Northern I~
## 10 14–15 Dec 2014 "55%" "24%" 16% 1,648 YouGov/The Sun ""
## # ... with 23 more rows
##
## [[22]]
## # A tibble: 29 x 4
## Country Remain `Does not matter` Leave
## <chr> <chr> <chr> <chr>
## 1 Country "" "" ""
## 2 Austria "41%" "41%" "19%"
## 3 Belgium "49%" "38%" "13%"
## 4 Bulgaria "67%" "27%" "7%"
## 5 Croatia "49%" "41%" "10%"
## 6 Cyprus "35%" "45%" "19%"
## 7 Czech Republic "40%" "47%" "13%"
## 8 Denmark "56%" "31%" "13%"
## 9 Estonia "65%" "28%" "8%"
## 10 Finland "50%" "39%" "11%"
## # ... with 19 more rows
##
## [[23]]
## # A tibble: 29 x 3
## Country Remain Leave
## <chr> <chr> <chr>
## 1 Country "" ""
## 2 Austria "24%" "76%"
## 3 Belgium "34%" "66%"
## 4 Bulgaria "52%" "48%"
## 5 Croatia "36%" "64%"
## 6 Cyprus "33%" "67%"
## 7 Czech Republic "42%" "58%"
## 8 Denmark "51%" "49%"
## 9 Estonia "44%" "56%"
## 10 Finland "30%" "70%"
## # ... with 19 more rows
##
## [[24]]
## # A tibble: 10 x 3
## Country Remain Leave
## <chr> <chr> <chr>
## 1 Country "" ""
## 2 Denmark "46%" "24%"
## 3 Finland "49%" "19%"
## 4 France "51%" "22%"
## 5 Germany "55%" "19%"
## 6 Italy "63%" "20%"
## 7 Norway "34%" "27%"
## 8 Portugal "74%" "8%"
## 9 Spain "69%" "11%"
## 10 Sweden "43%" "26%"
##
## [[25]]
## # A tibble: 177 x 11
## `Date(s) conducted` Right Wrong Undecided Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 4–5 Aug 2020 39% 49% 12% 10% 1,606 YouGov
## 2 30–31 Jul 2020 41% 47% 13% 6% 1,623 YouGov
## 3 22–23 Jul 2020 42% 47% 11% 5% 1,648 YouGov
## 4 11–12 Jun 2020 40% 47% 13% 7% 1,693 YouGov
## 5 29–30 May 2020 42% 45% 13% 3% 1,650 YouGov
## 6 18–19 May 2020 43% 45% 13% 2% 1,718 YouGov
## 7 16–17 Apr 2020 43% 44% 13% 1% 2,015 YouGov
## 8 24–26 Mar 2020 48% 40% 12% 8% 1,010 Number Cruncher Polit~
## 9 9–10 Feb 2020 43% 44% 13% 1% 1,694 YouGov
## 10 31 Jan – 2 Feb 2020 43% 46% 12% 3% 1,575 YouGov
## # ... with 167 more rows, and 4 more variables: Polling type <chr>,
## # Notes <chr>, <chr>, <chr>
##
## [[26]]
## # A tibble: 2 x 9
## `Date(s) conducted` Right Wrong Undecided Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 26 Feb–1 Mar 2019 22% 70% 8% 48% 5,004 YouGov
## 2 5–8 Jul 2018 76% 21% 2% 55% 966 YouGov
## # ... with 2 more variables: Polling type <chr>, Notes <chr>
##
## [[27]]
## # A tibble: 215 x 9
## `Date(s) conducted` Remain Leave Neither Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 18–21 Oct 2019 55% 45% — 10% 2,017 Deltapoll
## 2 17 Oct 2019 EU and ~ EU and~ EU and U~ EU an~ EU and~ EU and UK nego~
## 3 2–14 Oct 2019 32% 54% 14% 22% 26,000 ComRes
## 4 9–11 Oct 2019 51% 45% 3% 6% 1,622 Panelbase
## 5 25 Sep 2019 51% 45% 4% 6% 821 Survation
## 6 5–9 Sep 2019 37% 34% 29% 3% 1,144 Kantar
## 7 5–7 Sep 2019 46% 40% 14% 6% 2,049 Deltapoll
## 8 5–6 Sep 2019 52% 45% 3% 7% 864 Panelbase
## 9 5–6 Sep 2019 50% 44% 6% 6% 809 Survation
## 10 3–4 Sep 2019 46% 43% 12% 3% 1,533 YouGov
## # ... with 205 more rows, and 2 more variables: Polling type <chr>, Notes <chr>
##
## [[28]]
## # A tibble: 28 x 9
## `Date(s) conducted` Remain Leave Neither Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 26 Feb-1 Mar 2019 70% 22% 8% 48% 5,004 YouGov
## 2 26 Feb-1 Mar 2019 69% 16% 15% 50% 5,004 YouGov
## 3 26 Feb-1 Mar 2019 72% 19% 10% 53% 5,004 YouGov
## 4 15-22 Feb 2019 76% 14% 10% 62% 499 BMG Research
## 5 15-22 Feb 2019 48% 22% 30% 26% 1,125 BMG Research
## 6 2-7 Nov 2018 61% 34% 4% 27% 914 Panelbase
## 7 30 Oct-2 Nov 2018 45% 41% 14% 4% 1,031 YouGov
## 8 3-6 Oct 2018 90% 7% 2% 83% 665 YouGov
## 9 13-18 Sep 2018 90% 7% 3% 83% 1,054 YouGov
## 10 6-11 Sep 2018 63% 18% 19% 45% 1,645 YouGov
## # ... with 18 more rows, and 2 more variables: Polling type <chr>, Notes <chr>
##
## [[29]]
## # A tibble: 66 x 11
## `Date(s) conducted` Round Remain Deal `No deal` None Lead Sample
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 17–18 Oct 2019 — 45% 34% 17% 5% 11% 1,025
## 2 17–18 Oct 2019 — 41% 38% 18% 4% 3% 1,025
## 3 17–18 Oct 2019 — 42% 42% — 16% 0% 1,025
## 4 17 Oct 2019 EU and~ EU and ~ EU and~ EU and UK~ EU an~ EU an~ EU and~
## 5 2–14 Oct 2019 — 42% 30% 20% 8% 12% 26,000
## 6 5–6 Sep 2019 — 55% — 41% 5% 14% 864
## 7 24 Jul 2019 Boris ~ Boris J~ Boris ~ Boris Joh~ Boris~ Boris~ Boris ~
## 8 2–5 Jul 2019 — — 26% 34% 40% 8% 1,532
## 9 2–5 Jul 2019 — 43% 25% — 32% 18% 1,532
## 10 2–5 Jul 2019 — 44% — 38% 18% 6% 1,532
## # ... with 56 more rows, and 3 more variables: Conducted by <chr>,
## # Polling type <chr>, Notes <chr>
##
## [[30]]
## # A tibble: 2 x 11
## `Date(s) conducted` Round Remain Deal `No deal` None Lead Sample
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 6-11 Sep 2018 I 58% 10% 9% 23% 48% 1,645
## 2 6-11 Sep 2018 II 82% 18% — — 64% 1,645
## # ... with 3 more variables: Conducted by <chr>, Polling type <chr>,
## # Notes <chr>
##
## [[31]]
## # A tibble: 3 x 8
## `Date(s) conducted` Join `Not join` Undecided Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Date(s) conducted "" "" "" Lead Sample Conducted by
## 2 27 Mar 2019 "38%" "38%" "25%" 0% 1,005 Sky Data
## 3 27 Mar-5 Apr 2018 "31%" "47%" "22%" 16% 1,037 Number Cruncher P~
## # ... with 1 more variable: Polling type <chr>
##
## [[32]]
## # A tibble: 141 x 9
## `Date(s) conducted` Support Oppose Neither Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 17–18 Oct 2019 47% 44% 9% 3% 1,025 Survation
## 2 17–18 Oct 2019 43% 41% 16% 2% 1,025 Survation
## 3 17 Oct 2019 EU and U~ EU and~ EU and ~ EU an~ EU and~ EU and UK nego~
## 4 2–14 Oct 2019 41% 45% 14% 4% 26,000 ComRes
## 5 29–30 Sep 2019 47% 29% 24% 18% 1,620 YouGov
## 6 29–30 Sep 2019 52% 23% 25% 29% 1,620 YouGov
## 7 5–9 Sep 2019 53% 29% 18% 24% 1,144 Kantar
## 8 5–7 Sep 2019 43% 42% 15% 1% 2,049 Deltapoll
## 9 3–4 Sep 2019 46% 41% 13% 5% 1,533 YouGov
## 10 29–31 Aug 2019 41% 47% 12% 6% 2,028 Deltapoll
## # ... with 131 more rows, and 2 more variables: Polling type <chr>, Notes <chr>
##
## [[33]]
## # A tibble: 21 x 9
## `Date(s) conducted` Support Oppose Neither Lead Sample `Conducted by`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 26 Feb-1 Mar 2019 65% 22% 13% 43% 5,004 YouGov
## 2 15-22 Feb 2019 54% 19% 27% 35% 499 BMG Research
## 3 15-22 Feb 2019 38% 25% 37% 13% 1,125 BMG Research
## 4 30 Oct-2 Nov 2018 41% 49% 10% 8% 1,031 YouGov
## 5 3-6 Oct 2018 83% 10% 6% 73% 665 YouGov
## 6 13-18 Sep 2018 86% 8% 6% 78% 1,054 YouGov
## 7 6-11 Sep 2018 52% 22% 25% 30% 1,645 YouGov
## 8 30 Aug-5 Sep 2018 56% 33% 10% 23% 620 YouGov
## 9 30 Aug-5 Sep 2018 66% 22% 11% 44% 1,081 YouGov
## 10 30 Aug-5 Sep 2018 59% 33% 8% 26% 1,058 YouGov
## # ... with 11 more rows, and 2 more variables: Polling type <chr>, Notes <chr>
##
## [[34]]
## # A tibble: 31 x 14
## vteBrexit vteBrexit `` `` `` `` `` `` `` `` ``
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Renegoti~ "Renegoti~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 "Referend~ "\"Should~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 "Referend~ "European~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 "Backgrou~ "Treaties~ Treat~ "Rome~ Legi~ "Eur~ Prop~ "Eur~ Elec~ "201~ By-e~
## 5 "Treaties" "Rome\n19~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 "Legislat~ "European~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 7 "Proposed~ "European~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 8 "Election~ "2014 Eur~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 9 "By-elect~ "2014 Cla~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 10 "Other" "UK acces~ <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # ... with 21 more rows, and 3 more variables: <chr>, <chr>, <chr>
##
## [[35]]
## # A tibble: 6 x 2
## X1 X2
## <chr> <chr>
## 1 Treaties "Rome\n1972 Accession Treaty\nSingle European Act\nMaastricht\~
## 2 Legislation "European Communities Act 1972\nAmendments: 1986\n1993\n1998\n~
## 3 Proposed bills "European Union Bill 2004–05\nEuropean Communities Act 1972 (R~
## 4 Elections "2014 European Parliament election\n2015 general election"
## 5 By-elections "2014 Clacton\n2014 Heywood and Middleton\n2014 Rochester and ~
## 6 Other "UK accession\n1973 EC enlargement\nUK membership\n1975 EC mem~
##
## [[36]]
## # A tibble: 2 x 2
## X1 X2
## <chr> <chr>
## 1 Remain "Britain Stronger in Europe (official campaign)\nLabour In for Britain"
## 2 Leave "Vote Leave (official campaign)\nLeave.EU\nBeLeave\nGrassroots Out\nLa~
##
## [[37]]
## # A tibble: 5 x 2
## X1 X2
## <chr> <chr>
## 1 Political partyleadership elections "Conservative\n2016\n2019\nScottish, Feb ~
## 2 Opposition to Brexit "Proposed second referendum\nPeople's Vot~
## 3 Elections "2017 local\n2017 general\n2018 local\n20~
## 4 By-elections "2016 Witney\n2016 Richmond Park\n2016 Sl~
## 5 Other "International reactions\nMarch to Leave\~
##
## [[38]]
## # A tibble: 2 x 2
## X1 X2
## <chr> <chr>
## 1 on Northern Ireland andthe Republic of Ireland "Brexit and the Irish border\n~
## 2 Other "on Gibraltar\non the EU\nScie~
##
## [[39]]
## # A tibble: 3 x 2
## X1 X2
## <chr> <chr>
## 1 White papers "Brexit plan\nRepeal Bill plan\nChequers plan\nBrexit withdrawal~
## 2 Enacted "Notification of Withdrawal Act 2017\nWithdrawal Act 2018 (Gibra~
## 3 Proposed "Terms of Withdrawal from EU (Referendum) Bills\nUK Withdrawal f~
##
## [[40]]
## # A tibble: 14 x 10
## `vte Opinion pol~ `vte Opinion pol~ `` `` `` `` `` `` ``
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 General elections "1945\n1950\n195~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 Leadership appro~ "2015\n2017\n201~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 European electio~ "2009\n2014\n201~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 Referendums "United Kingdom\~ Unit~ "201~ Nort~ "197~ Scot~ "197~ Wales
## 5 United Kingdom "2011 Alternativ~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 Northern Ireland "1973 border pol~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 7 Scotland "1979 devolution~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 8 Wales "1979 devolution~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 9 Issues "Europe\nPost-re~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 10 Devolved legisla~ "London\n2008\nM~ Lond~ "200~ Nort~ "201~ Scot~ "200~ Wales
## 11 London "2008\nMayor\n20~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 12 Northern Ireland "2016\n2017\n202~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 13 Scotland "2007\n2011\n201~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 14 Wales "2011\n2016\n202~ <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # ... with 1 more variable: <chr>
##
## [[41]]
## # A tibble: 4 x 2
## X1 X2
## <chr> <chr>
## 1 United Kingdom "2011 Alternative Vote\n2016 EU membership"
## 2 Northern Ireland "1973 border poll\n1998 Good Friday Agreement"
## 3 Scotland "1979 devolution\n1997 devolution\n2014 independence"
## 4 Wales "1979 devolution\n1997 devolution\n2011 devolution"
##
## [[42]]
## # A tibble: 4 x 2
## X1 X2
## <chr> <chr>
## 1 London "2008\nMayor\n2012\nMayor\n2016\nMayor\n2021\nMayor"
## 2 Northern Ireland "2016\n2017\n2022\nNext"
## 3 Scotland "2007\n2011\n2016\n2021\nNext"
## 4 Wales "2011\n2016\n2021\nLeadership approval\nNext"
# Column headers of the sixth scraped table
names(html_table(tab[[6]], fill = TRUE))
## [1] "Date(s) conducted" "Remain" "Leave"
## [4] "Undecided" "Lead" "Sample"
## [7] "Conducted by" "Polling type" "Notes"
# Flag entries that cannot be read as a height in inches within
# [smallest, tallest]. Non-numeric entries become NA during conversion
# (warnings suppressed) and are flagged as well.
# Returns a logical vector the same length as x (TRUE = not a valid height).
not_inches <- function(x, smallest = 50, tallest = 84) {
  value <- suppressWarnings(as.numeric(x))
  in_range <- value >= smallest & value <= tallest
  is.na(value) | !in_range
}
not_inches(85)
## [1] TRUE
library(stringr)
# Two groups of sample strings, combined into one vector
yes <- c("1", "5 ft", "9")
no <- c("12", "123", " 1", "a4", "b")
s <- c(yes, no)
# Regex under test: a digit, or the literal "ft"
pattern <- "\\d|ft"
# Show every match of the pattern in each string
s %>% str_view_all(pattern)
# Which animal names contain at least one lower-case letter?
animals <- c("cat", "puppy", "Moose", "MONKEY")
pattern <- "[a-z]"
animals %>% str_detect(pattern)
## [1] TRUE TRUE TRUE FALSE
# Which animal names end in an upper-case letter?
animals <- c("cat", "puppy", "Moose", "MONKEY")
pattern <- "[A-Z]$"
animals %>% str_detect(pattern)
## [1] FALSE FALSE FALSE TRUE
library(dslabs)
# Load the dataset and display it
data(research_funding_rates)
research_funding_rates
## discipline applications_total applications_men applications_women
## 1 Chemical sciences 122 83 39
## 2 Physical sciences 174 135 39
## 3 Physics 76 67 9
## 4 Humanities 396 230 166
## 5 Technical sciences 251 189 62
## 6 Interdisciplinary 183 105 78
## 7 Earth/life sciences 282 156 126
## 8 Social sciences 834 425 409
## 9 Medical sciences 505 245 260
## awards_total awards_men awards_women success_rates_total success_rates_men
## 1 32 22 10 26.2 26.5
## 2 35 26 9 20.1 19.3
## 3 20 18 2 26.3 26.9
## 4 65 33 32 16.4 14.3
## 5 43 30 13 17.1 15.9
## 6 29 12 17 15.8 11.4
## 7 56 38 18 19.9 24.4
## 8 112 65 47 13.4 15.3
## 9 75 46 29 14.9 18.8
## success_rates_women
## 1 25.6
## 2 23.1
## 3 22.2
## 4 19.3
## 5 21.0
## 6 21.8
## 7 14.3
## 8 11.5
## 9 11.2
library(rvest)
library(tidyverse)
library(stringr)
# Scrape a pinned revision (oldid) of the Wikipedia Brexit opinion polling page
url <- "https://en.wikipedia.org/w/index.php?title=Opinion_polling_for_the_United_Kingdom_European_Union_membership_referendum&oldid=896735054"
tab <- read_html(url) %>% html_nodes("table")
# The sixth table on the page is the one used below
polls <- tab[[6]] %>% html_table(fill = TRUE)
names(polls) <- c("dates", "remain", "leave", "undecided", "lead", "samplesize", "pollster", "poll_type", "notes") #rename columns
library(dplyr) #load dplyr to clean data
pattern <- "^(\\d{2}).*(\\d*)%" #pattern of data in Remain column (not used by the filter below)
# Keep only rows whose remain value actually contains a percent sign.
# Testing for "%" directly, rather than for a string length of 3 or 5,
# cannot let through non-percentage entries such as "N/A" and does not drop
# values like "5%" or "100%". The columns were already renamed above, so no
# second rename is needed.
polls_tidy <- polls %>% filter(str_detect(remain, "%"))
# Convert the remain percentages to proportions
as.numeric(str_replace(polls_tidy$remain, "%", ""))/100
## [1] 0.481 0.520 0.550 0.510 0.490 0.440 0.540 0.480 0.410 0.450 0.420 0.530
## [13] 0.450 0.440 0.440 0.420 0.420 0.370 0.460 0.430 0.390 0.450 0.440 0.460
## [25] 0.400 0.480 0.530 0.420 0.440 0.450 0.430 0.430 0.480 0.410 0.430 0.400
## [37] 0.410 0.420 0.440 0.510 0.440 0.440 0.410 0.410 0.450 0.550 0.440 0.440
## [49] 0.520 0.550 0.470 0.430 0.550 0.380 0.360 0.380 0.440 0.420 0.440 0.430
## [61] 0.420 0.490 0.390 0.410 0.450 0.430 0.440 0.510 0.510 0.490 0.480 0.430
## [73] 0.530 0.380 0.400 0.390 0.350 0.450 0.420 0.400 0.390 0.440 0.510 0.390
## [85] 0.350 0.410 0.510 0.450 0.490 0.400 0.480 0.410 0.460 0.470 0.430 0.450
## [97] 0.480 0.490 0.400 0.400 0.400 0.390 0.410 0.390 0.480 0.480 0.370 0.380
## [109] 0.420 0.510 0.450 0.400 0.540 0.360 0.430 0.490 0.410 0.360 0.420 0.380
## [121] 0.550 0.440 0.540 0.410 0.520 0.420 0.380 0.420 0.440
# Same conversion to proportions, letting parse_number() strip the "%"
parse_number(polls_tidy[["remain"]]) / 100
## [1] 0.481 0.520 0.550 0.510 0.490 0.440 0.540 0.480 0.410 0.450 0.420 0.530
## [13] 0.450 0.440 0.440 0.420 0.420 0.370 0.460 0.430 0.390 0.450 0.440 0.460
## [25] 0.400 0.480 0.530 0.420 0.440 0.450 0.430 0.430 0.480 0.410 0.430 0.400
## [37] 0.410 0.420 0.440 0.510 0.440 0.440 0.410 0.410 0.450 0.550 0.440 0.440
## [49] 0.520 0.550 0.470 0.430 0.550 0.380 0.360 0.380 0.440 0.420 0.440 0.430
## [61] 0.420 0.490 0.390 0.410 0.450 0.430 0.440 0.510 0.510 0.490 0.480 0.430
## [73] 0.530 0.380 0.400 0.390 0.350 0.450 0.420 0.400 0.390 0.440 0.510 0.390
## [85] 0.350 0.410 0.510 0.450 0.490 0.400 0.480 0.410 0.460 0.470 0.430 0.450
## [97] 0.480 0.490 0.400 0.400 0.400 0.390 0.410 0.390 0.480 0.480 0.370 0.380
## [109] 0.420 0.510 0.450 0.400 0.540 0.360 0.430 0.490 0.410 0.360 0.420 0.380
## [121] 0.550 0.440 0.540 0.410 0.520 0.420 0.380 0.420 0.440
# Replace "N/A" in the undecided column with "0"
str_replace(polls_tidy[["undecided"]], pattern = "N/A", replacement = "0")
## [1] "0" "0" "0" "0" "1%" "9%" "0" "11%" "16%" "11%" "13%" "2%"
## [13] "13%" "9%" "12%" "9%" "13%" "16%" "11%" "3%" "15%" "5%" "7%" "9%"
## [25] "13%" "3%" "0" "11%" "13%" "0" "11%" "9%" "5%" "11%" "16%" "16%"
## [37] "13%" "15%" "9%" "3%" "12%" "18%" "13%" "16%" "10%" "3%" "14%" "12%"
## [49] "7%" "5%" "14%" "10%" "5%" "21%" "22%" "16%" "11%" "13%" "11%" "11%"
## [61] "14%" "0" "26%" "13%" "17%" "13%" "10%" "6%" "9%" "8%" "11%" "13%"
## [73] "6%" "28%" "16%" "17%" "30%" "17%" "12%" "16%" "18%" "13%" "5%" "18%"
## [85] "30%" "14%" "0" "12%" "10%" "19%" "11%" "17%" "19%" "4%" "16%" "16%"
## [97] "7%" "15%" "19%" "18%" "19%" "19%" "18%" "18%" "15%" "0" "25%" "25%"
## [109] "17%" "10%" "23%" "19%" "10%" "25%" "18%" "10%" "17%" "19%" "19%" "20%"
## [121] "9%" "14%" "10%" "18%" "0" "17%" "22%" "12%" "18%"
# Pull every "<day> <month>" token (3-5 letters of the month) out of each
# date string; the result is a list with one character vector per poll
temp <- polls_tidy$dates %>% str_extract_all("\\d+\\s[a-zA-Z]{3,5}")
# The last token of each poll is its end date (this also covers polls that
# cross a month boundary and therefore contain two tokens)
end_date <- temp %>% sapply(function(tokens) tokens[length(tokens)])
end_date
## [1] "23 June" "23 June" "22 June" "22 June" "22 June" "22 June" "22 June"
## [8] "22 June" "22 June" "20 June" "19 June" "19 June" "18 June" "17 June"
## [15] "17 June" "16 June" "15 June" "15 June" "15 June" "14 June" "13 June"
## [22] "13 June" "13 June" "13 June" "13 June" "12 June" "12 June" "10 June"
## [29] "10 June" "9 June" "6 June" "5 June" "5 June" "3 June" "3 June"
## [36] "3 June" "31 May" "29 May" "29 May" "29 May" "25 May" "24 May"
## [43] "24 May" "23 May" "22 May" "22 May" "19 May" "17 May" "17 May"
## [50] "16 May" "15 May" "15 May" "15 May" "12 May" "12 May" "12 May"
## [57] "8 May" "6 May" "3 May" "29 Apr" "29 Apr" "29 Apr" "28 Apr"
## [64] "26 Apr" "26 Apr" "26 Apr" "24 Apr" "24 Apr" "19 Apr" "18 Apr"
## [71] "17 Apr" "17 Apr" "17 Apr" "14 Apr" "14 Apr" "12 Apr" "11 Apr"
## [78] "10 Apr" "10 Apr" "7 Apr" "4 Apr" "3 Apr" "3 Apr" "1 Apr"
## [85] "29 Mar" "29 Mar" "28 Mar" "24 Mar" "22 Mar" "22 Mar" "20 Mar"
## [92] "20 Mar" "19 Mar" "14 Mar" "13 Mar" "11 Mar" "10 Mar" "6 Mar"
## [99] "6 Mar" "3 Mar" "2 Mar" "1 Mar" "29 Feb" "28 Feb" "28 Feb"
## [106] "25 Feb" "23 Feb" "23 Feb" "22 Feb" "22 Feb" "20 Feb" "19 Feb"
## [113] "16 Feb" "15 Feb" "14 Feb" "14 Feb" "7 Feb" "4 Feb" "31 Jan"
## [120] "28 Jan" "25 Jan" "25 Jan" "24 Jan" "24 Jan" "21 Jan" "17 Jan"
## [127] "16 Jan" "14 Jan" "10 Jan"
library(dslabs)
library(lubridate)
# Print numbers with 3 significant digits
options(digits = 3)
dates <- c("09-01-02", "01-12-07", "02-03-04")
# Day-month-year is only one possible reading of these strings; the right
# parser cannot be chosen without more information about the format
dates %>% dmy()
## [1] "2002-01-09" "2007-12-01" "2004-03-02"
data(brexit_polls)
# Month names of the poll start dates, kept as a one-column data frame
month <- as.data.frame(months(brexit_polls$startdate))
# Number of polls that started in April. The numeric month is compared
# rather than the month name because months() returns names in the current
# locale, so a test against "April" would count 0 in a non-English locale.
sum(lubridate::month(brexit_polls$startdate) == 4)
## [1] 25
# Poll end dates rounded to the nearest week, kept as a one-column data frame
enddate <- as.data.frame(round_date(brexit_polls$enddate, "week"))
# Number of polls whose rounded end date is 2016-06-12
sum(round_date(brexit_polls$enddate, "week") == "2016-06-12")
## [1] 14
# Number of polls ending on each day of the week, one weekday at a time
with(brexit_polls, sum(weekdays(enddate) == "Monday"))
## [1] 20
with(brexit_polls, sum(weekdays(enddate) == "Tuesday"))
## [1] 23
with(brexit_polls, sum(weekdays(enddate) == "Wednesday"))
## [1] 12
with(brexit_polls, sum(weekdays(enddate) == "Thursday"))
## [1] 17
with(brexit_polls, sum(weekdays(enddate) == "Friday"))
## [1] 14
with(brexit_polls, sum(weekdays(enddate) == "Saturday"))
## [1] 4
with(brexit_polls, sum(weekdays(enddate) == "Sunday"))
## [1] 37
# The same tallies from a single call
with(brexit_polls, table(weekdays(enddate)))
##
## Friday Monday Saturday Sunday Thursday Tuesday Wednesday
## 14 20 4 37 17 23 12
data("movielens")
library(dplyr)
# Convert each timestamp to a date-time, count rows per calendar year and
# sort the years from most to fewest rows.
# Note: the result is assigned back to `movielens`, so the loaded dataset is
# replaced by this count table.
movielens <- movielens %>%
  mutate(datetime = as_datetime(timestamp)) %>%
  count(year(datetime)) %>%
  arrange(desc(n))
library(tidyverse)
library(gutenbergr)
library(tidytext)
library(Rcpp)
# Print numbers with 3 significant digits
options(digits = 3)
library(dplyr)
# All catalogue entries whose title contains "Pride and Prejudice"
filter(gutenberg_metadata, str_detect(title, "Pride and Prejudice"))
## # A tibble: 6 x 8
## gutenberg_id title author gutenberg_autho~ language gutenberg_books~ rights
## <int> <chr> <chr> <int> <chr> <chr> <chr>
## 1 1342 Pride ~ Austen~ 68 en Best Books Ever~ Publi~
## 2 20686 Pride ~ Austen~ 68 en Harvard Classic~ Publi~
## 3 20687 Pride ~ Austen~ 68 en Harvard Classic~ Publi~
## 4 26301 Pride ~ Austen~ 68 en Best Books Ever~ Publi~
## 5 37431 Pride ~ <NA> NA en <NA> Publi~
## 6 42671 Pride ~ Austen~ 68 en Best Books Ever~ Publi~
## # ... with 1 more variable: has_text <lgl>
# Same title search, restricted to what gutenberg_works() returns for English
filter(gutenberg_works(languages = "en"), str_detect(title, "Pride and Prejudice"))
## # A tibble: 2 x 8
## gutenberg_id title author gutenberg_autho~ language gutenberg_books~ rights
## <int> <chr> <chr> <int> <chr> <chr> <chr>
## 1 1342 Pride ~ Austen~ 68 en Best Books Ever~ Publi~
## 2 37431 Pride ~ <NA> NA en <NA> Publi~
## # ... with 1 more variable: has_text <lgl>
# Download book 1342 once and split its text into one word per row.
# Each later step builds on `words`, so the book is fetched a single time
# rather than once per statement.
words <- gutenberg_download(1342) %>% unnest_tokens(word, text) # total words
# Drop stop words
words <- words %>% anti_join(stop_words, by = "word")
# Among tokens that contain no digit, list the words appearing at least
# 100 times, most frequent first
words %>% filter(!str_detect(word, "\\d+")) %>% count(word) %>% filter(n >= 100) %>% arrange(desc(n))
## # A tibble: 24 x 2
## word n
## <chr> <int>
## 1 elizabeth 596
## 2 darcy 373
## 3 bennet 295
## 4 miss 283
## 5 jane 264
## 6 bingley 258
## 7 time 203
## 8 lady 183
## 9 sister 179
## 10 wickham 162
## # ... with 14 more rows
library(textdata)
# AFINN lexicon: one row per word with its sentiment value
afinn <- get_sentiments("afinn")
# Tokens of book 1342 without stop words and without tokens containing digits
words <- gutenberg_download(1342) %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!str_detect(word, "\\d+"))
# Join on the word column explicitly (matching the anti_join above) so the
# join key does not depend on whichever column names the two tables share.
# n  = proportion of matched words with a positive value
# n1 = number of matched words with a value of exactly 4
afinn_sentiments <- words %>%
  inner_join(afinn, by = "word") %>%
  summarise(n = mean(value > 0), n1 = sum(value == 4)) #dataset changed, proportion of > 0 should be 0.588, while the answer is 0.563
library(tidyverse)
library(tidyr)
library(pdftools)
options(digits = 3) # report 3 significant digits
# Path of the mortality report PDF installed with the dslabs package
fn <- system.file("extdata", "RD-Mortality-Report_2015-18-180531.pdf", package = "dslabs")
# Open the PDF in an external viewer. The original command only works on
# Windows; macOS and other Unix-like systems get their own opener so the
# script does not fail there. The path is shell-quoted in case it has spaces.
if (.Platform$OS.type == "windows") {
  system("cmd.exe", input = paste("start", fn))
} else if (Sys.info()[["sysname"]] == "Darwin") {
  system2("open", args = shQuote(fn))
} else {
  system2("xdg-open", args = shQuote(fn))
}
## [1] 0
# Read the PDF as text, one string per page
txt <- pdf_text(fn)
# Break page 9 into lines; str_split() returns a list with one element per
# input string
x <- txt[9] %>% str_split("\n")
x
length(x) # number of entries in the list
## [[1]]
## [1] "6/4/2018 Departamento de Salud - Registro Demográfico - División de Calidad y Estadísticas Vitales"
## [2] ""
## [3] "SEP 2015 2016 2017 2018"
## [4] " 1 75 75 94 0"
## [5] " 2 77 67 69 0 Defunciones Ocurridas en Septiembre por Día y Año"
## [6] " 3 67 78 80 0"
## [7] " 140"
## [8] " 4 71 99 84 0"
## [9] " 5 62 89 74 0"
## [10] " 120"
## [11] " 6 77 74 83 0"
## [12] " 7 85 67 87 0"
## [13] " 8 84 77 94 0 100"
## [14] " 9 79 90 72 0"
## [15] " 10 66 73 98 0 80"
## [16] " 11 92 78 92 0"
## [17] " 12 79 66 80 0 60"
## [18] " 13 81 88 100 0"
## [19] " 14 70 81 79 0 40"
## [20] " 15 87 91 84 0"
## [21] " 16 70 71 80 0 20"
## [22] " 17 70 68 88 0"
## [23] " 18 76 79 78 0 0"
## [24] " 19 81 82 75 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30"
## [25] " 20 69 79 106 0 Fuente: Registro Demográfico - División de Calidad y Estadísticas Vitales 2015 2016 2017 2018"
## [26] " 21 70 67 124 0"
## [27] " 22 68 97 110 0"
## [28] " 23 70 71 109 0"
## [29] " 24 78 79 122 0"
## [30] " 25 60 75 137 0"
## [31] " 26 76 82 132 0"
## [32] " 27 78 82 122 0"
## [33] " 28 84 81 112 0"
## [34] " 29 83 70 131 0"
## [35] " 30 73 91 132 0"
## [36] "Total 2258 2367 2928 0"
## [37] "Avg 75 79 98 0"
## [38] "Max 92 99 137 0 NOTA:"
## [39] "Min 60 66 69 0 * Año 2017 - Datos preliminares; archivo final en proceso operacional de revisión de calidad y validación."
## [40] "Med 76 78.5 93 0 ** Año 2018 - Datos preliminares; Certificados de Defunción registrados en sistema hasta 5/31/2018."
## [41] ""
## [1] 1
# Unwrap the single list element to get the character vector of lines
s <- x[[1]]
s
length(s)
## [1] "6/4/2018 Departamento de Salud - Registro Demográfico - División de Calidad y Estadísticas Vitales"
## [2] ""
## [3] "SEP 2015 2016 2017 2018"
## [4] " 1 75 75 94 0"
## [5] " 2 77 67 69 0 Defunciones Ocurridas en Septiembre por Día y Año"
## [6] " 3 67 78 80 0"
## [7] " 140"
## [8] " 4 71 99 84 0"
## [9] " 5 62 89 74 0"
## [10] " 120"
## [11] " 6 77 74 83 0"
## [12] " 7 85 67 87 0"
## [13] " 8 84 77 94 0 100"
## [14] " 9 79 90 72 0"
## [15] " 10 66 73 98 0 80"
## [16] " 11 92 78 92 0"
## [17] " 12 79 66 80 0 60"
## [18] " 13 81 88 100 0"
## [19] " 14 70 81 79 0 40"
## [20] " 15 87 91 84 0"
## [21] " 16 70 71 80 0 20"
## [22] " 17 70 68 88 0"
## [23] " 18 76 79 78 0 0"
## [24] " 19 81 82 75 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30"
## [25] " 20 69 79 106 0 Fuente: Registro Demográfico - División de Calidad y Estadísticas Vitales 2015 2016 2017 2018"
## [26] " 21 70 67 124 0"
## [27] " 22 68 97 110 0"
## [28] " 23 70 71 109 0"
## [29] " 24 78 79 122 0"
## [30] " 25 60 75 137 0"
## [31] " 26 76 82 132 0"
## [32] " 27 78 82 122 0"
## [33] " 28 84 81 112 0"
## [34] " 29 83 70 131 0"
## [35] " 30 73 91 132 0"
## [36] "Total 2258 2367 2928 0"
## [37] "Avg 75 79 98 0"
## [38] "Max 92 99 137 0 NOTA:"
## [39] "Min 60 66 69 0 * Año 2017 - Datos preliminares; archivo final en proceso operacional de revisión de calidad y validación."
## [40] "Med 76 78.5 93 0 ** Año 2018 - Datos preliminares; Certificados de Defunción registrados en sistema hasta 5/31/2018."
## [41] ""
## [1] 41
library(stringr)
# Strip leading and trailing whitespace from every line
s <- str_trim(s)
s
## [1] "6/4/2018 Departamento de Salud - Registro Demográfico - División de Calidad y Estadísticas Vitales"
## [2] ""
## [3] "SEP 2015 2016 2017 2018"
## [4] "1 75 75 94 0"
## [5] "2 77 67 69 0 Defunciones Ocurridas en Septiembre por Día y Año"
## [6] "3 67 78 80 0"
## [7] "140"
## [8] "4 71 99 84 0"
## [9] "5 62 89 74 0"
## [10] "120"
## [11] "6 77 74 83 0"
## [12] "7 85 67 87 0"
## [13] "8 84 77 94 0 100"
## [14] "9 79 90 72 0"
## [15] "10 66 73 98 0 80"
## [16] "11 92 78 92 0"
## [17] "12 79 66 80 0 60"
## [18] "13 81 88 100 0"
## [19] "14 70 81 79 0 40"
## [20] "15 87 91 84 0"
## [21] "16 70 71 80 0 20"
## [22] "17 70 68 88 0"
## [23] "18 76 79 78 0 0"
## [24] "19 81 82 75 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30"
## [25] "20 69 79 106 0 Fuente: Registro Demográfico - División de Calidad y Estadísticas Vitales 2015 2016 2017 2018"
## [26] "21 70 67 124 0"
## [27] "22 68 97 110 0"
## [28] "23 70 71 109 0"
## [29] "24 78 79 122 0"
## [30] "25 60 75 137 0"
## [31] "26 76 82 132 0"
## [32] "27 78 82 122 0"
## [33] "28 84 81 112 0"
## [34] "29 83 70 131 0"
## [35] "30 73 91 132 0"
## [36] "Total 2258 2367 2928 0"
## [37] "Avg 75 79 98 0"
## [38] "Max 92 99 137 0 NOTA:"
## [39] "Min 60 66 69 0 * Año 2017 - Datos preliminares; archivo final en proceso operacional de revisión de calidad y validación."
## [40] "Med 76 78.5 93 0 ** Año 2018 - Datos preliminares; Certificados de Defunción registrados en sistema hasta 5/31/2018."
## [41] ""
# First line of the trimmed page
s[[1L]]
## [1] "6/4/2018 Departamento de Salud - Registro Demográfico - División de Calidad y Estadísticas Vitales"
#Question 6
# "2015" appears on two lines (the column header and a footer line), so keep
# only the first match: header_index must be a single row number for the
# 1:header_index range built later on.
header_index <- str_which(s, "2015")[1]
header_index
## [1] 3
#Question 7
# Split the header LINE itself (not its index number) on whitespace to get
# the month label and the year column names
header <- s[header_index[1]] %>% str_split("\\s+", simplify = TRUE)
header
## [,1] [,2] [,3] [,4] [,5]
## [1,] "SEP" "2015" "2016" "2017" "2018"
#Question 8
# Index of the line containing "Total"
tail_index <- s %>% str_which("Total")
tail_index
## [1] 36
#Question 9
# Number of digit runs on each line; how many lines hold exactly one number,
# and which lines they are
n <- str_count(s, "\\d+")
sum(n == 1)
which(n == 1)
## [1] 2
## [1] 7 10
#Question 10
# Rows to discard: everything up to and including the header line, lines that
# hold a single number, and everything from the "Total" line to the end.
# Only the first element of header_index / tail_index is used, so a vector
# with several matches cannot produce a "only the first used" warning.
out <- c(seq_len(header_index[1]), which(n == 1), tail_index[1]:length(s))
s <- s[-out]
length(s)
## [1] 30
#Question 12
# Split every row on whitespace into at most 6 pieces and keep the first
# five columns
s <- s %>% str_split_fixed("\\s+", n = 6) %>% .[, 1:5]
s
## [,1] [,2] [,3] [,4] [,5]
## [1,] "1" "75" "75" "94" "0"
## [2,] "2" "77" "67" "69" "0"
## [3,] "3" "67" "78" "80" "0"
## [4,] "4" "71" "99" "84" "0"
## [5,] "5" "62" "89" "74" "0"
## [6,] "6" "77" "74" "83" "0"
## [7,] "7" "85" "67" "87" "0"
## [8,] "8" "84" "77" "94" "0"
## [9,] "9" "79" "90" "72" "0"
## [10,] "10" "66" "73" "98" "0"
## [11,] "11" "92" "78" "92" "0"
## [12,] "12" "79" "66" "80" "0"
## [13,] "13" "81" "88" "100" "0"
## [14,] "14" "70" "81" "79" "0"
## [15,] "15" "87" "91" "84" "0"
## [16,] "16" "70" "71" "80" "0"
## [17,] "17" "70" "68" "88" "0"
## [18,] "18" "76" "79" "78" "0"
## [19,] "19" "81" "82" "75" "0"
## [20,] "20" "69" "79" "106" "0"
## [21,] "21" "70" "67" "124" "0"
## [22,] "22" "68" "97" "110" "0"
## [23,] "23" "70" "71" "109" "0"
## [24,] "24" "78" "79" "122" "0"
## [25,] "25" "60" "75" "137" "0"
## [26,] "26" "76" "82" "132" "0"
## [27,] "27" "78" "82" "122" "0"
## [28,] "28" "84" "81" "112" "0"
## [29,] "29" "83" "70" "131" "0"
## [30,] "30" "73" "91" "132" "0"
# s is already the cleaned 30 x 5 matrix (one row per day of September), so no
# further rows are removed here. Columns follow the header
# "SEP 2015 2016 2017 2018": V1 = day, V2 = 2015, V3 = 2016, V4 = 2017,
# V5 = 2018 (all zeros). stringsAsFactors = FALSE keeps the values as
# character on every R version so as.numeric() converts them correctly.
s_df <- as.data.frame(s, stringsAsFactors = FALSE)
mean(as.numeric(s_df$V2)) #mean per day 2015
## [1] 75.3
mean(as.numeric(s_df$V4[1:19])) #sep 1-19, 2017
## [1] 83.7
mean(as.numeric(s_df$V4[20:30])) #sep 20-30, 2017
## [1] 122
#Question 13
# tab <- tab %>% gather(year, deaths, -day) %>%
#mutate(deaths = as.numeric(deaths))
# tab