Assignment 5

The assignment is to do an active reading of the UCL course from 2.4.7 through 2.5.7. I assume that the earlier material in Chapter 2 is under control. In fact there is nothing to do in 2.4.7, so the real work begins in 2.4.8.

To demonstrate that you have read the material actively, you should copy and execute the code chunks. Please make brief comments in front of each chunk describing what the chunk does. I really do mean brief, no more than a sentence.

2.4.8

This code block plots two sets of data, 1-100 and 101-200.

# Build two integer vectors: 1 to 100 and 101 to 200.
Data1 <- seq(1L, 100L)
Data2 <- Data1 + 100L
# Scatter plot of Data1 (x) against Data2 (y), drawn in red.
plot(x = Data1, y = Data2, col = "red")

This code block generates two sets of numbers with specified means and standard deviations and then plots them.

# Place your code here.

# Draw 100 normal random values for each vector with the given mean and sd.
Data3 <- rnorm(n = 100, mean = 53, sd = 34)
Data4 <- rnorm(n = 100, mean = 64, sd = 14)
# Scatter plot of Data3 (x) against Data4 (y), drawn in blue.
plot(x = Data3, y = Data4, col = "blue")

2.4.10

This code plots two sets of data.

# Place your code here.

# Combine the two vectors into a data frame with columns Data1 and Data2.
df <- data.frame(Data1 = Data1, Data2 = Data2)
# Plot the data frame in green.
plot(df, col = "green")

This code block loads the tidyverse and shows the head and tail of the data.

# Place your code here.
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Show the first 10 and then the last 10 rows of df.
# head() and tail() return only 6 rows by default, so n is set explicitly
# to match the stated intent.
df %>%
  head(n = 10)

df %>%
  tail(n = 10)

2.4.11

This code block subsets df.

# Place your code here.

# Rows 1 to 10 of the first column.
df[1:10, 1]

##  [1]  1  2  3  4  5  6  7  8  9 10

# Rows 5 to 15, all columns.
df[5:15,]

# Rows 2, 3 and 6 of the second column.
df[c(2,3,6),2]

## [1] 102 103 106

# Every row of the first column.
df[,1]

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100

This code block loads dplyr and renames two columns.

# Place your code here.
library(dplyr)
# Rename Data1 to column1 and Data2 to column2 (new name = old name).
df <- dplyr::rename(df, column1 = Data1, column2 = Data2)

This code block shows methods of selecting data columns.

# Place your code here.

# dplyr: keep only column1.
dplyr::select(df, column1)

# Base R: extract column1 with `$`.
df$column1

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100

# Base R: extract column1 by name with `[[`.
df[["column1"]]

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100

2.5.1

This code block reads a csv file.

# Place your code here.

# Read the ward profiles CSV with base R from an absolute path.
# The file has a header row, is comma separated, and its strings are
# declared as Latin-1.
LondonDataOSK <- read.csv(
  file = "C:/Users/Braden/Documents/CSC495/statistical-gis-boundaries-london/ward-profiles-excel-version.csv",
  sep = ",",
  header = TRUE,
  encoding = "latin1"
)

This code block reads a csv file using the here package.

# Place your code here.

#install.packages("here")
library(here)

## here() starts at C:/Users/Braden/Documents/CSC495

# Print the root directory that here builds paths from.
here::here()

## [1] "C:/Users/Braden/Documents/CSC495"

# Same base R read as before, with the path built relative to that root.
LondonDataOSK <- read.csv(
  file = here::here(
    "statistical-gis-boundaries-london",
    "ward-profiles-excel-version.csv"
  ),
  sep = ",",
  header = TRUE,
  encoding = "latin1"
)

2.5.3

This code block reads a csv.

# Place your code here.

# Read the ward profiles CSV with readr.
# The path is built with here::here() instead of a hard-coded user directory,
# matching the earlier read.csv() call (assumes the here root is the CSC495
# project folder printed above).
# "n/a" entries are treated as missing so those columns can parse as numbers.
LondonData <- read_csv(
  here::here(
    "statistical-gis-boundaries-london",
    "ward-profiles-excel-version.csv"
  ),
  locale = locale(encoding = "latin1"),
  na = "n/a"
)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Ward name` = col_character(),
##   `Old code` = col_character(),
##   `New code` = col_character()
## )
## i Use `spec()` for the full column specifications.

2.5.4

This code block checks the class of two dataframes

# Place your code here.

# Class of the object returned by readr's read_csv().
class(LondonData)

## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"

# Class of the object returned by base R's read.csv().
class(LondonDataOSK)

## [1] "data.frame"

This code block uses dplyr verbs to create a dataframe of all the class types in the dataframe.

# Place your code here.

# Build a two-column lookup of every variable name and its class.
# summarise(across(everything(), ...)) replaces the superseded summarise_all().
Datatypelist <- LondonData %>%
  summarise(across(everything(), class)) %>%
  # Reshape from one wide row into one row per variable.
  pivot_longer(
    everything(),
    names_to = "All_variables",
    values_to = "Variable_class"
  )

Datatypelist

This code block reads the csv file again, this time without declaring "n/a" as a missing value.

# Place your code here.

# Demonstration read without na = "n/a": columns containing "n/a" are parsed
# as character (see the column specification printed below).
# The result is stored under its own name so it does not overwrite the
# correctly parsed LondonData that the later chunks rely on.
LondonData_no_na <- read_csv("C:/Users/Braden/Documents/CSC495/statistical-gis-boundaries-london/ward-profiles-excel-version.csv", 
                             locale = locale(encoding = "latin1"))

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   `Ward name` = col_character(),
##   `Old code` = col_character(),
##   `New code` = col_character(),
##   `% children in reception year who are obese - 2011/12 to 2013/14` = col_character(),
##   `% children in year 6 who are obese- 2011/12 to 2013/14` = col_character(),
##   `Rate of All Ambulance Incidents per 1,000 population - 2014` = col_character(),
##   `Rates of ambulance call outs for alcohol related illness - 2014` = col_character(),
##   `Number of jobs in area - 2013` = col_character(),
##   `Employment per head of resident WA population - 2013` = col_character(),
##   `(ID2010) - Rank of average score (within London) - 2010` = col_character(),
##   `(ID2010) % of LSOAs in worst 50% nationally - 2010` = col_character(),
##   `Deliberate Fires per 1,000 population - 2014` = col_character(),
##   `% area that is open space - 2014` = col_character(),
##   `Average Public Transport Accessibility score - 2014` = col_character(),
##   `Turnout at Mayoral election - 2012` = col_character()
## )
## i Use `spec()` for the full column specifications.

This code block opens the edit window.

# Place your code here.

#LondonData <- edit(LondonData)

This code block runs a summary of df.

# Place your code here.

# Per-column summary statistics for df.
summary(df)

##     column1          column2     
##  Min.   :  1.00   Min.   :101.0  
##  1st Qu.: 25.75   1st Qu.:125.8  
##  Median : 50.50   Median :150.5  
##  Mean   : 50.50   Mean   :150.5  
##  3rd Qu.: 75.25   3rd Qu.:175.2  
##  Max.   :100.00   Max.   :200.0

This code block prints the head of column names.

# Place your code here.

# Look at the first six column names (head() defaults to n = 6).
head(colnames(LondonData))

## [1] "Ward name"                  "Old code"                  
## [3] "New code"                   "Population - 2015"         
## [5] "Children aged 0-15 - 2015"  "Working-age (16-64) - 2015"

2.5.5

This code block selects a range of data.

# Place your code here.

# Base R: keep rows 626 to 658, all columns.
LondonBoroughs <- LondonData[seq(626, 658), ]

This code block selects a slice of data.

# Place your code here.

# dplyr: the same row range selected by position.
LondonBoroughs <- slice(LondonData, 626:658)

# Rows where female life expectancy is above 90.
Femalelifeexp <- filter(LondonData, `Female life expectancy -2009-13` > 90)

# Rows whose "New code" starts with E09.
LondonBoroughs <- filter(LondonData, str_detect(`New code`, "^E09"))

# Names in the selected rows.
LondonBoroughs[["Ward name"]]

##  [1] "City of London"         "City of London"         "Barking and Dagenham"  
##  [4] "Barnet"                 "Bexley"                 "Brent"                 
##  [7] "Bromley"                "Camden"                 "Croydon"               
## [10] "Ealing"                 "Enfield"                "Greenwich"             
## [13] "Hackney"                "Hammersmith and Fulham" "Haringey"              
## [16] "Harrow"                 "Havering"               "Hillingdon"            
## [19] "Hounslow"               "Islington"              "Kensington and Chelsea"
## [22] "Kingston upon Thames"   "Lambeth"                "Lewisham"              
## [25] "Merton"                 "Newham"                 "Redbridge"             
## [28] "Richmond upon Thames"   "Southwark"              "Sutton"                
## [31] "Tower Hamlets"          "Waltham Forest"         "Wandsworth"            
## [34] "Westminster"

# Print only the "Ward name" column of the selected rows.
print(dplyr::select(LondonBoroughs, `Ward name`))

## # A tibble: 34 x 1
##    `Ward name`         
##    <chr>               
##  1 City of London      
##  2 City of London      
##  3 Barking and Dagenham
##  4 Barnet              
##  5 Bexley              
##  6 Brent               
##  7 Bromley             
##  8 Camden              
##  9 Croydon             
## 10 Ealing              
## # ... with 24 more rows

This code block only selects distinct rows in the dataframe.

# Place your code here.

# Drop duplicated rows.
LondonBoroughs <- distinct(LondonBoroughs)

2.5.5.2

This code block shows three ways to select columns from the dataframe.

# Place your code here.

# Base R: pick columns 1, 19, 20 and 21 by position.
LondonBoroughs_manualcols <- LondonBoroughs[, c(1, 19, 20, 21)]

# dplyr: the same positional selection.
LondonBoroughs_dplyrcols <- dplyr::select(LondonBoroughs, c(1, 19, 20, 21))

# dplyr: pick columns whose names contain the given text.
LondonBoroughs_contains <- dplyr::select(
  LondonBoroughs,
  contains("expectancy"),
  contains("obese - 2011/12 to 2013/14"),
  contains("Ward name")
)

2.5.5.3

This code block uses the clean name function on the dataframe.

# Place your code here.

#install.packages("janitor")
library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

# Rename "Ward name" to Borough, clean the column names with janitor's
# defaults, then clean them again into the big_camel case.
LondonBoroughs <- LondonBoroughs %>%
  dplyr::rename(Borough = `Ward name`) %>%
  clean_names() %>%
  clean_names(case = "big_camel")

2.5.5.4

This code block uses dplyr functions to create new variables and select them to a dataframe and slices the dataframe.

# Place your code here.


Life_expectancy <- LondonBoroughs %>%
  mutate(
    # mean of the female and male life expectancy columns
    averagelifeexpectancy =
      (FemaleLifeExpectancy2009_13 + MaleLifeExpectancy2009_13) / 2,
    # each average divided by the mean of all averages
    normalisedlifeepectancy =
      averagelifeexpectancy / mean(averagelifeexpectancy)
  ) %>%
  # keep only the code, the name and the two new columns
  dplyr::select(
    NewCode,
    Borough,
    averagelifeexpectancy,
    normalisedlifeepectancy
  ) %>%
  # sort from highest to lowest; arrange() sorts ascending without desc()
  arrange(desc(normalisedlifeepectancy))

# First five rows of the sorted table.
Life_expectancy %>%
  slice_head(n = 5)

# Last five rows of the sorted table.
Life_expectancy %>%
  slice_tail(n = 5)

2.5.5.5

This code block creates a variable with the casewhen function.

# Place your code here.



# Label each row by comparing its average life expectancy with 81.16;
# anything not strictly above 81.16 falls through to the "below" label.
Life_expectancy2 <- mutate(
  Life_expectancy,
  UKcompare = case_when(
    averagelifeexpectancy > 81.16 ~ "above UK average",
    TRUE ~ "below UK average"
  )
)
Life_expectancy2

This code block creates a variable, groups by it, and then summarizes using the grouping variable.

# Place your code here.


# For each UKcompare group: spread of the difference from 81.16,
# number of rows, and mean difference.
Life_expectancy2_group <- Life_expectancy2 %>%
  mutate(UKdiff = averagelifeexpectancy - 81.16) %>%
  group_by(UKcompare) %>%
  summarise(
    range = diff(range(UKdiff)),
    count = n(),
    Average = mean(UKdiff)
  )

Life_expectancy2_group

This code block creates new variables, groups by one variable, and then summarizes using the grouping variable.

# Place your code here.

# Count rows per descriptive label of how far each average life expectancy
# sits from 81.16.
Life_expectancy3 <- Life_expectancy %>%
  # difference from 81.16
  mutate(UKdiff = averagelifeexpectancy - 81.16) %>%
  # round every numeric column to 3 decimals; the rounding is written as a
  # formula lambda because passing extra arguments through across()'s `...`
  # is deprecated in newer dplyr releases
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  # round the difference to whole years for the label
  mutate(across(UKdiff, ~ round(.x, 0))) %>%
  # NOTE(review): the cut-off here is 81 while UKdiff is measured from
  # 81.16 - confirm this is intended.
  mutate(UKcompare = case_when(averagelifeexpectancy >= 81 ~
                                 str_c("equal or above UK average by",
                                       UKdiff,
                                       "years",
                                       sep = " "),
                               TRUE ~ str_c("below UK average by",
                                            UKdiff,
                                            "years",
                                            sep = " "))) %>%
  group_by(UKcompare) %>%
  summarise(count = n())

Life_expectancy3

This code block uses mutate to create multiple variables.

# Place your code here.


# Add the difference from 81.16, round every numeric column to 3 decimals,
# then round the difference itself to whole years.
Life_expectancy4 <- Life_expectancy %>%
  mutate(UKdiff = averagelifeexpectancy - 81.16) %>%
  # The predicate is wrapped in where(): selecting with a bare is.numeric
  # raises the tidyselect warning "Predicate functions must be wrapped in
  # `where()`".
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  mutate(across(UKdiff, ~ round(.x, 0)))

2.5.6

This code block plots two variables.

# Place your code here.

 # Base R scatter plot: male life expectancy on the x axis against the
 # reception-year obesity percentage on the y axis.
 plot(LondonBoroughs$MaleLifeExpectancy2009_13,
      LondonBoroughs$PercentChildrenInReceptionYearWhoAreObese2011_12To2013_14)

2.5.7

This code block uses plotly to create an interactive graph from the dataframe.

# Place your code here.

#install.packages("plotly")
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Interactive scatter plot of the same two variables.
LondonBoroughs %>%
  plot_ly(
    # marker-only scatter trace
    type = "scatter",
    mode = "markers",
    # x and y axis variables
    x = ~MaleLifeExpectancy2009_13,
    y = ~PercentChildrenInReceptionYearWhoAreObese2011_12To2013_14,
    # shown when hovering over a point
    text = ~Borough
  )

2.5.8

This code block installs and loads packages.

# Place your code here.

# install.packages("maptools")
# install.packages(c("classInt", "tmap"))
# 
# # might also need these ones
# install.packages(c("RColorBrewer", "sp", "rgeos", 
#                    "tmaptools", "sf", "downloader", "rgdal", 
#                    "geojsonio"))

library(maptools)

## Loading required package: sp

## Checking rgeos availability: TRUE

library(RColorBrewer)
library(classInt)
library(sp)
library(rgeos)

## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-5 
##  Polygon checking: TRUE

library(tmap)
library(tmaptools)
library(sf)

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

library(rgdal)

## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/Braden/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/Braden/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/Braden/Documents/R/win-library/4.0/rgdal/proj

library(geojsonio)

## Registered S3 method overwritten by 'geojsonsf':
##   method        from   
##   print.geojson geojson

## 
## Attaching package: 'geojsonio'

## The following object is masked from 'package:base':
## 
##     pretty

2.5.8.2

This code block loads a previously saved workspace in place of reading the dataset directly from ArcGIS.

# Place your code here.

# Direct download kept for reference but disabled.
# NOTE(review): the commented URL previously had a stray "#" inside the
# dataset id ("...60991cb#ddda50..."); it is removed here - confirm against
# the data source before re-enabling.
#EW <- st_read("https://opendata.arcgis.com/datasets/8edafbe3276d4b56aec60991cbddda50_2.geojson")

# Restore previously saved objects from gv.RData instead
# (presumably including EW, which is used below - TODO confirm).
load(file = "~/CSC495/gv.RData")

This code block filters a dataset and maps the object.

# Place your code here.

# Keep only the features whose lad15cd code starts with E09.
LondonMap <- filter(EW, str_detect(lad15cd, "^E09"))

# Quick thematic map of the filtered features.
qtm(LondonMap)

2.5.8.3

This code block cleans the names of two datasets and then merges them and keeps only distinct rows.

# Place your code here.

# Standardise the attribute table's column names.
LondonData <- LondonData %>%
  clean_names()

# Clean EW's column names, keep the E09 features, attach the LondonData
# attributes by code, and keep one row per lad15cd.
BoroughDataMap <- EW %>%
  clean_names() %>%
  filter(str_detect(lad15cd, "^E09")) %>%
  merge(
    y = LondonData,
    by.x = "lad15cd",
    by.y = "new_code",
    no.dups = TRUE
  ) %>%
  distinct(lad15cd, .keep_all = TRUE)

This code block cleans the name of a dataframe, filters using string detect, and joins the data with another dataframe using a key.

# Place your code here.

# Same preparation, but joined with dplyr: every filtered EW feature is kept
# and matched to LondonData rows where lad15cd equals new_code.
BoroughDataMap2 <- EW %>%
  clean_names() %>%
  filter(str_detect(lad15cd, "^E09")) %>%
  left_join(LondonData, by = c("lad15cd" = "new_code"))

2.5.9

This code block loads tmap and plots a map.

# Place your code here.

library(tmap)
library(tmaptools)
# Switch tmap to static plotting mode.
tmap_mode("plot")

## tmap mode set to plotting

# Quick map of BoroughDataMap, filled by the job seekers allowance
# claimant rate column.
qtm(BoroughDataMap, 
    fill = "rate_of_job_seekers_allowance_jsa_claimants_2015")

This code block uses OpenStreetMap to fetch a basemap layer for the map.

# Place your code here.

# Read OpenStreetMap tiles for the bounding box of BoroughDataMap.
tmaplondon <- tmaptools::read_osm(
  st_bbox(BoroughDataMap),
  type = "osm",
  zoom = NULL
)

This code block creates a map of London using tmap functions.

# Place your code here.

# Static plotting mode.
tmap_mode("plot")

## tmap mode set to plotting

# Basemap raster first, then semi-transparent polygons coloured by the
# claimant rate, plus compass, scale bar and layout settings.
tm_shape(tmaplondon) +
  tm_rgb() +
  tm_shape(BoroughDataMap) +
  tm_polygons(
    "rate_of_job_seekers_allowance_jsa_claimants_2015",
    style = "jenks",
    palette = "YlOrBr",
    midpoint = NA,
    title = "Rate per 1,000 people",
    alpha = 0.5
  ) +
  tm_compass(position = c("left", "bottom"), type = "arrow") +
  tm_scale_bar(position = c("left", "bottom")) +
  tm_layout(
    title = "Job seekers' Allowance Claimants",
    legend.position = c("right", "bottom")
  )

This code block merges the EW dataframe with life expectancy.

# Place your code here.

# Attach the Life_expectancy4 table to EW by code (lad15cd = NewCode)
# and keep one row per lad15cd.
Life_expectancy4map <- EW %>%
  merge(
    y = Life_expectancy4,
    by.x = "lad15cd",
    by.y = "NewCode",
    no.dups = TRUE
  ) %>%
  distinct(lad15cd, .keep_all = TRUE)

This code block creates a tmap using the newly merged dataframe for expectancy.

# Place your code here.


# Static plotting mode.
tmap_mode("plot")

## tmap mode set to plotting

# Basemap raster first, then semi-transparent polygons coloured by UKdiff,
# plus compass, scale bar and layout settings.
tm_shape(tmaplondon) +
  tm_rgb() +
  tm_shape(Life_expectancy4map) +
  tm_polygons(
    "UKdiff",
    style = "pretty",
    palette = "Blues",
    midpoint = NA,
    title = "Number of years",
    alpha = 0.5
  ) +
  tm_compass(position = c("left", "bottom"), type = "arrow") +
  tm_scale_bar(position = c("left", "bottom")) +
  tm_layout(
    title = "Difference in life expectancy",
    legend.position = c("right", "bottom")
  )

2.6

This code block reads in the flytipping csv.

# Place your code here.

# Read the fly-tipping CSV from the URL and let readr guess the column types.
flytipping <- read_csv(
  file = "https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv"
)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   code = col_character(),
##   area = col_character(),
##   year = col_character(),
##   total_incidents = col_character(),
##   total_action_taken = col_character(),
##   warning_letters = col_character(),
##   fixed_penalty_notices = col_character(),
##   statutory_notices = col_character(),
##   formal_cautions = col_character(),
##   injunctions = col_character(),
##   prosecutions = col_character()
## )

This code block reads in the flytipping csv and sets its column types.

# Place your code here.

# Read the fly-tipping CSV again with explicit column types:
# code, area and year stay character; every other column is parsed as a number.
flytipping1 <- read_csv(
  file = "https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv",
  col_types = cols(
    .default = col_number(),
    code = col_character(),
    area = col_character(),
    year = col_character()
  )
)

## Warning: 15 parsing failures.
## row                   col expected actual                                                                                                                     file
## 116 warning_letters       a number      - 'https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv'
## 116 fixed_penalty_notices a number      - 'https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv'
## 116 statutory_notices     a number      - 'https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv'
## 116 formal_cautions       a number      - 'https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv'
## 116 injunctions           a number      - 'https://data.london.gov.uk/download/fly-tipping-incidents/536278ff-a391-4f20-bc79-9e705c9b3ec0/fly-tipping-borough.csv'
## ... ..................... ........ ...... ........................................................................................................................
## See problems(...) for more details.

# View the first rows of the typed data.
head(flytipping1)

This code block uses pivot_longer on flytipping to change the format of the dataframe and shows an alternative method.

# Place your code here.

# Stack columns 4 to 11 into name/value pairs:
# the old column names go to tipping_type, their values to count.
flytipping_long <- pivot_longer(
  flytipping1,
  cols = 4:11,
  names_to = "tipping_type",
  values_to = "count"
)

# Alternative: simply keep the first four columns.
flytipping2 <- flytipping1[, 1:4]

# View the first rows of the long table.
head(flytipping_long)

This code block uses pivot_wider on flytipping to change the format of the dataframe.

# Place your code here.

# Spread the long table back out: the first two columns identify each row,
# and every year/tipping_type pair becomes a column named "<year>_<type>"
# holding the count.
flytipping_wide <- pivot_wider(
  flytipping_long,
  id_cols = 1:2,
  names_from = c(year, tipping_type),
  names_sep = "_",
  values_from = count
)

# View the first rows of the wide table.
head(flytipping_wide)