R Bridge Course Week 2 Assignment: One of the challenges in working with data is wrangling. In this assignment we will use R to perform this task. Here is a list of data sets: http://vincentarelbundock.github.io/Rdatasets/ (click on the csv index for a list). Please select one, download it, and perform the following tasks:
—–selected guns.csv—–
# Two ways to read the file: one from a local directory, another from the
# GitHub repo.
# The local copy is read through its full path rather than via setwd(), so the
# session's working directory is left untouched.
# stringsAsFactors = TRUE keeps `state` and `law` as factors (the default
# before R 4.0.0); the per-level counts shown by summary() rely on that.
gun_data <- read.csv(
  file.path("/users/carlos/downloads", "guns.csv"),
  stringsAsFactors = TRUE
)
head(gun_data)
library(RCurl)
## Loading required package: bitops
git_guns <- getURL("https://raw.githubusercontent.com/cfalvarez36/MSDS_R_Programming/main/Guns.csv")
guns_2 <- read.csv(text = git_guns, stringsAsFactors = TRUE)
summary(guns_2)
## X year violent murder
## Min. : 1 Min. :1977 Min. : 47.0 Min. : 0.200
## 1st Qu.: 294 1st Qu.:1982 1st Qu.: 283.1 1st Qu.: 3.700
## Median : 587 Median :1988 Median : 443.0 Median : 6.400
## Mean : 587 Mean :1988 Mean : 503.1 Mean : 7.665
## 3rd Qu.: 880 3rd Qu.:1994 3rd Qu.: 650.9 3rd Qu.: 9.800
## Max. :1173 Max. :1999 Max. :2921.8 Max. :80.600
##
## robbery prisoners afam cauc
## Min. : 6.4 Min. : 19.0 Min. : 0.2482 Min. :21.78
## 1st Qu.: 71.1 1st Qu.: 114.0 1st Qu.: 2.2022 1st Qu.:59.94
## Median : 124.1 Median : 187.0 Median : 4.0262 Median :65.06
## Mean : 161.8 Mean : 226.6 Mean : 5.3362 Mean :62.95
## 3rd Qu.: 192.7 3rd Qu.: 291.0 3rd Qu.: 6.8507 3rd Qu.:69.20
## Max. :1635.1 Max. :1913.0 Max. :26.9796 Max. :76.53
##
## male population income density
## Min. :12.21 Min. : 0.4027 Min. : 8555 Min. : 0.000707
## 1st Qu.:14.65 1st Qu.: 1.1877 1st Qu.:11935 1st Qu.: 0.031911
## Median :15.90 Median : 3.2713 Median :13402 Median : 0.081569
## Mean :16.08 Mean : 4.8163 Mean :13725 Mean : 0.352038
## 3rd Qu.:17.53 3rd Qu.: 5.6856 3rd Qu.:15271 3rd Qu.: 0.177718
## Max. :22.35 Max. :33.1451 Max. :23647 Max. :11.102120
##
## state law
## Alabama : 23 no :888
## Alaska : 23 yes:285
## Arizona : 23
## Arkansas : 23
## California: 23
## Colorado : 23
## (Other) :1035
# Summarise the copy read from the local file (gun_data).
summary(gun_data)
## X year violent murder
## Min. : 1 Min. :1977 Min. : 47.0 Min. : 0.200
## 1st Qu.: 294 1st Qu.:1982 1st Qu.: 283.1 1st Qu.: 3.700
## Median : 587 Median :1988 Median : 443.0 Median : 6.400
## Mean : 587 Mean :1988 Mean : 503.1 Mean : 7.665
## 3rd Qu.: 880 3rd Qu.:1994 3rd Qu.: 650.9 3rd Qu.: 9.800
## Max. :1173 Max. :1999 Max. :2921.8 Max. :80.600
##
## robbery prisoners afam cauc
## Min. : 6.4 Min. : 19.0 Min. : 0.2482 Min. :21.78
## 1st Qu.: 71.1 1st Qu.: 114.0 1st Qu.: 2.2022 1st Qu.:59.94
## Median : 124.1 Median : 187.0 Median : 4.0262 Median :65.06
## Mean : 161.8 Mean : 226.6 Mean : 5.3362 Mean :62.95
## 3rd Qu.: 192.7 3rd Qu.: 291.0 3rd Qu.: 6.8507 3rd Qu.:69.20
## Max. :1635.1 Max. :1913.0 Max. :26.9796 Max. :76.53
##
## male population income density
## Min. :12.21 Min. : 0.4027 Min. : 8555 Min. : 0.000707
## 1st Qu.:14.65 1st Qu.: 1.1877 1st Qu.:11935 1st Qu.: 0.031911
## Median :15.90 Median : 3.2713 Median :13402 Median : 0.081569
## Mean :16.08 Mean : 4.8163 Mean :13725 Mean : 0.352038
## 3rd Qu.:17.53 3rd Qu.: 5.6856 3rd Qu.:15271 3rd Qu.: 0.177718
## Max. :22.35 Max. :33.1451 Max. :23647 Max. :11.102120
##
## state law
## Alabama : 23 no :888
## Alaska : 23 yes:285
## Arizona : 23
## Arkansas : 23
## California: 23
## Colorado : 23
## (Other) :1035
# Keep only the New York rows; which() drops NA comparisons, as subset() does.
ny_gun <- gun_data[which(gun_data$state == "New York"), , drop = FALSE]
ny_gun
# Copy the subset and give every column a more descriptive name.
ny_gun_edit <- ny_gun
names(ny_gun_edit) <- c(
  "Index", "Year", "Violence", "Murder", "Theft", "Prisoners",
  "African_American", "Caucasian", "Male", "Population", "Income",
  "Density", "State", "Carry_Law"
)
ny_gun_edit
# Original column names, for comparison with the renamed ones.
colnames(ny_gun)
## [1] "X" "year" "violent" "murder" "robbery"
## [6] "prisoners" "afam" "cauc" "male" "population"
## [11] "income" "density" "state" "law"
# Column names of the renamed data frame.
colnames(ny_gun_edit)
## [1] "Index" "Year" "Violence"
## [4] "Murder" "Theft" "Prisoners"
## [7] "African_American" "Caucasian" "Male"
## [10] "Population" "Income" "Density"
## [13] "State" "Carry_Law"
# Summary statistics for the renamed New York subset.
summary(ny_gun_edit)
## Index Year Violence Murder
## Min. :737.0 Min. :1977 Min. : 588.8 Min. : 5.00
## 1st Qu.:742.5 1st Qu.:1982 1st Qu.: 841.5 1st Qu.: 9.80
## Median :748.0 Median :1988 Median : 965.6 Median :11.10
## Mean :748.0 Mean :1988 Mean : 941.3 Mean :10.67
## 3rd Qu.:753.5 3rd Qu.:1994 3rd Qu.:1071.5 3rd Qu.:12.50
## Max. :759.0 Max. :1999 Max. :1180.9 Max. :14.50
##
## Theft Prisoners African_American Caucasian
## Min. :240.8 Min. : 98.0 Min. :5.272 Min. :55.93
## 1st Qu.:472.4 1st Qu.:151.5 1st Qu.:6.246 1st Qu.:56.90
## Median :514.1 Median :229.0 Median :7.330 Median :58.52
## Mean :501.8 Mean :244.7 Mean :7.142 Mean :58.95
## 3rd Qu.:588.1 3rd Qu.:347.0 3rd Qu.:8.055 3rd Qu.:61.06
## Max. :684.0 Max. :397.0 Max. :8.679 Max. :62.96
##
## Male Population Income Density
## Min. :13.20 Min. :17.57 Min. :13404 Min. :0.3676
## 1st Qu.:13.96 1st Qu.:17.72 1st Qu.:13980 1st Qu.:0.3729
## Median :15.47 Median :17.94 Median :17349 Median :0.3787
## Mean :15.27 Mean :17.91 Mean :16447 Mean :0.3780
## 3rd Qu.:16.56 3rd Qu.:18.14 3rd Qu.:17829 3rd Qu.:0.3842
## Max. :17.11 Max. :18.20 Max. :20273 Max. :0.3853
##
## State Carry_Law
## New York :23 no :23
## Alabama : 0 yes: 0
## Alaska : 0
## Arizona : 0
## Arkansas : 0
## California: 0
## (Other) : 0
# Mean and median of the robbery column over the full data set.
gun_robbery_mean <- mean(gun_data$robbery)
gun_robbery_median <- median(gun_data$robbery)
# Mean and median of the same measure (renamed to Theft) in the New York subset.
gun_theft_mean <- mean(ny_gun_edit$Theft)
gun_theft_median <- median(ny_gun_edit$Theft)
# Collect the four statistics in one single-row data frame for comparison;
# the column names are taken from the variable names.
mean_median_df <- data.frame(
  gun_robbery_mean,
  gun_robbery_median,
  gun_theft_mean,
  gun_theft_median
)
mean_median_df
The values changed because the scope has changed. What was originally focusing on the number of rows below:
# Number of rows whose robbery value is not missing.
sum(!is.na(gun_data$robbery))
## [1] 1173
has now changed to focus on this number of rows:
# Number of rows whose Theft value is not missing.
sum(!is.na(ny_gun_edit$Theft))
## [1] 23
The mean and median will definitely change if we’re comparing the calculations between 1173 values and 23 values.
# Recode three specific Theft values in sequence: 514.1 -> 0, 610.7 -> 1,
# 641.3 -> 2. Each step starts from the previous step's result.
# Matches use a small tolerance rather than `==`, because exact equality on
# doubles is fragile; which() keeps NA comparisons out of the index.
ny_gun_edit_1 <- replace(
  ny_gun_edit$Theft,
  which(abs(ny_gun_edit$Theft - 514.1) < 1e-8),
  0
)
ny_gun_edit_2 <- replace(
  ny_gun_edit_1,
  which(abs(ny_gun_edit_1 - 610.7) < 1e-8),
  1
)
ny_gun_edit_3 <- replace(
  ny_gun_edit_2,
  which(abs(ny_gun_edit_2 - 641.3) < 1e-8),
  2
)
ny_gun_edit_3 # Shows the final product where three values have been changed.
## [1] 472.6 472.1 529.6 2.0 684.0 1.0 536.5 506.9 504.4 0.0 503.3
## [12] 544.4 579.3 624.7 622.1 596.9 561.2 476.7 399.7 340.0 309.3 270.3
## [23] 240.8
#' Build the renamed New York subset with three recoded Theft values.
#'
#' Takes the Guns data (downloaded from `url` unless `data` is supplied),
#' keeps the rows where `state == "New York"`, renames the 14 columns, and
#' recodes three specific Theft values: 514.1 -> 0, 610.7 -> 1, 641.3 -> 2.
#'
#' @param data Optional data frame holding the 14 Guns columns in their
#'   original order, including a `state` column. When `NULL` (the default)
#'   the CSV is downloaded from `url`.
#' @param url Location of the raw Guns CSV; only used when `data` is `NULL`.
#' @return The New York subset as a data frame with renamed columns and the
#'   recoded `Theft` column.
example_row <- function(
    data = NULL,
    url = "https://raw.githubusercontent.com/cfalvarez36/MSDS_R_Programming/main/Guns.csv") {
  if (is.null(data)) {
    # Namespace-qualified call, so the function does not attach RCurl to the
    # caller's search path as a side effect.
    data <- read.csv(text = RCurl::getURL(url))
  }
  # The renaming below is positional, so the column count has to match.
  stopifnot(is.data.frame(data), ncol(data) == 14L)

  ny_gun_edit <- setNames(
    subset(data, state == "New York"),
    c(
      "Index", "Year", "Violence", "Murder", "Theft", "Prisoners",
      "African_American", "Caucasian", "Male", "Population", "Income",
      "Density", "State", "Carry_Law"
    )
  )

  # Old value -> new value pairs, applied one after another.
  recode_from <- c(514.1, 610.7, 641.3)
  recode_to <- c(0, 1, 2)
  theft <- ny_gun_edit$Theft
  for (i in seq_along(recode_from)) {
    # Tolerance match instead of `==`: exact equality on doubles is fragile.
    hit <- which(abs(theft - recode_from[i]) < 1e-8)
    theft[hit] <- recode_to[i]
  }
  # Theft is the fifth column, the one the recoded vector belongs to.
  ny_gun_edit$Theft <- theft

  ny_gun_edit
}
# Run the function; at top level the returned data frame is printed.
example_row()
Please submit your .rmd file and the .csv file as well as a link to your RPubs.