#loaded the tidyverse library in order to use the read_csv function
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.1 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'readr' was built under R version 3.4.2
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.2
## Warning: package 'forcats' was built under R version 3.4.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the USArrests CSV from its absolute location on the author's machine.
arrests <- read_csv(
  file = "C:\\Users\\bkl2001\\Documents\\Personal\\CUNY\\Classes\\Winter Bridge\\USArrests.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_character(),
## Murder = col_double(),
## Assault = col_integer(),
## UrbanPop = col_integer(),
## Rape = col_double()
## )
# Inspect the column names, types, and first few values of the imported data.
glimpse(arrests)
## Observations: 50
## Variables: 5
## $ X1 <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californ...
## $ Murder <dbl> 13.2, 10.0, 8.1, 8.8, 9.0, 7.9, 3.3, 5.9, 15.4, 17.4,...
## $ Assault <int> 236, 263, 294, 190, 276, 204, 110, 238, 335, 211, 46,...
## $ UrbanPop <int> 58, 48, 80, 50, 91, 78, 77, 72, 80, 60, 83, 54, 83, 6...
## $ Rape <dbl> 21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9,...
# Based on the output of glimpse(), the missing header was automatically given the name X1; rename that column to States.
# Swap the auto-generated "X1" header for a descriptive one, then confirm
# the resulting column names.
colnames(arrests)[colnames(arrests) == "X1"] <- "States"
colnames(arrests)
## [1] "States" "Murder" "Assault" "UrbanPop" "Rape"
# Use the summary function to get an overview of every column in the data set.
summary(arrests)
## States Murder Assault UrbanPop
## Length:50 Min. : 0.800 Min. : 45.0 Min. :32.00
## Class :character 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50
## Mode :character Median : 7.250 Median :159.0 Median :66.00
## Mean : 7.788 Mean :170.8 Mean :65.54
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75
## Max. :17.400 Max. :337.0 Max. :91.00
## Rape
## Min. : 7.30
## 1st Qu.:15.07
## Median :20.10
## Mean :21.23
## 3rd Qu.:26.18
## Max. :46.00
# Report the mean and median for at least two attributes.
# Murder: mean, then median
mean(arrests[["Murder"]])
## [1] 7.788
median(arrests[["Murder"]])
## [1] 7.25
# Assault: mean, then median
mean(arrests[["Assault"]])
## [1] 170.76
median(arrests[["Assault"]])
## [1] 159
# Create a new data frame holding a subset of the rows of `arrests`.
# `dangerous` keeps the states whose murder rate is above the overall MEAN
# murder rate (7.788 for this data). The threshold is computed from the data
# instead of being hard-coded, so it stays correct if the input changes;
# na.rm = TRUE keeps a missing value from turning the threshold into NA.
dangerous <- subset(arrests, Murder > mean(arrests$Murder, na.rm = TRUE))
# Preview the first rows of the subset.
head(dangerous)
## # A tibble: 6 x 5
## States Murder Assault UrbanPop Rape
## <chr> <dbl> <int> <int> <dbl>
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.10 294 80 31.0
## 4 Arkansas 8.80 190 50 19.5
## 5 California 9.00 276 91 40.6
## 6 Colorado 7.90 204 78 38.7
# NOTE(review): plyr is attached after dplyr (loaded earlier via tidyverse), so
# plyr's arrange, count, desc, mutate, rename, summarise, etc. now mask the
# dplyr versions, as the load message below reports.
library("plyr")
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
# Give the subset's columns more descriptive names. rename() returns a new
# tibble rather than modifying its input, so the result is stored in
# `dangerous_renamed`; `dangerous` itself keeps its original column names,
# which the code further down still relies on (dangerous$Murder, etc.).
# dplyr::rename() is called with an explicit namespace (syntax: new = old)
# because plyr was attached after dplyr and masks the unqualified rename().
dangerous_renamed <- dplyr::rename(
  dangerous,
  DangerousStates = States,
  HomicideRate = Murder,
  AssaultRate = Assault,
  Population = UrbanPop,
  RapeRate = Rape
)
dangerous_renamed
## # A tibble: 23 x 5
## DangerousStates HomicideRate AssaultRate Population RapeRate
## <chr> <dbl> <int> <int> <dbl>
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.10 294 80 31.0
## 4 Arkansas 8.80 190 50 19.5
## 5 California 9.00 276 91 40.6
## 6 Colorado 7.90 204 78 38.7
## 7 Florida 15.4 335 80 31.9
## 8 Georgia 17.4 211 60 25.8
## 9 Illinois 10.4 249 83 24.0
## 10 Kentucky 9.70 109 52 16.3
## # ... with 13 more rows
# Print `dangerous` again: the rename() call above did not assign its result
# back to `dangerous`, so the original column names are still in place.
dangerous
## # A tibble: 23 x 5
## States Murder Assault UrbanPop Rape
## <chr> <dbl> <int> <int> <dbl>
## 1 Alabama 13.2 236 58 21.2
## 2 Alaska 10.0 263 48 44.5
## 3 Arizona 8.10 294 80 31.0
## 4 Arkansas 8.80 190 50 19.5
## 5 California 9.00 276 91 40.6
## 6 Colorado 7.90 204 78 38.7
## 7 Florida 15.4 335 80 31.9
## 8 Georgia 17.4 211 60 25.8
## 9 Illinois 10.4 249 83 24.0
## 10 Kentucky 9.70 109 52 16.3
## # ... with 13 more rows
# Use the summary function to create an overview of the new data frame.
summary(dangerous)
## States Murder Assault UrbanPop
## Length:23 Min. : 7.90 Min. :109.0 Min. :44.00
## Class :character 1st Qu.: 9.35 1st Qu.:202.5 1st Qu.:55.00
## Mode :character Median :11.40 Median :252.0 Median :67.00
## Mean :11.75 Mean :241.7 Mean :66.65
## 3rd Qu.:13.20 3rd Qu.:277.5 3rd Qu.:80.00
## Max. :17.40 Max. :337.0 Max. :91.00
## Rape
## Min. :16.10
## 1st Qu.:21.70
## Median :26.10
## Mean :27.82
## 3rd Qu.:32.00
## Max. :46.00
# Murder within the dangerous subset: mean, then median
mean(dangerous[["Murder"]])
## [1] 11.75217
median(dangerous[["Murder"]])
## [1] 11.4
# Assault within the dangerous subset: mean, then median
mean(dangerous[["Assault"]])
## [1] 241.7391
median(dangerous[["Assault"]])
## [1] 252
# Horizontal box plots comparing murder rates across all states with the
# murder rates of the dangerous subset.
murder <- c("Full List", "Most Dangerous")
boxplot(
  arrests$Murder,
  dangerous$Murder,
  names = murder,
  horizontal = TRUE,
  main = "Murder Rates of Top Dangerous States",
  xlab = "Murder Rate",
  col = "beige"
)
# For at least 3 values in a column, rename them so that every occurrence of
# each value in that column is renamed.
# Replace the first six states in `dangerous` with their nicknames. The lookup
# vector maps state name -> nickname; Arkansas (the fourth state) is included
# so that the first five rows are all renamed, and "Centennial" is spelled
# correctly.
state_nicknames <- c(
  "Alabama" = "Yellowhammer State",
  "Alaska" = "The Last Frontier",
  "Arizona" = "The Grand Canyon State",
  "Arkansas" = "The Natural State",
  "California" = "The Golden State",
  "Colorado" = "The Centennial State"
)
# Only rows whose state has an entry in the lookup are touched; every other
# state name is left unchanged.
has_nickname <- dangerous$States %in% names(state_nicknames)
dangerous$States[has_nickname] <-
  unname(state_nicknames[dangerous$States[has_nickname]])
head(dangerous$States, n = 5)
## [1] "Yellowhammer State" "The Last Frontier"
## [3] "The Grand Canyon State" "The Natural State"
## [5] "The Golden State"