#importing library function to filter east coast data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the coast-annotated USArrests data into x. The path is relative, so the
# CSV must sit in the current working directory (the desktop, in the original
# author's setup).
# stringsAsFactors = TRUE is explicit on purpose: since R 4.0 read.csv() keeps
# text columns as character by default, while the str()/summary() output and
# the plots grouped by East.Coast in this script rely on City and the *.Coast
# flags being factors.
x <- read.csv("USArrests_Coasts.csv", header = TRUE, stringsAsFactors = TRUE)
# Keep only the rows flagged as East Coast. The call is namespaced because
# attaching dplyr masks stats::filter, so a bare filter() depends on which
# package was attached last.
y <- dplyr::filter(x, East.Coast == "Yes")
# Inspect the structure of the filtered dataset.
str(y)
## 'data.frame': 15 obs. of 8 variables:
## $ City : Factor w/ 50 levels "Alabama","Alaska",..: 7 8 9 10 19 20 21 29 30 32 ...
## $ Murder : num 3.3 5.9 15.4 17.4 2.1 11.3 4.4 2.1 7.4 11.1 ...
## $ Assault : int 110 238 335 211 83 300 149 57 159 254 ...
## $ UrbanPop : int 77 72 80 60 51 67 85 56 89 86 ...
## $ Rape : num 11.1 15.8 31.9 25.8 7.8 27.8 16.3 9.5 18.8 26.1 ...
## $ East.Coast: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ West.Coast: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Any.Coast : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
#There are 15 observations (rows) and 8 variables (columns)
# Summary of the filtered dataset: minimum, quartiles, median, mean and maximum
# for each numeric column and per-level counts for each factor column
# (summary() also reports NA counts when a column has missing values).
summary(y)
## City Murder Assault UrbanPop
## Connecticut:1 Min. : 2.10 Min. : 48.0 Min. :32.00
## Delaware :1 1st Qu.: 3.35 1st Qu.:108.0 1st Qu.:58.00
## Florida :1 Median : 6.30 Median :159.0 Median :72.00
## Georgia :1 Mean : 7.68 Mean :177.3 Mean :68.33
## Maine :1 3rd Qu.:11.20 3rd Qu.:246.0 3rd Qu.:82.50
## Maryland :1 Max. :17.40 Max. :335.0 Max. :89.00
## (Other) :9
## Rape East.Coast West.Coast Any.Coast
## Min. : 7.80 No : 0 No :15 No : 0
## 1st Qu.:11.15 Yes:15 Yes: 0 Yes:15
## Median :16.30
## Mean :17.90
## 3rd Qu.:24.15
## Max. :31.90
##
# Box plot of the murder rate for every state in the full dataset (x),
# grouped by the East.Coast flag.
# boxplot() with a formula replaces plot(x = <factor>, y = <numeric>): for a
# factor it draws the same box plot, and it also works when East.Coast was
# read as a character column (the read.csv() default since R 4.0), a case in
# which plot() stops with an error.
boxplot(Murder ~ East.Coast, data = x,
        xlab = "East Coast", ylab = "Murder",
        main = "Murder rate in east coast, US")

# Box plot of the assault rate for every state in the full dataset (x),
# grouped by the East.Coast flag.
# The East Coast ("Yes") median is 159 according to summary(y).
# NOTE(review): the original note gave 152 as the median of both groups; the
# "No" value cannot be checked from this script -- confirm against the data.
# Author's reading of the plot: both groups spread to either side of the
# median, with the longer tail toward higher values (right skew).
# boxplot() with a formula replaces plot(x = <factor>, y = <numeric>): for a
# factor it draws the same box plot, and it also works when East.Coast was
# read as a character column (the read.csv() default since R 4.0).
# The axis label and title spelling is corrected from "Assualt" to "Assault".
boxplot(Assault ~ East.Coast, data = x,
        xlab = "East Coast", ylab = "Assault",
        main = "Assault rate in east coast, US")

# Box plot of the rape rate for every state in the full dataset (x),
# grouped by the East.Coast flag.
# The East Coast ("Yes") median is 16.3 according to summary(y); the original
# note rounded it to 17.
# NOTE(review): the original note gave 21 as the median of the "No" group; that
# value cannot be checked from this script -- confirm against the data.
# Author's reading of the plot: "Yes" is right-skewed; "No" spreads to both
# sides with the longer tail to the right and shows one outlier.
# boxplot() with a formula replaces plot(x = <factor>, y = <numeric>): for a
# factor it draws the same box plot, and it also works when East.Coast was
# read as a character column (the read.csv() default since R 4.0).
boxplot(Rape ~ East.Coast, data = x,
        xlab = "East Coast", ylab = "Rape",
        main = "Rape rate in east coast, US")

# One-sample, one-sided t-test on the East Coast murder rates (filtered set y).
# The sample mean is computed first and then used as the null value mu.
# Because mu is the sample's own mean, the statistic is necessarily t = 0 and
# the one-sided p-value is 0.5 whatever the data are, so the result carries no
# evidence either way (0.5 is not a confidence level).
# NOTE(review): a different null value was probably intended, e.g. the mean
# over all states in x -- confirm with the author before relying on this.
MMurder <- mean(y$Murder)
t.test(y$Murder, alternative = "greater", mu = MMurder)
##
## One Sample t-test
##
## data: y$Murder
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 7.68
## 95 percent confidence interval:
## 5.340755 Inf
## sample estimates:
## mean of x
## 7.68
# One-sample, one-sided t-test on the East Coast assault rates (filtered set y).
# The sample mean is computed first and then used as the null value mu.
# Because mu is the sample's own mean, the statistic is necessarily t = 0 and
# the one-sided p-value is 0.5 whatever the data are, so the result carries no
# evidence either way (0.5 is not a confidence level).
# NOTE(review): a different null value was probably intended, e.g. the mean
# over all states in x -- confirm with the author before relying on this.
MAssault <- mean(y$Assault)
t.test(y$Assault, alternative = "greater", mu = MAssault)
##
## One Sample t-test
##
## data: y$Assault
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 177.2667
## 95 percent confidence interval:
## 136.5425 Inf
## sample estimates:
## mean of x
## 177.2667
# One-sample, one-sided t-test on the East Coast rape rates (filtered set y).
# The sample mean is computed first and then used as the null value mu.
# Because mu is the sample's own mean, the statistic is necessarily t = 0 and
# the one-sided p-value is 0.5 whatever the data are, so the result carries no
# evidence either way (0.5 is not a confidence level).
# NOTE(review): a different null value was probably intended, e.g. the mean
# over all states in x -- confirm with the author before relying on this.
MRape <- mean(y$Rape)
t.test(y$Rape, alternative = "greater", mu = MRape)
##
## One Sample t-test
##
## data: y$Rape
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 17.9
## 95 percent confidence interval:
## 14.40834 Inf
## sample estimates:
## mean of x
## 17.9