Homework2.utf8.md

#importing library function to filter east coast data
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Read the arrests data into x. The path is relative, so the file is looked
# up in the current working directory.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, while the recorded str() and
# summary() output for this data shows City and the coast flags as factors.
x <- read.csv("USArrests_Coasts.csv", header = TRUE, stringsAsFactors = TRUE)
# Keep only the east coast rows. filter is namespaced because loading dplyr
# masks stats::filter.
y <- dplyr::filter(x, East.Coast == "Yes")

# See the structure of the filtered dataset
str(y)

## 'data.frame':    15 obs. of  8 variables:
##  $ City      : Factor w/ 50 levels "Alabama","Alaska",..: 7 8 9 10 19 20 21 29 30 32 ...
##  $ Murder    : num  3.3 5.9 15.4 17.4 2.1 11.3 4.4 2.1 7.4 11.1 ...
##  $ Assault   : int  110 238 335 211 83 300 149 57 159 254 ...
##  $ UrbanPop  : int  77 72 80 60 51 67 85 56 89 86 ...
##  $ Rape      : num  11.1 15.8 31.9 25.8 7.8 27.8 16.3 9.5 18.8 26.1 ...
##  $ East.Coast: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ West.Coast: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Any.Coast : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...

# There are 15 observations and 8 variables
# Summary of the filtered dataset: mean, median, quartiles and any NA values
summary(y)

##           City       Murder         Assault         UrbanPop    
##  Connecticut:1   Min.   : 2.10   Min.   : 48.0   Min.   :32.00  
##  Delaware   :1   1st Qu.: 3.35   1st Qu.:108.0   1st Qu.:58.00  
##  Florida    :1   Median : 6.30   Median :159.0   Median :72.00  
##  Georgia    :1   Mean   : 7.68   Mean   :177.3   Mean   :68.33  
##  Maine      :1   3rd Qu.:11.20   3rd Qu.:246.0   3rd Qu.:82.50  
##  Maryland   :1   Max.   :17.40   Max.   :335.0   Max.   :89.00  
##  (Other)    :9                                                  
##       Rape       East.Coast West.Coast Any.Coast
##  Min.   : 7.80   No : 0     No :15     No : 0   
##  1st Qu.:11.15   Yes:15     Yes: 0     Yes:15   
##  Median :16.30                                  
##  Mean   :17.90                                  
##  3rd Qu.:24.15                                  
##  Max.   :31.90                                  
##

# Box plot of Murder for east coast vs. non-east coast rows, using the full
# dataset (x). The formula interface groups by East.Coast whether that column
# is a factor or a character vector, whereas plot() only draws box plots when
# its x argument is a factor.
boxplot(Murder ~ East.Coast, data = x, xlab = "East Coast", ylab = "Murder",
        main = "Murder rate in east coast, US")

# Box plot of Assault for east coast vs. non-east coast rows, using the full
# dataset (x). The formula interface groups by East.Coast whether that column
# is a factor or a character vector.
# Median of Yes is 152 whereas median of No is 152.
# NOTE(review): summary(y) reports an east coast Assault median of 159, so
# the value read off the plot for Yes should be re-checked.
# Yes is skewed on both sides but more right-skewed. No is also skewed on
# both sides, but more to the right.
# The axis label and title spell "Assault" correctly (was "Assualt").
boxplot(Assault ~ East.Coast, data = x, xlab = "East Coast", ylab = "Assault",
        main = "Assault rate in east coast, US")

# Box plot of Rape for east coast vs. non-east coast rows, using the full
# dataset (x). The formula interface groups by East.Coast whether that column
# is a factor or a character vector.
# Median of Yes is about 17 whereas median of No is about 21.
# Yes is more right-skewed. No is skewed on both sides, but more to the
# right, and has one outlier.
boxplot(Rape ~ East.Coast, data = x, xlab = "East Coast", ylab = "Rape",
        main = "Rape rate in east coast, US")

# One-sample t-test on the filtered dataset (y) for Murder on the east coast:
# compute the sample mean first, then run the t-test against it.
# Because mu is set to the sample's own mean, t is exactly 0 and the
# one-sided p-value is 0.5 by construction, so the null hypothesis is not
# rejected at the 0.05 significance level (95% confidence).
# NOTE(review): to compare the east coast against the whole dataset, mu
# should presumably be mean(x$Murder) - confirm the intended hypothesis.
MMurder <- mean(y$Murder)
t.test(y$Murder, mu = MMurder, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  y$Murder
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 7.68
## 95 percent confidence interval:
##  5.340755      Inf
## sample estimates:
## mean of x 
##      7.68

# One-sample t-test on the filtered dataset (y) for Assault on the east coast:
# compute the sample mean first, then run the t-test against it.
# Because mu is set to the sample's own mean, t is exactly 0 and the
# one-sided p-value is 0.5 by construction, so the null hypothesis is not
# rejected at the 0.05 significance level (95% confidence).
# NOTE(review): to compare the east coast against the whole dataset, mu
# should presumably be mean(x$Assault) - confirm the intended hypothesis.
MAssault <- mean(y$Assault)
t.test(y$Assault, mu = MAssault, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  y$Assault
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 177.2667
## 95 percent confidence interval:
##  136.5425      Inf
## sample estimates:
## mean of x 
##  177.2667

# One-sample t-test on the filtered dataset (y) for Rape on the east coast:
# compute the sample mean first, then run the t-test against it.
# Because mu is set to the sample's own mean, t is exactly 0 and the
# one-sided p-value is 0.5 by construction, so the null hypothesis is not
# rejected at the 0.05 significance level (95% confidence).
# NOTE(review): to compare the east coast against the whole dataset, mu
# should presumably be mean(x$Rape) - confirm the intended hypothesis.
MRape <- mean(y$Rape)
t.test(y$Rape, mu = MRape, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  y$Rape
## t = 0, df = 14, p-value = 0.5
## alternative hypothesis: true mean is greater than 17.9
## 95 percent confidence interval:
##  14.40834      Inf
## sample estimates:
## mean of x 
##      17.9

Homework2.R

arnabchakraboty

2020-02-06