Project1

Question 1

6-number summary for sepal width in Setosa

 # Min, quartiles, median, mean and max of sepal width for the setosa rows.
 summary(iris$Sepal.Width[iris$Species == "setosa"])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.300   3.200   3.400   3.428   3.675   4.400

6-number summary for sepal width in Versicolor

 # Min, quartiles, median, mean and max of sepal width for the versicolor rows.
 with(iris, summary(Sepal.Width[Species == "versicolor"]))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.525   2.800   2.770   3.000   3.400

6-number summary for sepal width in Virginica

# Min, quartiles, median, mean and max of sepal width for the virginica rows.
summary(split(iris$Sepal.Width, iris$Species)[["virginica"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   2.800   3.000   2.974   3.175   3.800

Question 2

# Store the cylinder count as a factor, then draw one mpg boxplot per
# cylinder group.
mtcars[["cyl2"]] <- as.factor(mtcars[["cyl"]])
boxplot(
  mpg ~ cyl2,
  data = mtcars,
  xlab = "Number of Cylinders",
  ylab = "Miles Per Gallon"
)

Question 3

# Draw 1000 samples of size 100 from a normal distribution with mean 10 and
# sd 5. Each column of the resulting 100 x 1000 matrix is one sample.
set.seed(500)
sample <- replicate(
  1000,
  rnorm(100, mean = 10, sd = 5),
  simplify = "array"
)
# One mean per column, i.e. per simulated sample.
array1 <- vapply(
  seq_len(ncol(sample)),
  function(j) mean(sample[, j]),
  numeric(1)
)
mean(array1)

## [1] 9.976664

sd(array1)

## [1] 0.4931614

Our results are consistent with the Central Limit Theorem. The mean of the sample means (9.976664) is only 0.023336 less than the population mean (10), and the standard deviation of the sample means (0.4931614) is only 0.0068386 less than the theoretical standard error, sd/sqrt(n) = 5/sqrt(100) = 0.5.

Question 4

# Load the geese data and add the natural log of both estimate columns.
geese <- read.csv(file="C:\\Users\\Denise\\Documents/geese.txt", header = TRUE)
geese$logAestimate <- log(geese$Aestimate)
geese$logBestimate <- log(geese$Bestimate)

# Every column is referenced explicitly through geese$, so the data frame is
# not attach()ed to the search path.
# Draw the two histograms side by side. par() returns the previous settings,
# which are restored afterwards so that later plots use the full device again
# instead of inheriting the 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
aHist <- hist(geese$logAestimate, xlab = "Logarithmic A Estimate", main = "A")
bHist <- hist(geese$logBestimate, xlab = "Logarithmic B Estimate", main = "B")
par(old_par)

Question 5

# Read the 1979 and 2006 cereal files (no header row) and name the first two
# columns.
cereal79 <- read.csv(
  file = "C:\\Users\\Denise\\Documents/CerealSugar1979.csv",
  header = FALSE
)
cereal06 <- read.csv(
  file = "C:\\Users\\Denise\\Documents/CerealSugar2006.csv",
  header = FALSE
)
names(cereal79)[1:2] <- c("Cereal", "Sugar Content")
names(cereal06)[1:2] <- c("Cereal", "Sugar Content")

# Two-sample t-test with pooled variance on the two sugar-content columns.
t.test(cereal79$`Sugar Content`, cereal06$`Sugar Content`, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  cereal79$`Sugar Content` and cereal06$`Sugar Content`
## t = -0.40946, df = 109, p-value = 0.683
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.013194  4.611614
## sample estimates:
## mean of x mean of y 
##  26.76452  27.96531

The p-value is 0.683, which is far above 0.05. We fail to reject the null hypothesis that the mean sugar content in cereals is the same in 1979 and 2006; there is no significant evidence that the sugar content has changed.

Question 6

# Read the retail data and regress gross sales on gross cash, cash items and
# gross check.
retail <- read.table(
  file = "C:\\Users\\Denise\\Documents/retail.txt",
  header = TRUE
)
model <- lm(
  formula = Gross.Sales ~ Gross.Cash + Cash.Items + Gross.Check,
  data = retail
)

Linear Model:

y = Gross Sales, x1 = Gross Cash, x2 = Cash Items, x3 = Gross Check

y = -12.92979 + 0.08669 x1 + 7.62916 x2 + 1.16480 x3

Question 7

# Read the kudzu data and report the standard deviation of BMD within each
# treatment group.
kudzu <- read.table(
  file = "C:\\Users\\Denise\\Documents/kudzu.txt",
  header = TRUE
)
by(data = kudzu$BMD, INDICES = kudzu$Treatment, FUN = sd)

## kudzu$Treatment: Control
## [1] 0.01158735
## -------------------------------------------------------- 
## kudzu$Treatment: HighDose
## [1] 0.01877105
## -------------------------------------------------------- 
## kudzu$Treatment: LowDose
## [1] 0.01151066

None of the standard deviations is more than twice as large as any other (the largest ratio is 0.01877/0.01151, about 1.63), so the equal-variance condition for an ANOVA test is met.

# One-way ANOVA of BMD on treatment group, followed by its ANOVA table.
kudzuAnova <- aov(formula = kudzu$BMD ~ kudzu$Treatment)
summary(object = kudzuAnova)

##                 Df   Sum Sq   Mean Sq F value Pr(>F)   
## kudzu$Treatment  2 0.003186 0.0015928   7.718 0.0014 **
## Residuals       42 0.008668 0.0002064                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Our p-value is very small (0.0014), which means there is significant evidence that the means for the treatment groups are not all the same.

Question 8

file8 <- read.csv(file = "C:\\Users\\Denise\\Documents/data.csv")
# Count the rows whose VAL equals 24. which() ignores missing comparisons, so
# NA entries are not counted.
freq <- length(which(file8$VAL == 24))
freq

## [1] 53

The frequency of 24 for VAL is 53.

Question 9

country <- read.csv("C:\\Users\\Denise\\Documents/CountryText.txt", header=FALSE)

# Convert a column to numbers by value. as.numeric() applied directly to a
# factor returns its internal level codes rather than the values it displays,
# so factors are passed through as.character() first. Other types are
# converted exactly as before.
as_number <- function(x) {
  if (is.factor(x)) as.numeric(as.character(x)) else as.numeric(x)
}

# Natural log of the ratio V6 / V5, plotted as a histogram.
# NOTE(review): presumably V6 is population and V5 is area - confirm against
# the data file.
countrylog <- log(as_number(country$V6) / as_number(country$V5))
loghist <- hist(countrylog, main = "Density of Countries", xlab= "Logarithm of population density")

Question 10

library(maps)
city <- read.csv(file = "C:\\Users\\Denise\\Documents/City.txt", header = FALSE)
# Draw the base map with the default settings of map().
map()
# Convert the two coordinate columns to numbers; entries that are not numeric
# become NA, which produces the warnings shown below.
city[["V5"]] <- as.numeric(as.character(city[["V5"]]))

## Warning: NAs introduced by coercion

city[["V6"]] <- as.numeric(as.character(city[["V6"]]))

## Warning: NAs introduced by coercion

# Overlay the cities as red points, with V5 on the x axis and V6 on the y axis.
points(x = city$V5, y = city$V6, col = "red")