Project 1

Problem 1

# Summary statistics of sepal width, computed separately for each iris species.
by(data = iris$Sepal.Width, INDICES = iris$Species, FUN = summary)

## iris$Species: setosa
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.300   3.200   3.400   3.428   3.675   4.400 
## -------------------------------------------------------- 
## iris$Species: versicolor
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.525   2.800   2.770   3.000   3.400 
## -------------------------------------------------------- 
## iris$Species: virginica
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   2.800   3.000   2.974   3.175   3.800

by(iris$Sepal.Width, iris$Species, summary)

Problem 2

# Side-by-side boxplots of fuel economy (mpg), one box per cylinder count.
# The formula's variables are looked up in `data`, and boxplot() groups by the
# distinct values of `cyl`, so no separate factor conversion is needed.
boxplot(mpg ~ cyl, data = mtcars,
        xlab = "Number of Cylinders", ylab = "Miles per Gallon")

Problem 3

# Simulate the sampling distribution of the mean of 75 Uniform(0, 6) draws.
set.seed(987)
# `small` is not referenced again in the visible code, but drawing it advances
# the random-number stream, so it stays in place ahead of the simulation.
small <- sample(0:6)
# One column per replicate: 1000 samples of size 75, filled column by column.
large <- matrix(runif(75 * 1000, min = 0, max = 6), nrow = 75, ncol = 1000)
sampleMeans <- vapply(
  seq_len(ncol(large)),
  function(j) mean(large[, j]),
  numeric(1)
)

hist(sampleMeans, xlab = "Sample Means", main = "Sample Mean Distribution")

# Spread of the simulated sample means (standard deviation across replicates).
sd(sampleMeans)

## [1] 0.1976011

# Centre of the simulated sample means.
mean(sampleMeans)

## [1] 3.002946

The Central Limit Theorem is illustrated here: the mean of the sample means (3.003) is close to the population mean of 3, and their standard deviation (0.198) is close to the theoretical standard error of sqrt(3/75) = 0.2.

Problem 4

# Load the geese data; the columns Aestimate and Bestimate are used below.
geese <- read.csv(file ='C:/Users/Shahid/Desktop/R/Data/geese.txt')

# Work on the log scale for both sets of estimates.
AG <- log(geese$Aestimate)
BG <- log(geese$Bestimate)

# Draw the two histograms side by side, then restore the previous graphics
# settings so later plots are not squeezed into the 1x2 layout.
old_par <- par(mfrow = c(1, 2))
hist(AG, main = "Aestimate", xlab = "Log of Aestimate")
hist(BG, main = "Bestimate", xlab = "Log of Bestimate")
par(old_par)

Problem 5

# Sugar content by cereal for 1979 and 2006. The files are read without a
# header row, so the columns are named after loading.
CerealSugar1979 <- setNames(
  read.csv("C:/Users/Shahid/Desktop/R/Data/CerealSugar1979.txt", header = FALSE),
  c("Cereal", "SugarContent")
)
CerealSugar2006 <- setNames(
  read.csv("C:/Users/Shahid/Desktop/R/Data/CerealSugar2006.txt", header = FALSE),
  c("Cereal", "SugarContent")
)

# The second column of each table holds the sugar values.
Sugar1979 <- CerealSugar1979[[2]]
Sugar2006 <- CerealSugar2006[[2]]

# Pooled-variance two-sample t-test. Every other setting is left at the
# t.test() default: two-sided, mu = 0, unpaired, 95% confidence level.
t.test(Sugar1979, Sugar2006, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  Sugar1979 and Sugar2006
## t = -0.40946, df = 109, p-value = 0.683
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.013194  4.611614
## sample estimates:
## mean of x mean of y 
##  26.76452  27.96531

The p-value is 0.683, which is greater than 0.05, so we fail to reject the null hypothesis. There is no significant evidence that the mean sugar content changed between 1979 and 2006.

Problem 6

# Read the retail data and regress Gross.Sales on Gross.Cash, Cash.Items and
# Gross.Check.
retail <- read.delim(file = "C:/Users/Shahid/Desktop/R/Data/retail.txt")
# NOTE(review): the terms are written as retail$... even though `data = retail`
# is supplied. Kept as written because the printed call and the coefficient
# labels are derived from this exact formula.
lm(
  retail$Gross.Sales ~ retail$Gross.Cash + retail$Cash.Items + retail$Gross.Check,
  data = retail
)

## 
## Call:
## lm(formula = retail$Gross.Sales ~ retail$Gross.Cash + retail$Cash.Items + 
##     retail$Gross.Check, data = retail)
## 
## Coefficients:
##        (Intercept)   retail$Gross.Cash   retail$Cash.Items  
##          -12.92979             0.08669             7.62916  
## retail$Gross.Check  
##            1.16480

Gross sales = -12.92979 + 0.08669 * Gross.Cash + 7.62916 * Cash.Items + 1.16480 * Gross.Check

Problem 7

# Read the kudzu data; the BMD and Treatment columns are used below.
kudzu <- read.csv("C:/Users/Shahid/Desktop/R/Data/kudzu.txt", sep = "")
# Standard deviation of BMD within each treatment group. Stored under its own
# name so the `kudzu` data frame is not overwritten by the summary.
kudzu_sd <- by(kudzu$BMD, kudzu$Treatment, sd)
kudzu_sd

## kudzu$Treatment: Control
## [1] 0.01158735
## -------------------------------------------------------- 
## kudzu$Treatment: HighDose
## [1] 0.01877105
## -------------------------------------------------------- 
## kudzu$Treatment: LowDose
## [1] 0.01151066

# Read the data again so that `kudzu` is the raw data frame at this point,
# whatever was assigned to that name earlier.
kudzu <- read.csv(file = "C:/Users/Shahid/Desktop/R/Data/kudzu.txt", sep = "")
# One-way ANOVA of BMD across treatment groups, via a linear model.
kudzu1 <- lm(kudzu$BMD ~ kudzu$Treatment, data = kudzu)
anova(kudzu1)

## Analysis of Variance Table
## 
## Response: kudzu$BMD
##                 Df    Sum Sq    Mean Sq F value   Pr(>F)   
## kudzu$Treatment  2 0.0031856 0.00159282  7.7182 0.001397 **
## Residuals       42 0.0086676 0.00020637                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

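The largest group standard deviation (0.0188) is less than twice the smallest (0.0115), so the equal-variance assumption is reasonable and a one-way ANOVA can be used. The p-value is 0.001397, so we reject the null hypothesis. There is significant evidence that the treatment level has an effect on BMD.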

Problem 8

# The file is read without a header row, so columns get the default names
# V1, V2, ...; count the entries of column V37 that equal 24.
Random <- read.csv(file = "C:/Users/Shahid/Desktop/R/Data/Random.csv", header = FALSE)
sum(Random[["V37"]] == 24)

## [1] 53

Problem 9

library(readxl)
MONDIAL <- read_excel(path = "C:/Users/Shahid/Desktop/R/Data/exporteda.xlsx")
# Log of population per unit area, one value per row of the sheet.
Data <- with(MONDIAL, log(Population / Area))
hist(Data, main = "Population Density", xlab = "Log of Population Density")

Problem 10

library(maps)

# readxl is referenced by namespace so this section does not depend on an
# earlier library(readxl) call.
City <- readxl::read_excel("C:/Users/Shahid/Desktop/R/Data/City.xlsx")

# Draw the world outline, then add one small blue marker per row of City at
# its Longitude/Latitude.
map(database = "world")
points(City$Longitude, City$Latitude, col = "blue", cex = 0.4)