### Exercise 1
#1-1
#1-1-a
# Two parallel vectors: numeric heights and the matching character names.
heights <- c(71, 68, 60)
names <- c("Keni", "Qiao", "Chen")
#1-1-b
# Bind the vectors side by side as columns. A matrix holds a single type,
# so the numeric heights are coerced to character strings in the result.
cbind(heights, names)
## heights names
## [1,] "71" "Keni"
## [2,] "68" "Qiao"
## [3,] "60" "Chen"
#1-2
# Load the births data from a CSV file whose first row holds the column names.
# NOTE(review): the path is absolute and specific to one machine; the script
# fails elsewhere unless the file exists at exactly this location.
NCbirths <- read.csv(
  file = "C:/Users/kenie/OneDrive/桌面/405398682_lab1/births.csv",
  header = TRUE
)
# Print the first rows as a quick check that the import worked.
head(NCbirths)
#1-3
#1-3-a
# Report the directory in which the installed "maps" package is found.
find.package(package = "maps")
## [1] "C:/Users/kenie/AppData/Local/R/win-library/4.3/maps"
#1-3-b
# Attach the package, then draw its "state" map database.
library("maps")
map(database = "state")
#1-4
#1-4-1a
# Pull the birth-weight column out of the data frame.
weights <- NCbirths[["weight"]]
#1-4-b The weights are in ounces.
#1-4-c
# Convert ounces to pounds (16 ounces per pound).
weight_in_pounds <- NCbirths[["weight"]] / 16
#1-4-d
# Show the first 20 converted weights.
weight_in_pounds[seq_len(20)]
## [1] 7.7500 11.0625 6.6875 9.0000 7.3125 6.1250 9.1875 8.6250 6.5000
## [10] 7.6875 9.5625 8.0625 7.4375 6.7500 6.6250 7.8125 7.1875 8.0000
## [19] 8.2500 5.1875
###Exercise 2
#2-1
# Average birth weight in pounds (ounces divided by 16).
mean(NCbirths[["weight"]] / 16)
## [1] 7.2532
#2-2
library(mosaic)
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Attaching package: 'mosaic'
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum

# Percentage of records in each smoking-habit category.
tally(NCbirths[["Habit"]], format = "percent")
## X
## NonSmoker Smoker
## 90.61245 9.38755
#2-3
# Gap, in percentage points, between 21 and the second tally entry (Smoker).
# NOTE(review): 21 is presumably a reference smoking rate given in the lab
# handout - confirm its source.
21 - tally(NCbirths[["Habit"]], format = "percent")[2]
## Smoker
## 11.61245
###Exercise 3
#3-1
library(ggformula)
library(ggplot2)
library(ggridges)
# Dot plot of the birth weights in pounds.
dotPlot(weight_in_pounds)

#3-2
# The same data drawn three times with `breaks` set to 3, 20 and 100,
# to compare how the bin width changes the picture.
histogram(weight_in_pounds, breaks = 3)

histogram(weight_in_pounds, breaks = 20)

histogram(weight_in_pounds, breaks = 100)

#Using 20 bins gives the best visualization: with 3 bins the bars are too wide, and with 100 bins they are too narrow. In both of those cases it is hard for us to find a pattern.
#3-3
# Side-by-side boxplots of father's age (Fage) and mother's age (Mage).
# The boxes are labelled explicitly: without `names` they are drawn as
# "1" and "2", which leaves the reader guessing which parent is which.
boxplot(
  NCbirths$Fage, NCbirths$Mage,
  names = c("Father", "Mother"),
  ylab = "Age"
)

#The fathers tend to be older.
#3-4
# Histogram of birth weight conditioned on smoking habit: one panel per
# Habit level, arranged as 1 column by 2 rows.
histogram(~ weight | Habit, data = NCbirths, layout = c(1, 2))

#This code draws two stacked histograms showing the distribution of birth weight for each smoking habit (smoker and nonsmoker). Both distributions are left-skewed. First, the smokers' weights have more extreme values (outliers) on the left side of the graph, while the nonsmokers' weights are more centered. Second, the nonsmokers' weights tend to have less variability compared with the smokers' weights.
###Exercise 4
# Proportion of each smoking habit within each level of MomPriorCond
# (the proportions in every column sum to 1).
tally(~ Habit | MomPriorCond, data = NCbirths, format = "proportion")
## MomPriorCond
## Habit At Least One None
## NonSmoker 0.8917910 0.9114011
## Smoker 0.1082090 0.0885989
# Proportion of Premie = No/Yes within each smoking habit.
tally(~ Premie | Habit, data = NCbirths, format = "proportion")
## Habit
## Premie NonSmoker Smoker
## No 0.91191136 0.88235294
## Yes 0.08808864 0.11764706
#I think the mother's smoking habit has an impact on the baby's health.
#My hypothesis is supported: smoking habit does tend to influence the health of the baby. According to the data, the premature rate for smokers' babies is 11.76%, while the premature rate for nonsmokers' babies is 8.81%, which is a lot lower.
###Exercise 5
# Scatterplot of baby weight against mother's age, drawn with small blue
# open circles. Axis labels and title are given explicitly.
plot(
  weight ~ Mage,
  data = NCbirths,
  col = "blue",
  cex = 0.8,
  pch = 1,
  xlab = "Mother's age",
  ylab = "Baby weight(oz)",
  main = "Baby weight vs Mother's age"
)

### Exercise 6
# Read the ozone table (whitespace-delimited text with a header row).
# Columns used below: x (plotted as longitude), y (plotted as latitude)
# and o3 (the ozone reading).
a <- read.table(
  "http://www.stat.ucla.edu/~nchristo/statistics12/ozone.txt",
  header = TRUE
)
# One colour per ozone category, in the same order as the cut() intervals.
AQI_colors <- c("brown", "green", "yellow", "blue", "pink")
# Bin the ozone readings into five right-closed intervals; any reading
# outside (0, 0.374] becomes NA.
AQI_levels <- cut(a$o3, c(0, 0.06, 0.075, 0.104, 0.115, 0.374))
# Integer category code (1 to 5) of every observation.
as.numeric(AQI_levels)
## [1] 1 3 1 3 1 3 1 1 1 1 3 2 1 1 1 3 2 3 3 1 2 2 3 1 3 1 1 1 1 1 1 1 1 4 1 3 3
## [38] 3 3 2 3 3 3 1 1 1 3 1 1 1 3 1 1 1 3 2 1 3 1 1 1 4 1 3 3 3 1 3 3 1 3 2 1 2
## [75] 3 1 1 3 3 1 2 3 2 2 2 2 1 2 1 1 3 2 2 1 1 1 2 2 1 4 1 3 1 1 3 2 1 3 1 3 3
## [112] 3 1 1 3 2 2 3 2 1 3 2 1 2 1 3 3 2 3 3 2 1 3 3 2 3 3 1 1 2 3 4 3 1 1 2 2 2
## [149] 3 2 1 2 2 1 3 1 1 1 1 3 3 4 3 1 4 1 1 1 1 1 3 1 1 1 1
# Set up empty axes only: type = "n" draws no points, so the county map and
# the sized symbols can be layered on afterwards. The type is now passed by
# name; previously a bare "n" followed the named arguments and only worked
# because it happened to land on the `type` parameter positionally.
plot(
  a$x, a$y,
  type = "n",
  xlim = c(-125, -114),
  ylim = c(32, 43),
  xlab = "Longitude",
  ylab = "Latitude",
  main = "California ozone bubble plot"
)
# Overlay the county outlines for the region matched by "ca".
map("county", "ca", add = TRUE)
# Draw one triangle per observation: its size is the reading relative to the
# mean ozone level, its colour is chosen by the ozone category.
points(
  a$x, a$y,
  cex = a$o3 / mean(a$o3),
  col = AQI_colors[as.numeric(AQI_levels)],
  pch = 2
)
