# This script reads its CSV data files from ~/bio-informatics/Bio-stats HW 1/;
# adjust the paths in the read.csv() calls if the files live elsewhere.
# Problem 1
# Load the serum zinc data and print summary statistics for each column.
serzinc <- read.csv("~/bio-informatics/Bio-stats HW 1/serzinc.csv")
summary(serzinc)
## zinc
## Min. : 50.0
## 1st Qu.: 76.0
## Median : 86.0
## Mean : 87.9
## 3rd Qu.: 98.0
## Max. :153.0
# Index plot (value against row position) and histogram of the zinc column.
plot(serzinc$zinc, ylab = "Zinc Levels", main = "Point Plot of Zinc Levels")
hist(serzinc$zinc, xlab = "Zinc Levels", main = "Histogram of Zinc Levels")
# cut() with a single number as `breaks` splits the observed range into that
# many equal-width intervals (10 here, not 10 points per interval). Plotting
# the resulting factor draws a bar chart of the count in each interval.
serzinc_interval <- cut(serzinc$zinc, 10)
plot(
  serzinc_interval,
  xlab = "Intervals",
  ylab = "Count in Interval",
  main = "Zinc Level Data in 10 Intervals"
)
# table() returns the absolute count per interval. The result stays under its
# original name, sz_rf, so the recorded output below remains valid.
sz_rf <- table(serzinc_interval)
sz_rf
## serzinc_interval
## (49.9,60.2] (60.2,70.5] (70.5,80.9] (80.9,91.2] (91.2,102] (102,112]
## 9 41 112 123 85 57
## (112,122] (122,132] (132,143] (143,153]
## 25 5 2 3
# Relative frequencies: each interval's count divided by the total number of
# tabulated observations.
prop.table(sz_rf)
# 17 lowbwt
# NOTE(review): this header previously listed the variables sex (1=male),
# tox (1=toxemia) and grmhem (1=hemorrhage), but the recorded summary() output
# below shows the columns nation, lowbwt, life60 and life92 -- confirm that
# the intended data file is being loaded.
lowbwt <- read.csv("~/bio-informatics/Bio-stats HW 1/lowbwt.csv")
summary(lowbwt)
## nation lowbwt life60 life92
## Afghanistan: 1 Min. : 4 Min. :32.0 Min. :42.0
## Albania : 1 1st Qu.: 7 1st Qu.:41.0 1st Qu.:55.0
## Algeria : 1 Median :10 Median :48.0 Median :67.0
## Angola : 1 Mean :12 Mean :51.8 Mean :63.9
## Argentina : 1 3rd Qu.:15 3rd Qu.:64.0 3rd Qu.:72.0
## Armenia : 1 Max. :50 Max. :73.0 Max. :79.0
## (Other) :138 NA's :33 NA's :14
# Index plot, histogram and boxplot of the lowbwt column.
plot(
  lowbwt$lowbwt,
  ylab = "Birth Weight",
  main = "Point Plot of Low Birth Weight Data"
)
hist(
  lowbwt$lowbwt,
  xlab = "Birth Weight",
  main = "Histogram of Low Birth Weight Data"
)
boxplot(lowbwt$lowbwt, main = "Boxplot of Low Birth Weight data")
# cut() with a single number as `breaks` splits the observed range into that
# many equal-width intervals (10 here, not intervals that are 10 wide).
# Plotting the resulting factor draws a bar chart of the count per interval.
lowbwt_interval <- cut(lowbwt$lowbwt, 10)
plot(
  lowbwt_interval,
  xlab = "Intervals",
  ylab = "Count in Interval",
  main = "Low Birth Weight Data in 10 Intervals"
)
# table() returns the absolute count per interval. The result stays under its
# original name, lowbwt_rf, so the recorded output below remains valid.
lowbwt_rf <- table(lowbwt_interval)
lowbwt_rf
## lowbwt_interval
## (3.95,8.56] (8.56,13.2] (13.2,17.8] (17.8,22.4] (22.4,27] (27,31.6]
## 38 33 25 10 3 0
## (31.6,36.2] (36.2,40.8] (40.8,45.4] (45.4,50]
## 1 0 0 1
# Relative frequencies: each interval's count divided by the total number of
# tabulated observations. table() leaves out NA by default, so any missing
# lowbwt values are excluded from the denominator.
prop.table(lowbwt_rf)
# 18 nursehome, state and residents
nursehome <- read.csv("~/bio-informatics/Bio-stats HW 1/nurshome.csv")
summary(nursehome)
## state resident
## Alabama : 1 Min. :13.6
## Alaska : 1 1st Qu.:32.9
## Arizona : 1 Median :44.2
## Arkansas : 1 Mean :43.9
## California: 1 3rd Qu.:54.3
## Colorado : 1 Max. :74.9
## (Other) :45
# Index plot, histogram and boxplot of the resident column.
plot(
  nursehome$resident,
  ylab = "Resident Levels",
  main = "Point Plot of Nursing Home Resident data"
)
hist(
  nursehome$resident,
  xlab = "Resident Levels",
  main = "Histogram of Nursing Home Resident data"
)
boxplot(nursehome$resident, main = "Boxplot of Nursing Home Resident data")
# cut() with a single number as `breaks` splits the observed range into that
# many equal-width intervals (10 here, not intervals that are 10 wide).
# Plotting the resulting factor draws a bar chart of the count per interval.
nursehome_interval <- cut(nursehome$resident, 10)
plot(
  nursehome_interval,
  xlab = "Intervals",
  ylab = "Count in Interval",
  main = "Nursing Home Resident Data in 10 Intervals"
)
# table() returns the absolute count per interval. The result stays under its
# original name, nursehome_rf, so the recorded output below remains valid.
nursehome_rf <- table(nursehome_interval)
nursehome_rf
## nursehome_interval
## (13.5,19.7] (19.7,25.8] (25.8,32] (32,38.1] (38.1,44.3] (44.3,50.4]
## 2 3 7 9 5 7
## (50.4,56.5] (56.5,62.7] (62.7,68.8] (68.8,75]
## 6 7 3 2
# Relative frequencies: each interval's count divided by the total number of
# tabulated observations.
prop.table(nursehome_rf)
# Largest observed value; it falls inside the last interval shown above.
max(nursehome$resident)
## [1] 74.9
# 19 Uses the cigarett data set; the variables of interest are tar and
# nicotine.
cigarette <- read.csv("~/bio-informatics/Bio-stats HW 1/cigarett.csv")
summary(cigarette)
## tar nicotine
## Min. : 0.7 Min. :0.090
## 1st Qu.: 9.5 1st Qu.:0.900
## Median :13.0 Median :1.100
## Mean :11.5 Mean :0.991
## 3rd Qu.:16.0 3rd Qu.:1.300
## Max. :19.0 Max. :1.400
# Index plots of each variable (value against row position).
plot(
  cigarette[["tar"]],
  ylab = "Tar Levels",
  main = "Tar Levels in Cigarettes data"
)
plot(
  cigarette[["nicotine"]],
  ylab = "Nicotine Levels",
  main = "Nicotine Levels in Cigarettes data"
)
# Histograms of each variable.
hist(
  cigarette[["tar"]],
  xlab = "Tar Levels",
  main = "Histogram of Tar Levels in Cigarettes"
)
hist(
  cigarette[["nicotine"]],
  xlab = "Nicotine Levels",
  main = "Histogram of Nicotine Levels in Cigarettes"
)
# Boxplots of each variable.
boxplot(cigarette[["tar"]], main = "Boxplot of Tar Levels in Cigarettes")
boxplot(
  cigarette[["nicotine"]],
  main = "Boxplot of Nicotine Levels in Cigarettes"
)
# Bin each variable into 10 equal-width intervals and draw the per-interval
# counts as bar charts.
tar_interval <- cut(cigarette[["tar"]], breaks = 10)
plot(
  tar_interval,
  xlab = "Intervals",
  ylab = "Count In Interval",
  main = "Histogram of Tar Interval Data"
)
nicotine_interval <- cut(cigarette[["nicotine"]], breaks = 10)
plot(
  nicotine_interval,
  xlab = "Intervals",
  ylab = "Count In Interval",
  main = "Histogram of Nicotine Interval Data"
)
# Scatter plot with nicotine on the x-axis and tar on the y-axis.
plot(
  tar ~ nicotine,
  data = cigarette,
  xlab = "Nicotine Level",
  ylab = "Tar Level",
  main = "Two-Way Scatter Plot of Tar ~ Nicotine Data"
)
# 20 Uses the brate file, which holds the variables year and birthrt.
brate <- read.csv("~/bio-informatics/Bio-stats HW 1/brate.csv")
summary(brate)
## year birthrt
## Min. :1940 Min. : 7.1
## 1st Qu.:1953 1st Qu.:16.9
## Median :1966 Median :23.4
## Mean :1966 Mean :23.3
## 3rd Qu.:1979 3rd Qu.:27.2
## Max. :1992 Max. :45.2
# Birth rate against year for unmarried women aged 15 to 44. The two columns
# are passed explicitly: year on the x-axis, birthrt on the y-axis.
plot(
  brate[["year"]],
  brate[["birthrt"]],
  xlab = "Year",
  ylab = "Birth Rate/1000",
  main = "Birth Rate by Year for Unmarried\n Women between 15 and 44 Years Old"
)