# Load the patient data from ClevelandHeart.csv.
clev <- read.csv("ClevelandHeart.csv")

# Histogram of the RestBP column (resting blood pressure), 20 suggested breaks.
hist(
  clev$RestBP,
  breaks = 20,
  main = "Histogram of patients' resting blood pressure",
  xlab = "blood pressure",
  ylab = "number"
)

# Histogram of the MaxHR column (maximum heart rate), 20 suggested breaks.
hist(
  clev$MaxHR,
  breaks = 20,
  main = "Histogram of patient maximum heart rates",
  xlab = "maximum heart rate",
  ylab = "number"
)
#Q1 c
# The total number of patients is 303.
# Dimensions of the data set: number of rows (patients) and columns.
dim(clev)
#> [1] 303 12

# Number of patients whose AHD column equals "Yes".
n_heart_disease <- sum(clev$AHD == "Yes")
n_heart_disease
#> [1] 139

# Proportion of patients with heart disease, derived from the data instead of
# retyping the two counts by hand, so it stays correct if the file changes.
n_heart_disease / nrow(clev)
#> [1] 0.4587459
# 45.9% of the patients are diagnosed with heart disease.
# Cross-tabulate chest-pain type (rows) against the AHD diagnosis (columns).
chestpain_table <- table(clev$ChestPain, clev$AHD)
chestpain_table
#> No Yes
#> asymptomatic 39 105
#> nonanginal 68 18
#> nontypical 41 9
#> typical 16 7

# margin = 2 normalises each column, i.e. gives the distribution of chest-pain
# types within the "No" group and within the "Yes" group separately.
prop.table(chestpain_table, margin = 2)
#> No Yes
#> asymptomatic 0.23780488 0.75539568
#> nonanginal 0.41463415 0.12949640
#> nontypical 0.25000000 0.06474820
#> typical 0.09756098 0.05035971
# From the table, we can observe that patients with heart disease predominantly
# have asymptomatic chest pain (about 76%), whereas those who don't have heart
# disease mostly have nonanginal (41%), nontypical (25%) or asymptomatic (24%)
# chest pain. Typical chest pain is rare in both groups (about 10% and 5%).
#Q1 d
# Keep only the rows where AHD is "Yes"; which() drops NA comparisons, so rows
# with a missing AHD value are excluded rather than turned into NA rows.
has_ahd <- which(clev$AHD == "Yes")
heart_disease <- clev[has_ahd, , drop = FALSE]

# Maximum heart rate distribution for that subgroup.
hist(
  heart_disease$MaxHR,
  col = "salmon",
  border = "black",
  xlab = "Maximum Heart Rate",
  main = "Histogram of Maximum Heart Rate\n(Patients with Heart Disease)"
)
# Draw 10000 values from Uniform(0, 1) and plot their histogram.
x <- runif(n = 10000)
hist(x)
#' Fraction of values at or below each threshold
#'
#' @param x Vector of values to compare against the thresholds.
#' @param threshold Vector of cut-off values.
#' @return An unnamed numeric vector with one entry per element of
#'   `threshold`: the proportion of `x` that is `<=` that threshold.
vecthresh <- function(x, threshold) {
  share_at_or_below <- function(cutoff) {
    mean(x <= cutoff)
  }
  # USE.NAMES = FALSE keeps the result unnamed even if `threshold` has names.
  vapply(threshold, share_at_or_below, numeric(1), USE.NAMES = FALSE)
}
# Empirical check of vecthresh() on a fresh Uniform(0, 1) sample: the fraction
# of draws at or below t should be close to t.
x <- runif(10000)
threshold <- c(0.25, 0.5, 0.75, 1.0)
vecthresh(x, threshold)
# Recorded output from one run; no seed is set, so the first three values
# change slightly from run to run.
#> [1] 0.2430 0.4895 0.7421 1.0000
#' Histogram of sample means of Uniform(0, 1) draws
#'
#' Generates `n` sample means, each the mean of `m` fresh `runif()` draws,
#' plots their histogram and returns the histogram object.
#'
#' @param m Number of uniform draws averaged for each sample mean (>= 1).
#' @param n Number of sample means to generate (>= 1).
#' @return The object of class "histogram" returned by `hist()`. It is
#'   returned visibly, so a top-level call also prints it.
genvec <- function(m, n) {
  # Fail early with a clear message: the previous `1:n` loop iterated over
  # c(1, 0) for n == 0 and silently produced a histogram of a single mean.
  stopifnot(
    is.numeric(m), length(m) == 1, !is.na(m), m >= 1,
    is.numeric(n), length(n) == 1, !is.na(n), n >= 1
  )
  # The vector is deliberately called `output`: hist() derives the default
  # title and the `$xname` field from the name of its argument.
  output <- vapply(
    seq_len(n),
    function(i) mean(runif(m)),
    numeric(1)
  )
  graph <- hist(output)
  graph
}
# 1000 sample means, each over m = 1000 uniform draws. The call prints the
# returned histogram object; the output recorded from one run is kept below
# as comments (values vary between runs because no seed is set).
genvec(1000, 1000)
#> $breaks
#> [1] 0.475 0.480 0.485 0.490 0.495 0.500 0.505 0.510 0.515 0.520
#> [11] 0.525 0.530
#> $counts
#> [1] 11 25 99 139 218 208 160 89 36 14 1
#> $density
#> [1] 2.2 5.0 19.8 27.8 43.6 41.6 32.0 17.8 7.2 2.8 0.2
#> $mids
#> [1] 0.4775 0.4825 0.4875 0.4925 0.4975 0.5025 0.5075 0.5125 0.5175
#> [10] 0.5225 0.5275
#> $xname
#> [1] "output"
#> $equidist
#> [1] TRUE
#> attr(,"class")
#> [1] "histogram"
# Same experiment with m = 100000 draws per sample mean. The recorded output
# from one run is kept below as comments (values vary between runs because no
# seed is set).
genvec(100000, 1000)
#> $breaks
#> [1] 0.4970 0.4975 0.4980 0.4985 0.4990 0.4995 0.5000 0.5005 0.5010 0.5015 0.5020 0.5025 0.5030
#> [14] 0.5035
#> $counts
#> [1] 6 10 43 81 166 204 200 152 84 33 16 2 3
#> $density
#> [1] 12 20 86 162 332 408 400 304 168 66 32 4 6
#> $mids
#> [1] 0.49725 0.49775 0.49825 0.49875 0.49925 0.49975 0.50025 0.50075 0.50125 0.50175 0.50225
#> [12] 0.50275 0.50325
#> $xname
#> [1] "output"
#> $equidist
#> [1] TRUE
#> attr(,"class")
#> [1] "histogram"
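# Observations: as m gets larger, the range of the x-axis shrinks, meaning that
# the sample means fall within a narrower range of values around 0.5. This is
# because averaging more variables decreases the variance of the sample mean:
# for Uniform(0, 1) draws it equals 1/(12m), so the spread shrinks like
# 1/sqrt(m).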
#Q2 d
# Draw 10000 values from an exponential distribution with rate 1 and plot them.
y <- rexp(n = 10000, rate = 1)
hist(y)
#Q3 a
# Load the per-country case counts from key-countries-pivoted.csv.
covid <- read.csv("key-countries-pivoted.csv")

# Line plot of the US column. The y-axis label is written on a single source
# line: splitting the string literal across two lines embedded a newline in
# the label.
plot(
  covid$US,
  type = "l",
  xlab = "days since Jan 22, 2020",
  ylab = "cumulative cases",
  main = "Cumulative cases in US"
)
#Q3 b
# Successive differences of the US column give the change from one row to the
# next, i.e. the new cases per day.
new_cases <- diff(covid$US)
plot(
  new_cases,
  type = "l",
  main = "New daily cases in US",
  xlab = "days since Jan 22, 2020",
  ylab = "new cases"
)
#Q3 c
# Same calculation for the China column; note that this overwrites the
# `new_cases` vector computed for the US.
new_cases <- diff(covid$China)
plot(
  new_cases,
  type = "l",
  main = "New daily cases in China",
  xlab = "days since Jan 22, 2020",
  ylab = "new cases"
)
# Hard-coded population per country, named to match the columns of `covid`.
# NOTE(review): the source and reference year of these figures are not shown
# here — confirm.
population <- c(
  China = 1412000000,
  US = 332000000,
  United_Kingdom = 67000000,
  Italy = 59000000,
  France = 68000000,
  Germany = 83000000,
  Spain = 47000000,
  Iran = 88000000
)

# Last row of the data for the countries listed above (a one-row data frame).
# NOTE(review): the name implies the last row is Apr 16 — confirm against the
# file.
cases_apr16 <- covid[nrow(covid), names(population)]

# Flatten the one-row data frame to a named vector. Passing the data frame
# itself to data.frame() produced a single row with 24 columns
# (Cases.China, ..., Cases_per_capita.Iran) instead of one row per country.
case_counts <- unlist(cases_apr16)

# One row per country: raw count, share of the listed countries' total, and
# cases per inhabitant. Population is indexed by name so the division cannot
# be misaligned.
results <- data.frame(
  Cases = case_counts,
  Fraction_of_total = case_counts / sum(case_counts),
  Cases_per_capita = case_counts / population[names(case_counts)]
)
results
NA
# Total cases:
# NOTE(review): the last visible assignment to `x` is `runif(10000)`, whose sum
# cannot exceed 10000, so the recorded total below must come from a different
# `x` (presumably a vector of case counts) — confirm which object was intended.
sum(x)
#> [1] 190085610