# Show the current working directory (auto-printed when run at top level).
getwd()
## [1] "C:/Users/m0965094/OneDrive/Desktop/1 - STU - DATA ANALYTICS/7 - DMML/CA#10"
# make sure the packages for this chapter
# are installed, install if necessary
#pkg <- c("ggplot2", "scales", "maptools",
# "sp", "maps", "grid", "car" )
#new.pkg <- pkg[!(pkg %in% installed.packages())]
#if (length(new.pkg)) {
#install.packages(new.pkg)
#}
# Read the ZeroAccess CSV; the first row holds the column names.
# NOTE(review): the path is absolute and machine-specific -- adjust it (or
# switch to a path relative to the project directory) before running elsewhere.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header row is handled.
za <- read.csv(
  "C:/Users/m0965094/OneDrive/Desktop/1 - STU - DATA ANALYTICS/7 - DMML/CA#10/zeroaccess.csv",
  header = TRUE,
  sep = ","
)
#View(za)
# Load ggplot2 to create graphics
#library(ggplot2)
# create a ggplot instance with zeroaccess data
#gg <- ggplot(data=za, aes(x=long, y=lat))
# add the points, set transparency to 1/40th
#gg <- gg + geom_point(size=1, color="#000099", alpha=1/40)
# add axes labels
#gg <- gg + xlab("Longitude") + ylab("Latitude")
# simplify the theme for aesthetics
#gg <- gg + theme_bw()
# this may take a while, over 800,000 points plotted
#print(gg)
#install.packages("mapproj")
#library(mapproj)
# requires package : ggplot2
# requires object: za (5-1)
# the "maps" and "mapproj" packages are used by ggplot
# load map data of the world
#world <- map_data("world")
#Remove Antarctica
#world <- subset(world, world$region!="Antarctica")
# load world data into ggplot object
#gg <- ggplot(data=world, aes(x=long, y=lat))
# trace along the lat/long coords by group (countries)
#gg <- gg + geom_path(aes(group=group), colour="gray70")
# now project using the mercator projection
# try different projections with ?mapproject
#gg <- gg + coord_map("mercator", xlim=c(-200, 200))
# load up the ZeroAccess points, overriding the default data set
#gg <- gg + geom_point(data=za, aes(long, lat),
# colour="#000099", alpha=1/40, size=1)
# remove text, axis ticks and grid lines, and draw a gray border on a white background
#gg <- gg + theme(text=element_blank(),
# axis.ticks=element_blank(),
# panel.grid=element_blank(),
# panel.background=element_rect(color="gray50",
# fill="white"))
#print(gg)
# Read the county-level data set; the first row holds the column names.
# NOTE(review): the path is absolute and machine-specific -- adjust it before
# running on another machine.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
county.data <- read.csv(
  "C:/Users/m0965094/OneDrive/Desktop/1 - STU - DATA ANALYTICS/7 - DMML/CA#10/countydataset.csv",
  header = TRUE,
  sep = ","
)
#View(county.data)
#set.seed(1)
# generate 200 random numbers around 10
#input <- rnorm(200, mean=10)
#summary(input)
# requires objects: input (5-16)
# generate output around a mean of 2 x input
#output <- rnorm(200, mean=input*2)
# put into data frame to plot it
#our.data <- data.frame(input, output)
#gg <- ggplot(our.data, aes(input, output))
#gg <- gg + geom_point()
#gg <- gg + geom_smooth(method = "lm", se=F, color="red")
#gg <- gg + theme_bw()
#print(gg)
#Make comments about both the syntax and the output of the task executed above. Is it possible to customize this graph to make it more explanatory?
#QUESTION: Is the input significant at a 5% significance level? 1%?
#model <- lm(output ~ input)
#summary(model)
#confint(model)
#summary(lm(county.data$Infections ~ county.data$ufo2010, data= county.data))
#View(county.data)
#summary(lm(Infections ~ pop + income + ipaddr + ufo2010,
# data=county.data))
#install.packages("carData")
#library(car) # for the vif() function
#model <- lm(Infections ~ pop + income + ipaddr + ufo2010,
# data=county.data)
#sqrt(vif(model))
#summary(lm(Infections ~ pop, data=county.data))
#QUESTION: PREDICT FOR 1 000 000 AND 2 000 000
#RECREATE THIS SOLUTION BUILDING MODEL THAT DEPENDS ON INCOME
# PREDICT NUMBER OF INFECTIONS BASED ON VARIABLE INCOME: 10 000, 20 000, 40 000, 90 000
# Simple linear regression of the number of infections on population.
# The model is fitted once and reused for every population prediction below
# (refitting the identical model before each predict() call is redundant).
pop.lm <- lm(Infections ~ pop, data = county.data)

# Predicted number of infections for a population of 6 000 000, together with
# the confidence interval for the mean response (predict.lm default level).
predict(pop.lm, data.frame(pop = 6000000), interval = "confidence")
## fit lwr upr
## 1 378.7109 295.9209 461.5009

# Answer: predicted number of infections for a population of 1 000 000.
predict(pop.lm, data.frame(pop = 1000000), interval = "confidence")
## fit lwr upr
## 1 167.3069 153.9224 180.6915

# Answer: predicted number of infections for a population of 2 000 000.
predict(pop.lm, data.frame(pop = 2000000), interval = "confidence")
## fit lwr upr
## 1 209.5877 182.5947 236.5807
# Simple linear regression of the number of infections on income.
# The model is fitted once, under a name that matches its predictor, and is
# reused for every income prediction below.
income.lm <- lm(Infections ~ income, data = county.data)
# Legacy alias: this script previously stored the income model in `pop.lm`,
# so that binding is kept for any later code that still reads it.
pop.lm <- income.lm

# Answer: predicted number of infections for an income of $10 000.
predict(income.lm, data.frame(income = 10000), interval = "confidence")
## fit lwr upr
## 1 100.2013 85.70806 114.6945

# Answer: predicted number of infections for an income of $20 000.
predict(income.lm, data.frame(income = 20000), interval = "confidence")
## fit lwr upr
## 1 108.4966 97.66147 119.3318

# Answer: predicted number of infections for an income of $40 000.
predict(income.lm, data.frame(income = 40000), interval = "confidence")
## fit lwr upr
## 1 125.0872 120.1358 130.0387

# Answer: predicted number of infections for an income of $90 000.
predict(income.lm, data.frame(income = 90000), interval = "confidence")
## fit lwr upr
## 1 166.5638 148.3582 184.7694