# Problem 1: car seat sales versus price, using the Carseats data from ISLR.
# One-time setup if the package is not installed yet:
# install.packages("ISLR")
library(ISLR)
# Open the data set's help page only when someone is at the console; in a
# batch run (Rscript, knitting) the help text would otherwise be written into
# the script's output.
if (interactive()) {
  help("Carseats")
}
# First six rows of the data.
head(Carseats)
##   Sales CompPrice Income Advertising Population Price ShelveLoc Age Education
## 1  9.50       138     73          11        276   120       Bad  42        17
## 2 11.22       111     48          16        260    83      Good  65        10
## 3 10.06       113     35          10        269    80    Medium  59        12
## 4  7.40       117    100           4        466    97    Medium  55        14
## 5  4.15       141     64           3        340   128       Bad  38        13
## 6 10.81       124    113          13        501    72       Bad  78        16
##   Urban  US
## 1   Yes Yes
## 2   Yes Yes
## 3   Yes Yes
## 4   Yes Yes
## 5   Yes  No
## 6    No Yes
# Per-column summary: quartiles and mean for numeric columns, level counts
# for the factor columns (ShelveLoc, Urban, US).
summary(Carseats)
##      Sales          CompPrice       Income        Advertising    
##  Min.   : 0.000   Min.   : 77   Min.   : 21.00   Min.   : 0.000  
##  1st Qu.: 5.390   1st Qu.:115   1st Qu.: 42.75   1st Qu.: 0.000  
##  Median : 7.490   Median :125   Median : 69.00   Median : 5.000  
##  Mean   : 7.496   Mean   :125   Mean   : 68.66   Mean   : 6.635  
##  3rd Qu.: 9.320   3rd Qu.:135   3rd Qu.: 91.00   3rd Qu.:12.000  
##  Max.   :16.270   Max.   :175   Max.   :120.00   Max.   :29.000  
##    Population        Price        ShelveLoc        Age          Education   
##  Min.   : 10.0   Min.   : 24.0   Bad   : 96   Min.   :25.00   Min.   :10.0  
##  1st Qu.:139.0   1st Qu.:100.0   Good  : 85   1st Qu.:39.75   1st Qu.:12.0  
##  Median :272.0   Median :117.0   Medium:219   Median :54.50   Median :14.0  
##  Mean   :264.8   Mean   :115.8                Mean   :53.32   Mean   :13.9  
##  3rd Qu.:398.5   3rd Qu.:131.0                3rd Qu.:66.00   3rd Qu.:16.0  
##  Max.   :509.0   Max.   :191.0                Max.   :80.00   Max.   :18.0  
##  Urban       US     
##  No :118   No :142  
##  Yes:282   Yes:258  
##                     
##                     
##                     
## 
# library(ggplot2)
# NOTE(review): nothing in Problem 1 appears to need alr4 (plot() and lm()
# ship with R); the load is kept so any later code relying on it still works.
library(alr4)
## Loading required package: car
## Loading required package: carData
## Loading required package: effects
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
# ggplot2 alternative to the base scatterplot below (disabled):
# p <- ggplot(Carseats, aes(x = Price, y = Sales)) + geom_point()
# Scatterplot of Sales (y) against Price (x). The formula + data form labels
# the axes "Price" and "Sales" rather than "Carseats$Price"/"Carseats$Sales",
# and matches the way the model below refers to the columns.
plot(Sales ~ Price, data = Carseats)

# Simple linear regression with Sales as the response and Price as the
# single predictor.
m1 <- lm(Sales ~ Price, data = Carseats)
# Coefficient table, residual standard error, R-squared and overall F-test.
summary(m1)
## 
## Call:
## lm(formula = Sales ~ Price, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5224 -1.8442 -0.1459  1.6503  7.5108 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.641915   0.632812  21.558   <2e-16 ***
## Price       -0.053073   0.005354  -9.912   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.532 on 398 degrees of freedom
## Multiple R-squared:  0.198,  Adjusted R-squared:  0.196 
## F-statistic: 98.25 on 1 and 398 DF,  p-value: < 2.2e-16
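# The fitted model shows that higher prices are associated with lower sales: the slope for Price is negative (-0.053) and statistically significant (p < 2e-16). Sales is the response and Price is the predictor, so each $1 increase in Price is associated with an average decrease of about 0.053 in Sales (roughly 53 units, since the ISLR documentation records Sales in thousands of units). Price alone explains only about 20% of the variation in Sales (R-squared = 0.198).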
# Problem 2 ----
# The oldfaith data frame is inspected right after attaching alr4.
library("alr4")

# Leading rows of the data (six is head()'s default).
head(oldfaith, n = 6L)
##   Duration Interval
## 1      216       79
## 2      108       54
## 3      200       74
## 4      137       62
## 5      272       85
## 6      173       55
# Number of rows and columns.
dim(oldfaith)
## [1] 270   2
# Minimum, quartiles, mean and maximum of each column.
summary(oldfaith)
##     Duration        Interval    
##  Min.   : 96.0   Min.   :43.00  
##  1st Qu.:130.0   1st Qu.:58.00  
##  Median :240.0   Median :76.00  
##  Mean   :209.9   Mean   :71.11  
##  3rd Qu.:267.8   3rd Qu.:82.00  
##  Max.   :306.0   Max.   :96.00
# Scatterplot with Duration on the x-axis and Interval on the y-axis.
plot(Interval ~ Duration, data = oldfaith)

# Least-squares fit of Interval (response) on Duration (predictor).
o1 <- lm(formula = Interval ~ Duration, data = oldfaith)
# Coefficient table and goodness-of-fit statistics for the fit.
summary(o1)
## 
## Call:
## lm(formula = Interval ~ Duration, data = oldfaith)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.3337  -4.5250   0.0612   3.7683  16.9722 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.987808   1.181217   28.77   <2e-16 ***
## Duration     0.176863   0.005352   33.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.004 on 268 degrees of freedom
## Multiple R-squared:  0.8029, Adjusted R-squared:  0.8022 
## F-statistic:  1092 on 1 and 268 DF,  p-value: < 2.2e-16
# The fitted model shows that longer eruptions are followed by longer waits until the next eruption: Interval is the response and Duration is the predictor, and the slope for Duration is positive. Each additional unit of Duration (seconds, per the alr4 documentation) is associated with an average increase of about 0.177 in Interval (minutes).
# The slope is statistically significant at the 0.01 significance level (p < 2e-16, which is far below 0.01), so there is strong evidence of a linear relationship between duration and interval.
# The multiple R-squared is the proportion of the variance in the dependent variable that the model explains; here it is 0.8029, so Duration accounts for about 80% of the variation in Interval.
# The adjusted R-squared applies a penalty for the number of predictors in the model rather than correcting for bias; with a single predictor it is almost identical (0.8022), so the penalty has very little effect here.