library(tidyverse)
  library(openintro)
  library(statsr)
  library(broom)

Exercise 1:

The data set is 6 by 123. Each row represents one country’s human freedom index.

head(hfi)
## # A tibble: 6 × 123
##    year ISO_code countries region pf_rol_procedural pf_rol_civil pf_rol_criminal
##   <dbl> <chr>    <chr>     <chr>              <dbl>        <dbl>           <dbl>
## 1  2016 ALB      Albania   Easte…              6.66         4.55            4.67
## 2  2016 DZA      Algeria   Middl…             NA           NA              NA   
## 3  2016 AGO      Angola    Sub-S…             NA           NA              NA   
## 4  2016 ARG      Argentina Latin…              7.10         5.79            4.34
## 5  2016 ARM      Armenia   Cauca…             NA           NA              NA   
## 6  2016 AUS      Australia Ocean…              8.44         7.53            7.36
## # ℹ 116 more variables: pf_rol <dbl>, pf_ss_homicide <dbl>,
## #   pf_ss_disappearances_disap <dbl>, pf_ss_disappearances_violent <dbl>,
## #   pf_ss_disappearances_organized <dbl>,
## #   pf_ss_disappearances_fatalities <dbl>, pf_ss_disappearances_injuries <dbl>,
## #   pf_ss_disappearances <dbl>, pf_ss_women_fgm <dbl>,
## #   pf_ss_women_missing <dbl>, pf_ss_women_inheritance_widows <dbl>,
## #   pf_ss_women_inheritance_daughters <dbl>, pf_ss_women_inheritance <dbl>, …

Exercise 2

hfi_2016 <- hfi |>
  filter(year == "2016")
head(hfi_2016)
## # A tibble: 6 × 123
##    year ISO_code countries region pf_rol_procedural pf_rol_civil pf_rol_criminal
##   <dbl> <chr>    <chr>     <chr>              <dbl>        <dbl>           <dbl>
## 1  2016 ALB      Albania   Easte…              6.66         4.55            4.67
## 2  2016 DZA      Algeria   Middl…             NA           NA              NA   
## 3  2016 AGO      Angola    Sub-S…             NA           NA              NA   
## 4  2016 ARG      Argentina Latin…              7.10         5.79            4.34
## 5  2016 ARM      Armenia   Cauca…             NA           NA              NA   
## 6  2016 AUS      Australia Ocean…              8.44         7.53            7.36
## # ℹ 116 more variables: pf_rol <dbl>, pf_ss_homicide <dbl>,
## #   pf_ss_disappearances_disap <dbl>, pf_ss_disappearances_violent <dbl>,
## #   pf_ss_disappearances_organized <dbl>,
## #   pf_ss_disappearances_fatalities <dbl>, pf_ss_disappearances_injuries <dbl>,
## #   pf_ss_disappearances <dbl>, pf_ss_women_fgm <dbl>,
## #   pf_ss_women_missing <dbl>, pf_ss_women_inheritance_widows <dbl>,
## #   pf_ss_women_inheritance_daughters <dbl>, pf_ss_women_inheritance <dbl>, …

Exercise 3

A scatter plot with linear regression could be used to display the relationship between pf_score, and pf_expression_control.

ggplot(hfi_2016, aes(x=pf_expression_control, y=pf_score))+
  geom_point() +
  geom_smooth(method = "lm") + #adds in line for scatter plot
  theme_bw()+
  labs(x="Freedom of Expression", 
       y="Personal Freedom (score)",
       title = "Scatterplot of Freedom of Expression to Personal Freedom",
       caption = "Source:Ian Vasquez and Tanja Porcnik, The Human Freedom Index 2018: A Global Measurement of Personal, Civil, and Economic Freedom (Washington: Cato Institute, Fraser Institute, and the Friedrich Naumann Foundation for Freedom, 2018).")
## `geom_smooth()` using formula = 'y ~ x'

hfi_2016 |>
  summarise(cor(pf_expression_control, pf_score))
## # A tibble: 1 × 1
##   `cor(pf_expression_control, pf_score)`
##                                    <dbl>
## 1                                  0.845

Exercise 4

We can see a linear, positive, moderate association: as freedom of expression increases, personal freedom tends to increase with it. There are a few outliers who have very low personal freedom and freedom of expression.

plot_ss(x = pf_expression_control, y = pf_score, data = hfi_2016)

## Click two points to make a line.                                
## Call:
## lm(formula = y ~ x, data = pts)
## 
## Coefficients:
## (Intercept)            x  
##      4.2838       0.5418  
## 
## Sum of Squares:  102.213
plot_ss(x = pf_expression_control, y = pf_score, data = hfi_2016, showSquares = TRUE)

## Click two points to make a line.                                
## Call:
## lm(formula = y ~ x, data = pts)
## 
## Coefficients:
## (Intercept)            x  
##      4.2838       0.5418  
## 
## Sum of Squares:  102.213

Exercise 5

The smallest sum of squares I got after changing the Global Tools setting was 105.968.

m1 <- lm(pf_score ~ pf_expression_control, data = hfi_2016)
tidy(m1)
## # A tibble: 2 × 5
##   term                  estimate std.error statistic  p.value
##   <chr>                    <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)              4.28     0.149       28.8 4.23e-65
## 2 pf_expression_control    0.542    0.0271      20.0 2.31e-45
glance(m1)
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.714         0.712 0.799      400. 2.31e-45     1  -193.  391.  400.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Exercise 6

Equation of Regression Line: y = 4.28 + 0.542*pf_expression_control The slope tells us that when the human freedom score increases by 0.542 for every 1 unit of amount of political pressure.

ggplot(data = hfi_2016, aes(x = pf_expression_control, y = pf_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Exercise 7

The corresponding personal freedom score for a pf_expression_control rating of 3 is about 5.9. This seems to be an overestimate because it is higer than all actual freedom scores at a pf_expression_control rating of 3. It is an over estimate by 0.5.

m1_aug <- augment(m1)
ggplot(data = m1_aug, aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  xlab("Fitted values") +
  ylab("Residuals")

Exercise 8

The residuals plot shows points scattered in no particular pattern about the horizontal line at y=0. This indicated that the relationship between the two variables is linear.

ggplot(data = m1_aug, aes(x = .resid)) +
  geom_histogram(binwidth = 0.25) +
  xlab("Residuals")

Exercise 9

The conditions do not appear to be violated. The histogram shows a bell-shaped curve mostly centered around 0, confirming the linearity of the relationship between the two variables.

Exercise 10

I think the residual vs fitted plot shows the constant variability conditions not being violated as points are randomly scattered on the plot.

LS0tCnRpdGxlOiAiTGFiIDE6IEludHJvIHRvIFIiCmF1dGhvcjogIkpPa3VpIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0Ci0tLQoKYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0KICBsaWJyYXJ5KHRpZHl2ZXJzZSkKICBsaWJyYXJ5KG9wZW5pbnRybykKICBsaWJyYXJ5KHN0YXRzcikKICBsaWJyYXJ5KGJyb29tKQpgYGAKCiMjIyBFeGVyY2lzZSAxOgoKVGhlIGRhdGEgc2V0IGlzIDYgYnkgMTIzLiBFYWNoIHJvdyByZXByZXNlbnRzIG9uZSBjb3VudHJ5J3MgaHVtYW4gZnJlZWRvbSBpbmRleC4KCmBgYHtyIHZpZXctZ2lybHMtY291bnRzfQpoZWFkKGhmaSkKYGBgCgojIyMgRXhlcmNpc2UgMgoKYGBge3IgdHJlbmQtZ2lybHN9CmhmaV8yMDE2IDwtIGhmaSB8PgogIGZpbHRlcih5ZWFyID09ICIyMDE2IikKaGVhZChoZmlfMjAxNikKYGBgCgojIyMgRXhlcmNpc2UgMwoKQSBzY2F0dGVyIHBsb3Qgd2l0aCBsaW5lYXIgcmVncmVzc2lvbiBjb3VsZCBiZSB1c2VkIHRvIGRpc3BsYXkgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHBmX3Njb3JlLCBhbmQgcGZfZXhwcmVzc2lvbl9jb250cm9sLgoKYGBge3J9CmdncGxvdChoZmlfMjAxNiwgYWVzKHg9cGZfZXhwcmVzc2lvbl9jb250cm9sLCB5PXBmX3Njb3JlKSkrCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iKSArICNhZGRzIGluIGxpbmUgZm9yIHNjYXR0ZXIgcGxvdAogIHRoZW1lX2J3KCkrCiAgbGFicyh4PSJGcmVlZG9tIG9mIEV4cHJlc3Npb24iLCAKICAgICAgIHk9IlBlcnNvbmFsIEZyZWVkb20gKHNjb3JlKSIsCiAgICAgICB0aXRsZSA9ICJTY2F0dGVycGxvdCBvZiBGcmVlZG9tIG9mIEV4cHJlc3Npb24gdG8gUGVyc29uYWwgRnJlZWRvbSIsCiAgICAgICBjYXB0aW9uID0gIlNvdXJjZTpJYW4gVmFzcXVleiBhbmQgVGFuamEgUG9yY25paywgVGhlIEh1bWFuIEZyZWVkb20gSW5kZXggMjAxODogQSBHbG9iYWwgTWVhc3VyZW1lbnQgb2YgUGVyc29uYWwsIENpdmlsLCBhbmQgRWNvbm9taWMgRnJlZWRvbSAoV2FzaGluZ3RvbjogQ2F0byBJbnN0aXR1dGUsIEZyYXNlciBJbnN0aXR1dGUsIGFuZCB0aGUgRnJpZWRyaWNoIE5hdW1hbm4gRm91bmRhdGlvbiBmb3IgRnJlZWRvbSwgMjAxOCkuIikKYGBgCgpgYGB7ciBwbG90LXByb3AtYm95cy1hcmJ1dGhub3R9CmhmaV8yMDE2IHw+CiAgc3VtbWFyaXNlKGNvcihwZl9leHByZXNzaW9uX2NvbnRyb2wsIHBmX3Njb3JlKSkKYGBgCgojIyMgRXhlcmNpc2UgNAoKV2UgY2FuIHNlZSBhIGxpbmVhciwgcG9zaXRpdmUsIG1vZGVyYXRlIGFzc29jaWF0aW9uOiBhcyBmcmVlZG9tIG9mIGV4cHJlc3Npb24gaW5jcmVhc2VzLCBwZXJzb25hbCBmcmVlZG9tIHRlbmRzIHRvIGluY3JlYXNlIHdpdGggaXQuIFRoZXJlIGFyZSBhIGZldyBvdXRsaWVycyB3aG8gaGF2ZSB2ZXJ5IGxvdyBwZXJzb25hbCBmcmVlZG9tIGFuZCBmcmVlZG9tIG9mIGV4cHJlc3Npb24uCgpgYGB7ciBkaW0tcHJlc2VudH0KcGxvdF9zcyh4ID0gcGZfZXhwcmVzc2lvbl9jb250cm9sLCB5ID0gcGZfc2NvcmUsIGRhdGEgPSBoZmlfMjAxNikKYGBgCgpgYGB7cn0KcGxvdF9zcyh4ID0gcGZfZXhwcmVzc2lvbl9jb250cm9sLCB5ID0gcGZfc2NvcmUsIGRhdGEgPSBoZmlfMjAxNiwgc2hvd1NxdWFyZXMgPSBUUlVFKQpgYGAKCiMjIyBFeGVyY2lzZSA1CgpUaGUgc21hbGxlc3Qgc3VtIG9mIHNxdWFyZXMgSSBnb3QgYWZ0ZXIgY2hhbmdpbmcgdGhlIEdsb2JhbCBUb29scyBzZXR0aW5nIHdhcyAxMDUuOTY4LgoKYGBge3IgY291bnQtY29tcGFyZX0KbTEgPC0gbG0ocGZfc2NvcmUgfiBwZl9leHByZXNzaW9uX2NvbnRyb2wsIGRhdGEgPSBoZmlfMjAxNikKYGBgCgpgYGB7cn0KdGlkeShtMSkKYGBgCgpgYGB7cn0KZ2xhbmNlKG0xKQpgYGAKCiMjIyBFeGVyY2lzZSA2CgpFcXVhdGlvbiBvZiBSZWdyZXNzaW9uIExpbmU6IHkgPSA0LjI4ICsgMC41NDJcKnBmX2V4cHJlc3Npb25fY29udHJvbCBUaGUgc2xvcGUgdGVsbHMgdXMgdGhhdCB3aGVuIHRoZSBodW1hbiBmcmVlZG9tIHNjb3JlIGluY3JlYXNlcyBieSAwLjU0MiBmb3IgZXZlcnkgMSB1bml0IG9mIGFtb3VudCBvZiBwb2xpdGljYWwgcHJlc3N1cmUuCgpgYGB7ciBwbG90LXByb3AtYm95cy1wcmVzZW50fQpnZ3Bsb3QoZGF0YSA9IGhmaV8yMDE2LCBhZXMoeCA9IHBmX2V4cHJlc3Npb25fY29udHJvbCwgeSA9IHBmX3Njb3JlKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkKYGBgCgojIyMgRXhlcmNpc2UgNwoKVGhlIGNvcnJlc3BvbmRpbmcgcGVyc29uYWwgZnJlZWRvbSBzY29yZSBmb3IgYSBwZl9leHByZXNzaW9uX2NvbnRyb2wgcmF0aW5nIG9mIDMgaXMgYWJvdXQgNS45LiBUaGlzIHNlZW1zIHRvIGJlIGFuIG92ZXJlc3RpbWF0ZSBiZWNhdXNlIGl0IGlzIGhpZ2VyIHRoYW4gYWxsIGFjdHVhbCBmcmVlZG9tIHNjb3JlcyBhdCBhIHBmX2V4cHJlc3Npb25fY29udHJvbCByYXRpbmcgb2YgMy4gSXQgaXMgYW4gb3ZlciBlc3RpbWF0ZSBieSAwLjUuCgpgYGB7ciBmaW5kLW1heC10b3RhbH0KbTFfYXVnIDwtIGF1Z21lbnQobTEpCmBgYAoKYGBge3J9CmdncGxvdChkYXRhID0gbTFfYXVnLCBhZXMoeCA9IC5maXR0ZWQsIHkgPSAucmVzaWQpKSArCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBsaW5ldHlwZSA9ICJkYXNoZWQiLCBjb2xvciA9ICJyZWQiKSArCiAgeGxhYigiRml0dGVkIHZhbHVlcyIpICsKICB5bGFiKCJSZXNpZHVhbHMiKQpgYGAKCiMjIyBFeGVyY2lzZSA4CgpUaGUgcmVzaWR1YWxzIHBsb3Qgc2hvd3MgcG9pbnRzIHNjYXR0ZXJlZCBpbiBubyBwYXJ0aWN1bGFyIHBhdHRlcm4gYWJvdXQgdGhlIGhvcml6b250YWwgbGluZSBhdCB5PTAuIFRoaXMgaW5kaWNhdGVkIHRoYXQgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHRoZSB0d28gdmFyaWFibGVzIGlzIGxpbmVhci4KCmBgYHtyfQpnZ3Bsb3QoZGF0YSA9IG0xX2F1ZywgYWVzKHggPSAucmVzaWQpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAwLjI1KSArCiAgeGxhYigiUmVzaWR1YWxzIikKYGBgCgojIyMgRXhlcmNpc2UgOQoKVGhlIGNvbmRpdGlvbnMgZG8gbm90IGFwcGVhciB0byBiZSB2aW9sYXRlZC4gVGhlIGhpc3RvZ3JhbSBzaG93cyBhIGJlbGwtc2hhcGVkIGN1cnZlIG1vc3RseSBjZW50ZXJlZCBhcm91bmQgMCwgY29uZmlybWluZyB0aGUgbGluZWFyaXR5IG9mIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiB0aGUgdHdvIHZhcmlhYmxlcy4KCiMjIyBFeGVyY2lzZSAxMAoKSSB0aGluayB0aGUgcmVzaWR1YWwgdnMgZml0dGVkIHBsb3Qgc2hvd3MgdGhlIGNvbnN0YW50IHZhcmlhYmlsaXR5IGNvbmRpdGlvbnMgbm90IGJlaW5nIHZpb2xhdGVkIGFzIHBvaW50cyBhcmUgcmFuZG9tbHkgc2NhdHRlcmVkIG9uIHRoZSBwbG90Lgo=