Paola_PAD 6833_Homework 5

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Load the raw table from eci.csv (path is relative to the working directory).
data <- read_csv(file = "eci.csv")

## Rows: 274 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): County, CSCS, CSFA, TS, PPSC, TPPS
## dbl (1): B3P
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Before analysing the data, convert the count columns to numeric.
#
# Why not as.numeric(): on a character column it turns EVERY value that is not
# a plain number into NA (hence the "NAs introduced by coercion" warnings).
# That covers the '*' marker, but also any valid value that merely carries
# formatting such as a thousands separator ("1,234"). Those rows were then
# silently discarded by na.omit().
# readr::parse_number() strips grouping marks and other formatting, and its
# `na` argument maps '*' (and blanks) to NA explicitly and without warnings.
# NOTE(review): assumes the non-'*' values that failed coercion are formatted
# numbers -- confirm against eci.csv.
count_cols <- c("B3P", "CSCS", "CSFA", "TS")

# Convert one column to numeric; columns readr already parsed as numbers
# (e.g. B3P) are returned untouched because parse_number() needs characters.
to_number <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  parse_number(x, na = c("", "NA", "*"))
}

data <- mutate(data, across(all_of(count_cols), to_number))

# Next, remove rows with missing values -- but only in the columns that are
# analysed below. na.omit(data) also dropped a row when an unrelated column
# (CSFA, PPSC, TPPS, ...) was missing, shrinking the sample for no reason.
cleaned_data <- drop_na(data, B3P, CSCS, TS)

# NOTE: any results printed further down in this document were produced with
# the previous cleaning step; re-run the script to refresh them.

# Correlation matrix (cor() default method) for the three analysis columns,
# computed on complete observations only, then printed.
correlations <- cor(
  x = cleaned_data[c("B3P", "CSCS", "TS")],
  use = "complete.obs"
)
print(correlations)

##            B3P      CSCS        TS
## B3P  1.0000000 0.9757853 0.9759675
## CSCS 0.9757853 1.0000000 0.9999116
## TS   0.9759675 0.9999116 1.0000000

# Scatterplot matrix of the same three columns to inspect each pairwise
# relationship visually.
pairs(x = cleaned_data[c("B3P", "CSCS", "TS")])

# Correlation between B3P and CSCS under three different methods:
# Pearson, Spearman and Kendall.
pearson_corr <- with(cleaned_data, cor(B3P, CSCS, method = "pearson"))
spearman_corr <- with(cleaned_data, cor(B3P, CSCS, method = "spearman"))
kendall_corr <- with(cleaned_data, cor(B3P, CSCS, method = "kendall"))

# Report the coefficient obtained with each correlation method,
# starting with Pearson.
print(paste("Pearson Correlation:", pearson_corr, sep = " "))

## [1] "Pearson Correlation: 0.975785261578813"

# Report the Spearman coefficient.
print(paste("Spearman Correlation:", spearman_corr, sep = " "))

## [1] "Spearman Correlation: 0.428571428571429"

# Report the Kendall coefficient.
print(paste("Kendall Correlation:", kendall_corr, sep = " "))

## [1] "Kendall Correlation: 0.2"

Paola_PAD 6833_Homework 5

2024-10-14