Mini Project

# Load the built-in 'cars' dataset (columns: 'speed', then 'dist') and
# report the median of its first column, 'speed'.
data(cars)
median(cars[["speed"]])

## [1] 15

# 1. Load the jsonlite package
#    (install it first with install.packages("jsonlite") if needed).
library(jsonlite)

# 2. Construct the API URL.
# The endpoint for Daily Pair OHLCV is /data/v2/histoday.
#   - fsym=BTC (From Symbol: Bitcoin)
#   - tsym=USD (To Symbol: US Dollar)
#   - limit=100 (Get the last 100 days of data)
api_url <- paste0(
  "https://min-api.cryptocompare.com/data/v2/histoday",
  "?fsym=BTC&tsym=USD&limit=100"
)

# 3. Use fromJSON() to retrieve and parse the data.
btc_raw_data <- fromJSON(api_url)

# 4. Extract the actual historical data frame, nested under Data$Data.
btc_ohlcv_df <- btc_raw_data$Data$Data

# Fail fast when the response does not carry the expected table. Otherwise a
# failed request leaves NULL here and the later max() silently yields -Inf.
# NOTE(review): the 'Message' field is assumed to hold the API's error text;
# it is simply omitted from the error when the response has no such field.
if (!is.data.frame(btc_ohlcv_df) ||
    nrow(btc_ohlcv_df) == 0 ||
    !"close" %in% names(btc_ohlcv_df)) {
  stop(
    "Unexpected response from ", api_url, ": ",
    paste(btc_raw_data$Message, collapse = " "),
    call. = FALSE
  )
}


# 5. What is the maximum of the daily close price?
# Take the largest value in the 'close' column, skipping missing entries.
max_close_price <- max(btc_ohlcv_df[["close"]], na.rm = TRUE)

# 6. Report the result: a label line followed by the value itself.
print("Maximum daily close price for BTC/USD over the last 100 days:")

## [1] "Maximum daily close price for BTC/USD over the last 100 days:"

print(max_close_price)

## [1] 124723

# Title: Major Decisions: A Comparative Analysis of Economic Outcomes for Recent College Graduates

# 3 Questions: 
# 1. Which Major Categories yield the highest and lowest average median salaries?
# 2. What is the relationship between a major's median salary and the rate of low-wage job employment?
# 3. How does the total size of a major category correlate with its unemployment rate?


library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(knitr)



# Location of the FiveThirtyEight recent-graduates CSV
data_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/recent-grads.csv"

# Download and parse the CSV with base R
raw_data <- read.csv(file = data_url)

print("STATUS: Data Extracted.")

## [1] "STATUS: Data Extracted."

# Report the row and column counts as "rows, columns"
print(paste("Dimensions of Raw Data (Rows, Columns):", toString(dim(raw_data))))

## [1] "Dimensions of Raw Data (Rows, Columns): 173, 21"

# Inspect column names, types, and leading values of the raw data
print("--- Initial Data Structure (str) ---")

## [1] "--- Initial Data Structure (str) ---"

str(raw_data)

## 'data.frame':    173 obs. of  21 variables:
##  $ Rank                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Major_code          : int  2419 2416 2415 2417 2405 2418 6202 5001 2414 2408 ...
##  $ Major               : chr  "PETROLEUM ENGINEERING" "MINING AND MINERAL ENGINEERING" "METALLURGICAL ENGINEERING" "NAVAL ARCHITECTURE AND MARINE ENGINEERING" ...
##  $ Total               : int  2339 756 856 1258 32260 2573 3777 1792 91227 81527 ...
##  $ Men                 : int  2057 679 725 1123 21239 2200 2110 832 80320 65511 ...
##  $ Women               : int  282 77 131 135 11021 373 1667 960 10907 16016 ...
##  $ Major_category      : chr  "Engineering" "Engineering" "Engineering" "Engineering" ...
##  $ ShareWomen          : num  0.121 0.102 0.153 0.107 0.342 ...
##  $ Sample_size         : int  36 7 3 16 289 17 51 10 1029 631 ...
##  $ Employed            : int  1976 640 648 758 25694 1857 2912 1526 76442 61928 ...
##  $ Full_time           : int  1849 556 558 1069 23170 2038 2924 1085 71298 55450 ...
##  $ Part_time           : int  270 170 133 150 5180 264 296 553 13101 12695 ...
##  $ Full_time_year_round: int  1207 388 340 692 16697 1449 2482 827 54639 41413 ...
##  $ Unemployed          : int  37 85 16 40 1672 400 308 33 4650 3895 ...
##  $ Unemployment_rate   : num  0.0184 0.1172 0.0241 0.0501 0.0611 ...
##  $ Median              : int  110000 75000 73000 70000 65000 65000 62000 62000 60000 60000 ...
##  $ P25th               : int  95000 55000 50000 43000 50000 50000 53000 31500 48000 45000 ...
##  $ P75th               : int  125000 90000 105000 80000 75000 102000 72000 109000 70000 72000 ...
##  $ College_jobs        : int  1534 350 456 529 18314 1142 1768 972 52844 45829 ...
##  $ Non_college_jobs    : int  364 257 176 102 4440 657 314 500 16384 10874 ...
##  $ Low_wage_jobs       : int  193 50 0 0 972 244 259 220 3253 3170 ...

# 3.1. Remove rows with missing values in the key columns only (unlike
# na.omit(), which would drop a row for an NA in any column).
rows_before <- nrow(raw_data)

# Keep rows where Total, Unemployment_rate, and Employed are all present
key_columns <- c("Total", "Unemployment_rate", "Employed")
cleaned_data <- raw_data[complete.cases(raw_data[, key_columns]), ]

rows_after <- nrow(cleaned_data)

# cat() is used instead of print() so the leading "\n" is emitted as a real
# blank line; print() would display the escape sequence literally.
cat(
  "\nSTATUS: NA Cleaning Complete. Removed", rows_before - rows_after,
  "row(s). Retained", rows_after, "rows.\n"
)

## 
## STATUS: NA Cleaning Complete. Removed 1 row(s). Retained 172 rows.

# Build the per-major analysis table: derive the low-wage job rate, then keep
# (and rename) only the columns the three questions need (Section 3.3).
analysis_data <- cleaned_data %>%
  mutate(
    # Q2 derivation: share of employed graduates in low-wage jobs, in percent.
    # A major with no employed graduates has no meaningful rate, so it gets NA
    # rather than the NaN/Inf that dividing by zero would produce (Inf would
    # not be removed by na.rm = TRUE or use = "complete.obs" downstream).
    Low_Wage_Job_Rate = if_else(
      Employed > 0,
      Low_wage_jobs / Employed * 100,
      NA_real_
    )
  ) %>%
  # select() renames while selecting, so no duplicate columns are created
  select(
    Major,
    Major_Category = Major_category,
    Total,
    Median_Salary = Median,
    Unemployment_rate,
    Low_Wage_Job_Rate
  )

# cat() (not print()) so the leading "\n" becomes an actual blank line
cat("\nSTATUS: Derived metrics (Low_Wage_Job_Rate) calculated.\n")

## 
## STATUS: Derived metrics (Low_Wage_Job_Rate) calculated.

# Category-level metrics for Q1 (Section 3.3): group the majors by category,
# average the per-major salary and rate columns, total the graduates, and
# order the categories from highest to lowest average median salary.
category_summary <- arrange(
  summarise(
    group_by(analysis_data, Major_Category),
    Avg_Median_Salary = mean(Median_Salary, na.rm = TRUE),
    Avg_Unemployment_Rate = mean(Unemployment_rate, na.rm = TRUE),
    Avg_Low_Wage_Job_Rate = mean(Low_Wage_Job_Rate, na.rm = TRUE),
    Total_Graduates = sum(Total, na.rm = TRUE),
    # Single grouping variable, so the result is ungrouped either way
    .groups = "drop"
  ),
  desc(Avg_Median_Salary)
)


# Section header for Q1. cat() is used instead of print() so the leading "\n"
# is emitted as a real blank line rather than shown as a literal escape.
cat("\n--- Q1: Summary by Major Category (Highest and Lowest Paying) ---\n")

## 
## --- Q1: Summary by Major Category (Highest and Lowest Paying) ---

cat("Highest Paying Categories:\n")

## Highest Paying Categories:

# Top five rows of the salary-sorted summary. 'digits' is given per column:
# Avg_Unemployment_Rate is a proportion, so rounding it to 0 digits (as a
# single digits = 0 does) prints every rate as 0; it keeps 3 digits instead.
# NOTE: the recorded table output below predates this change; re-run to
# refresh it.
print(
  head(category_summary, 5) %>%
    kable(
      caption = "Top 5 Major Categories by Average Median Salary",
      digits = c(0, 0, 3, 0, 0)
    )
)

## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")

## Warning in attr(x, "format"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")

## 
## 
## Table: Top 5 Major Categories by Average Median Salary
## 
## |Major_Category          | Avg_Median_Salary| Avg_Unemployment_Rate| Avg_Low_Wage_Job_Rate| Total_Graduates|
## |:-----------------------|-----------------:|---------------------:|---------------------:|---------------:|
## |Engineering             |             57383|                     0|                     6|          537583|
## |Business                |             43538|                     0|                    11|         1302376|
## |Computers & Mathematics |             42745|                     0|                     7|          299008|
## |Law & Public Policy     |             42200|                     0|                    13|          179107|
## |Physical Sciences       |             41890|                     0|                    12|          185479|

# cat() (not print()) so the leading "\n" becomes an actual blank line
cat("\nLowest Paying Categories:\n")

## 
## Lowest Paying Categories:

# Last five rows of the salary-sorted summary. 'digits' is given per column:
# Avg_Unemployment_Rate is a proportion, so rounding it to 0 digits (as a
# single digits = 0 does) prints every rate as 0; it keeps 3 digits instead.
# NOTE: the recorded table output below predates this change; re-run to
# refresh it.
print(
  tail(category_summary, 5) %>%
    kable(
      caption = "Bottom 5 Major Categories by Average Median Salary",
      digits = c(0, 0, 3, 0, 0)
    )
)

## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")

## 
## 
## Table: Bottom 5 Major Categories by Average Median Salary
## 
## |Major_Category              | Avg_Median_Salary| Avg_Unemployment_Rate| Avg_Low_Wage_Job_Rate| Total_Graduates|
## |:---------------------------|-----------------:|---------------------:|---------------------:|---------------:|
## |Communications & Journalism |             34500|                     0|                    15|          392601|
## |Arts                        |             33062|                     0|                    23|          357130|
## |Education                   |             32350|                     0|                     9|          559129|
## |Humanities & Liberal Arts   |             31913|                     0|                    18|          713468|
## |Psychology & Social Work    |             30100|                     0|                    12|          481007|

# Section header. cat() is used instead of print() so the leading "\n" is
# emitted as a real blank line rather than shown as a literal escape.
cat("\n--- Correlation Results (Answering Q2 and Q3) ---\n")

## 
## --- Correlation Results (Answering Q2 and Q3) ---

# Q2: Relationship between Median Salary and Low-Wage Job Rate, computed
# across individual majors; rows with a missing value in either column are
# excluded by use = "complete.obs".
salary_low_wage_corr <- cor(
  analysis_data$Median_Salary,
  analysis_data$Low_Wage_Job_Rate,
  use = "complete.obs"
)

# Q3: Correlation between Total Graduates (size) and Avg Unemployment Rate,
# computed across major categories (one point per category).
total_unemp_corr <- cor(
  category_summary$Total_Graduates,
  category_summary$Avg_Unemployment_Rate,
  use = "complete.obs"
)

print(paste("Q2 (Median Salary vs Low-Wage Job Rate):", round(salary_low_wage_corr, 3)))

## [1] "Q2 (Median Salary vs Low-Wage Job Rate): -0.459"

print(paste("Q3 (Total Graduates vs Avg Unemployment Rate):", round(total_unemp_corr, 3)))

## [1] "Q3 (Total Graduates vs Avg Unemployment Rate): 0.151"

# Section header. cat() is used instead of print() so the leading "\n" is
# emitted as a real blank line rather than shown as a literal escape.
cat("\n--- Final Prepared Analysis Dataset (First 5 Rows) ---\n")

## 
## --- Final Prepared Analysis Dataset (First 5 Rows) ---

# Preview the first five rows of the prepared per-major table
print(head(analysis_data, 5))

##                                       Major Major_Category Total Median_Salary
## 1                     PETROLEUM ENGINEERING    Engineering  2339        110000
## 2            MINING AND MINERAL ENGINEERING    Engineering   756         75000
## 3                 METALLURGICAL ENGINEERING    Engineering   856         73000
## 4 NAVAL ARCHITECTURE AND MARINE ENGINEERING    Engineering  1258         70000
## 5                      CHEMICAL ENGINEERING    Engineering 32260         65000
##   Unemployment_rate Low_Wage_Job_Rate
## 1        0.01838053          9.767206
## 2        0.11724138          7.812500
## 3        0.02409639          0.000000
## 4        0.05012531          0.000000
## 5        0.06109771          3.782984

Mini Project

Sara W

2025-10-15