Question 1

# Load the built-in `cars` dataset and compute the median of its `speed` column.
data("cars")
median_speed <- median(cars[["speed"]])
median_speed
## [1] 15

Question 2

# install.packages("jsonlite")  # one-time setup; run manually if jsonlite is missing
library(jsonlite)

# CryptoCompare `histoday` endpoint, queried with fsym=BTC, tsym=USD, limit=100.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"

# Fetch the URL and parse the JSON body into R objects.
response <- fromJSON(txt = url)

# The price records sit in the nested `Data` element of the top-level `Data`.
btc_data <- response[["Data"]][["Data"]]

# Inspect the structure of the extracted object.
str(object = btc_data)
## 'data.frame':    101 obs. of  9 variables:
##  $ time            : int  1721520000 1721606400 1721692800 1721779200 1721865600 1721952000 1722038400 1722124800 1722211200 1722297600 ...
##  $ high            : num  68371 68491 67775 67121 66154 ...
##  $ low             : num  65841 66595 65466 65112 63429 ...
##  $ open            : num  67164 68177 67568 65940 65376 ...
##  $ volumefrom      : num  18123 26713 29466 21576 29473 ...
##  $ volumeto        : num  1.22e+09 1.81e+09 1.96e+09 1.43e+09 1.90e+09 ...
##  $ close           : num  68177 67568 65940 65376 65794 ...
##  $ conversionType  : chr  "direct" "direct" "direct" "direct" ...
##  $ conversionSymbol: chr  "" "" "" "" ...
# Largest value in the `close` column, skipping any missing entries.
max_close_price <- max(btc_data[["close"]], na.rm = TRUE)
print(x = max_close_price)
## [1] 72683.93
# Maximum daily close price in the returned window is $72,683.93 (the value printed above for this run).

Question 3

Identify a topic of interest and give your project a name/title. → Remote Work & Its Impact on Mental Health

Phrase 3-5 research questions you would like to explore. 1. How has the shift to remote work affected employee productivity across various industries? 2. In which industries does remote work have the greatest impact on employee mental health? 3. Are there differences in productivity levels between full-time remote workers and hybrid workers?

List the data sources that you find relevant to your research questions. https://www.kaggle.com/datasets/waqi786/remote-work-and-mental-health

Describe your data extracted, statistically and/or visually. - Our dataset includes variables such as work location, industry, stress level, age, number of virtual meetings, and social isolation rating.

# Read the dataset CSV; the relative path is resolved against the working directory.
remote_work_mental_health <- read.csv(
  file = "Impact_of_Remote_Work_on_Mental_Health.csv"
)
# View(remote_work_mental_health)  # interactive viewer; left disabled
# Per-column summary of the loaded data frame.
summary(object = remote_work_mental_health)
##  Employee_ID             Age        Gender            Job_Role        
##  Length:5000        Min.   :22   Length:5000        Length:5000       
##  Class :character   1st Qu.:31   Class :character   Class :character  
##  Mode  :character   Median :41   Mode  :character   Mode  :character  
##                     Mean   :41                                        
##                     3rd Qu.:51                                        
##                     Max.   :60                                        
##    Industry         Years_of_Experience Work_Location     
##  Length:5000        Min.   : 1.00       Length:5000       
##  Class :character   1st Qu.: 9.00       Class :character  
##  Mode  :character   Median :18.00       Mode  :character  
##                     Mean   :17.81                         
##                     3rd Qu.:26.00                         
##                     Max.   :35.00                         
##  Hours_Worked_Per_Week Number_of_Virtual_Meetings Work_Life_Balance_Rating
##  Min.   :20.00         Min.   : 0.000             Min.   :1.000           
##  1st Qu.:29.00         1st Qu.: 4.000             1st Qu.:2.000           
##  Median :40.00         Median : 8.000             Median :3.000           
##  Mean   :39.61         Mean   : 7.559             Mean   :2.984           
##  3rd Qu.:50.00         3rd Qu.:12.000             3rd Qu.:4.000           
##  Max.   :60.00         Max.   :15.000             Max.   :5.000           
##  Stress_Level       Mental_Health_Condition Access_to_Mental_Health_Resources
##  Length:5000        Length:5000             Length:5000                      
##  Class :character   Class :character        Class :character                 
##  Mode  :character   Mode  :character        Mode  :character                 
##                                                                              
##                                                                              
##                                                                              
##  Productivity_Change Social_Isolation_Rating Satisfaction_with_Remote_Work
##  Length:5000         Min.   :1.000           Length:5000                  
##  Class :character    1st Qu.:2.000           Class :character             
##  Mode  :character    Median :3.000           Mode  :character             
##                      Mean   :2.994                                        
##                      3rd Qu.:4.000                                        
##                      Max.   :5.000                                        
##  Company_Support_for_Remote_Work Physical_Activity  Sleep_Quality     
##  Min.   :1.000                   Length:5000        Length:5000       
##  1st Qu.:2.000                   Class :character   Class :character  
##  Median :3.000                   Mode  :character   Mode  :character  
##  Mean   :3.008                                                        
##  3rd Qu.:4.000                                                        
##  Max.   :5.000                                                        
##     Region         
##  Length:5000       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

# Show each column's type together with its first few values.
str(object = remote_work_mental_health)
## 'data.frame':    5000 obs. of  20 variables:
##  $ Employee_ID                      : chr  "EMP0001" "EMP0002" "EMP0003" "EMP0004" ...
##  $ Age                              : int  32 40 59 27 49 59 31 42 56 30 ...
##  $ Gender                           : chr  "Non-binary" "Female" "Non-binary" "Male" ...
##  $ Job_Role                         : chr  "HR" "Data Scientist" "Software Engineer" "Software Engineer" ...
##  $ Industry                         : chr  "Healthcare" "IT" "Education" "Finance" ...
##  $ Years_of_Experience              : int  13 3 22 20 32 31 24 6 9 28 ...
##  $ Work_Location                    : chr  "Hybrid" "Remote" "Hybrid" "Onsite" ...
##  $ Hours_Worked_Per_Week            : int  47 52 46 32 35 39 51 54 24 57 ...
##  $ Number_of_Virtual_Meetings       : int  7 4 11 8 12 3 7 7 4 6 ...
##  $ Work_Life_Balance_Rating         : int  2 1 5 4 2 4 3 3 2 1 ...
##  $ Stress_Level                     : chr  "Medium" "Medium" "Medium" "High" ...
##  $ Mental_Health_Condition          : chr  "Depression" "Anxiety" "Anxiety" "Depression" ...
##  $ Access_to_Mental_Health_Resources: chr  "No" "No" "No" "Yes" ...
##  $ Productivity_Change              : chr  "Decrease" "Increase" "No Change" "Increase" ...
##  $ Social_Isolation_Rating          : int  1 3 4 3 3 5 5 5 2 2 ...
##  $ Satisfaction_with_Remote_Work    : chr  "Unsatisfied" "Satisfied" "Unsatisfied" "Unsatisfied" ...
##  $ Company_Support_for_Remote_Work  : int  1 2 5 3 3 1 3 4 4 1 ...
##  $ Physical_Activity                : chr  "Weekly" "Weekly" "None" "None" ...
##  $ Sleep_Quality                    : chr  "Good" "Good" "Poor" "Poor" ...
##  $ Region                           : chr  "Europe" "Asia" "North America" "Europe" ...
# Preview the first rows of the dataset.
# The previous call, `head(data)`, picked up the base R function `data()`
# rather than the data frame, so it printed the first lines of that
# function's source code. The stale output was removed; re-knit to
# regenerate the preview.
head(remote_work_mental_health)
# Count missing entries in the grouping column and in the stress column.
sum(is.na(remote_work_mental_health[["Work_Location"]]))
## [1] 0
sum(is.na(remote_work_mental_health[["Stress_Level"]]))
## [1] 0
# Average stress per work location, highest first, with the group size.
#
# Stress_Level is stored as text (e.g. "Medium", "High"), so calling mean()
# on it directly returns NA for every group and raises
# "argument is not numeric or logical". The labels are therefore mapped
# onto an ordinal 1-3 score before averaging.
#
# NOTE(review): the ordering Low < Medium < High is assumed; only "Medium"
# and "High" are visible in the str() output. Confirm with
# unique(remote_work_mental_health$Stress_Level). Any label outside this
# set becomes NA and is dropped from the mean by na.rm = TRUE.
stress_summary <- remote_work_mental_health %>%
  mutate(
    stress_score = match(Stress_Level, c("Low", "Medium", "High"))
  ) %>%
  group_by(Work_Location) %>%
  summarise(
    average_stress = mean(stress_score, na.rm = TRUE),
    count = n()
  ) %>%
  arrange(desc(average_stress))