library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(readr)
# Load the workers' compensation claims table from the working directory.
# No column types are given, so readr infers them from the file.
Workers_Compensation_Claims_Data <- read_csv(
  file = "Workers__Compensation_Claims_Data.csv"
)
## Rows: 56 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (18): Year, Subject employers, Subject employees, Accepted disabling cla...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Descriptive statistics for the denied-claims column. `norm = TRUE` adds the
# skewness, kurtosis and normality-test entries to the basic summary.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
stat.desc(
  Workers_Compensation_Claims_Data$`Denied claims`,
  norm = TRUE
)
##       nbr.val      nbr.null        nbr.na           min           max 
##  5.400000e+01  0.000000e+00  2.000000e+00  1.709000e+03  2.091500e+04 
##         range           sum        median          mean       SE.mean 
##  1.920600e+04  6.152410e+05  1.026450e+04  1.139335e+04  7.983136e+02 
##  CI.mean.0.95           var       std.dev      coef.var      skewness 
##  1.601214e+03  3.441445e+07  5.866383e+03  5.148953e-01  1.380053e-01 
##      skew.2SE      kurtosis      kurt.2SE    normtest.W    normtest.p 
##  2.126061e-01 -1.122192e+00 -8.782316e-01  9.350957e-01  5.851845e-03
library(dplyr)
# Keep the two columns of interest, ordered by denied claims (largest first)
# with ties broken by ascending subject employees.
claims_4 <- Workers_Compensation_Claims_Data %>%
  select(`Denied claims`, `Subject employees`) %>%
  arrange(desc(`Denied claims`), `Subject employees`)

# Histogram of denied claims, requesting roughly 50 breaks.
hist(
  claims_4$`Denied claims`,
  breaks = 50,
  main = "Histogram of Denied Claims",
  xlab = "Denied Claims"
)

# Scatter plot of the two variables before any transformation.
ggplot(claims_4, aes(x = `Denied claims`, y = `Subject employees`)) +
  geom_point() +
  ggtitle("Untransformed Data")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Rebuild claims_4 with the employee count on the natural-log scale: pick the
# two columns (renaming the employee count on the way), then log it in place.
claims_4 <- Workers_Compensation_Claims_Data %>%
  select(`Denied claims`, Employees = `Subject employees`) %>%
  mutate(Employees = log(Employees))

# Show the first rows of the transformed table.
head(claims_4)
## # A tibble: 6 × 2
##   `Denied claims` Employees
##             <dbl>     <dbl>
## 1              NA      13.4
## 2              NA      13.5
## 3            1935      13.5
## 4            1709      13.5
## 5            2177      13.6
## 6            2408      13.6
# Density-scaled histogram of denied claims on its own.
hist(claims_4$`Denied claims`, breaks = 10, probability = TRUE)

# The same histogram with a kernel density estimate overlaid.
# The curve must be estimated from the variable that is plotted: the previous
# overlay used the log employee count, whose values sit far outside this
# x-axis range, so the line did not describe the histogram at all.
# `na.rm = TRUE` is required because the denied-claims column contains missing
# values and density() stops with an error on NA input otherwise.
hist(claims_4$`Denied claims`, breaks = 10, probability = TRUE)
lines(
  density(claims_4$`Denied claims`, na.rm = TRUE),
  col = "red",
  lwd = 2
)