setwd("C:/Users/cdaniels/Downloads")
my_data <- read.csv("train 14(in).csv", header = TRUE)
file_path <- "train 14(in).csv"
df <- read.csv(file_path, stringsAsFactors = FALSE, na.strings = c("", "NA"))
nrow(df); ncol(df)
## [1] 25121
## [1] 67
str(df)
## 'data.frame': 25121 obs. of 67 variables:
## $ id : int 0 1 2 3 4 5 6 7 8 9 ...
## $ forecasting.period : int 4 4 1 2 4 4 2 2 4 2 ...
## $ net_profit_over_total_assets : chr "0.141939282" "0.147560479" "-0.024542174" "0.15931717" ...
## $ total_liabilities_over_total_assets : chr "0.450292264" "0.234839979" "0.311544325" "0.777016886" ...
## $ working_capital_over_total_assets : chr "0.021421962" "0.708334845" "0.067830648" "0.207430847" ...
## $ current_assets_over_short_term_liabilities : chr "1.052986556" "4.016018367" "1.255247103" "1.267310049" ...
## $ cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365: chr "-28.52908379" "119.4456107" "-15.01992269" "-4.892917756" ...
## $ retained_earnings_over_total_assets : chr "0.141933515" "0.457089625" "-0.657002778" "-5.19E-06" ...
## $ EBIT_over_total_assets : chr "0.172629003" "0.186069671" "-0.024536711" "0.159321164" ...
## $ book_value_of_equity_over_total_liabilities : chr "0.990337006" "3.206444965" "2.209000358" "0.288177363" ...
## $ sales_over_total_assets : chr "1.087355901" "1.093404091" "0.82557841" "2.283531974" ...
## $ equity_over_total_assets : chr "0.445947831" "0.753147272" "0.68845271" "0.222975074" ...
## $ gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets : chr "0.172639901" "0.186074127" "-0.024536117" "0.401454214" ...
## $ gross_profit_over_short_term_liabilities : chr "0.428585346" "0.792273513" "-0.092083189" "0.205514237" ...
## $ gross_profit_plus_depreciation_over_sales : chr "0.098482024" "0.086799659" "0.001798631" "0.070944185" ...
## $ gross_profit_plus_interest_over_total_assets : chr "0.172623756" "0.186064405" "-0.024538477" "0.159313154" ...
## $ total_liabilities_times_365_over_gross_profit_plus_depreciation : chr "737.5983611" "427.990535" "79930.04047" "1750.629791" ...
## $ gross_profit_plus_depreciation_over_total_liabilities : chr "0.495021686" "0.852970183" "0.004578722" "0.208547924" ...
## $ total_assets_over_total_liabilities : chr "2.220461536" "4.257780528" "3.209402349" "1.286706862" ...
## $ gross_profit_over_total_assets : chr "0.172636989" "0.186069814" "-0.024531282" "0.159326426" ...
## $ gross_profit_over_sales : chr "0.076197623" "0.080696018" "-0.029641659" "0.069795748" ...
## $ inventory_times_365_over_sales : chr "28.29928484" "1.542870004" "42.33579188" "36.81410415" ...
## $ sales_n_over_sales_n_minus_1 : chr "1.028425054" "1.051071834" "1.369948775" "?" ...
## $ profit_on_operating_activities_over_total_assets : chr "0.189883384" "0.184396365" "3.03E-06" "0.219399533" ...
## $ net_profit_over_sales : chr "0.062674528" "0.064051056" "-0.029717478" "0.069806285" ...
## $ gross_profit_in_3_years_over_total_assets : chr "0.172627309" "0.576977247" "-0.105845027" "?" ...
## $ equity_minus_share_capital_over_total_assets : chr "0.44597852" "0.753142175" "-0.681545104" "0.126421763" ...
## $ net_profit_plus_depreciation_over_total_liabilities : chr "0.426728805" "0.688728876" "0.004576887" "0.208290256" ...
## $ profit_on_operating_activities_over_financial_expenses : chr "?" "0.839642289" "?" "0.829707025" ...
## $ working_capital_over_fixed_assets : chr "0.037082538" "12.4681881" "0.101929962" "11.82908241" ...
## $ logarithm_of_total_assets : chr "4.572500412" "4.531599778" "4.098599267" "4.094300228" ...
## $ total_liabilities_minus_cash_over_sales : chr "0.192113491" "0.062276517" "0.285022352" "0.181123207" ...
## $ gross_profit_plus_interest_over_sales : chr "0.076140262" "0.08074136" "-0.059420243" "0.069739439" ...
## $ current_liabilities_times_365_over_cost_of_products_sold : chr "70.55540863" "40.79720761" "131.6358562" "137.2042179" ...
## $ operating_expenses_over_short_term_liabilities : chr "5.173970681" "8.975367733" "2.773073593" "2.662950931" ...
## $ operating_expenses_over_total_liabilities : chr "0.421744751" "0.785422271" "2.371519482" "2.656136523" ...
## $ profit_on_sales_over_total_assets : chr "0.189890183" "0.184388267" "0.004913423" "0.219739891" ...
## $ total_sales_over_total_assets : chr "2.290576424" "2.314395629" "0.825596769" "2.28351007" ...
## $ current_assets_minus_inventories_over_long_term_liabilities : chr "5.224748458" "?" "6.267797656" "?" ...
## $ constant_capital_over_total_assets : chr "0.493564626" "0.753162904" "0.72649338" "0.222963457" ...
## $ profit_on_sales_over_sales : chr "0.083730273" "0.079966362" "0.005919707" "0.09620642" ...
## $ current_assets_minus_inventory_minus_receivables_over_short_term_liabilities : chr "0.057484471" "0.40272699" "0.295161911" "0.471850147" ...
## $ total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365 : chr "0.059289849" "0.039095725" "0.400987929" "0.116465754" ...
## $ profit_on_operating_activities_over_sales : chr "0.083764648" "0.079992035" "-1.00E-05" "0.096058248" ...
## $ rotation_receivables_plus_inventory_turnover_in_days : chr "64.960411" "134.2431965" "112.7638256" "98.73102072" ...
## $ receivables_times_365_over_sales : chr "36.35313301" "132.9435579" "70.77571731" "61.64946552" ...
## $ net_profit_over_inventory : chr "0.809084542" "14.9372214" "-0.256301524" "0.689734318" ...
## $ current_assets_minus_inventory_over_short_term_liabilities : chr "0.617366834" "3.974228113" "0.894991135" "0.970343764" ...
## $ inventory_times_365_over_cost_of_products_sold : chr "30.73059918" "1.685797747" "47.24909088" "40.80691807" ...
## $ EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets : chr "0.139657816" "0.170146135" "-0.025969977" "0.216729403" ...
## $ EBITDA_profit_on_operating_activities_minus_depreciation_over_sales : chr "0.06163928" "0.073831252" "-0.031458484" "0.094920716" ...
## $ current_assets_over_total_liabilities : chr "0.941651229" "4.015087615" "1.073177262" "1.264002613" ...
## $ short_term_liabilities_over_total_assets : chr "0.402678044" "0.234846036" "0.266457526" "0.775033011" ...
## $ short_term_liabilities_times_365_over_cost_of_products_sold : chr "0.191955456" "0.111752055" "0.361056749" "0.375547235" ...
## $ equity_over_fixed_assets : chr "0.775922489" "13.26017504" "1.034522543" "12.71667874" ...
## $ constant_capital_over_fixed_assets : chr "0.856485847" "13.25665936" "1.092347601" "12.71483821" ...
## $ working_capital : chr "800.5424075" "24093.00049" "851.1791996" "2577.61484" ...
## $ sales_minus_cost_of_products_sold_over_sales : chr "0.079571878" "0.095217706" "0.109034526" "0.10297114" ...
## $ current_assets_minus_inventory_minus_short_term_liabilities_over_sales_minus_gross_profit_minus_depreciation : chr "0.318285559" "0.195943466" "-0.035634075" "0.714527717" ...
## $ total_costs_overtotal_sales : chr "0.927686967" "0.907056134" "0.929825197" "0.937895435" ...
## $ long_term_liabilities_over_equity : chr "0.106818145" "-2.93E-05" "0.055331375" "-7.68E-05" ...
## $ sales_over_inventory : chr "12.94179729" "233.3016883" "8.589990831" "9.928221186" ...
## $ sales_over_receivables : chr "10.04458489" "2.747415818" "5.164737918" "5.912786248" ...
## $ short_term_liabilities_times_365_over_sales : chr "64.71369972" "36.96743452" "117.9240397" "123.7526101" ...
## $ sales_over_short_term_liabilities : chr "5.626894265" "9.813419478" "3.098272636" "2.946008382" ...
## $ sales_over_fixed_assets : chr "3.937064101" "40.56773918" "1.239710503" "130.2083212" ...
## $ class : int 1 0 0 0 0 0 0 0 0 0 ...
summary(df)
## id forecasting.period net_profit_over_total_assets
## Min. : 0 Min. :1.000 Length:25121
## 1st Qu.: 6280 1st Qu.:2.000 Class :character
## Median :12560 Median :3.000 Mode :character
## Mean :12560 Mean :2.609
## 3rd Qu.:18840 3rd Qu.:4.000
## Max. :25120 Max. :4.000
## total_liabilities_over_total_assets working_capital_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## current_assets_over_short_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365
## Length:25121
## Class :character
## Mode :character
##
##
##
## retained_earnings_over_total_assets EBIT_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## book_value_of_equity_over_total_liabilities sales_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## equity_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## gross_profit_over_short_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## gross_profit_plus_depreciation_over_sales
## Length:25121
## Class :character
## Mode :character
##
##
##
## gross_profit_plus_interest_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## total_liabilities_times_365_over_gross_profit_plus_depreciation
## Length:25121
## Class :character
## Mode :character
##
##
##
## gross_profit_plus_depreciation_over_total_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## total_assets_over_total_liabilities gross_profit_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## gross_profit_over_sales inventory_times_365_over_sales
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## sales_n_over_sales_n_minus_1 profit_on_operating_activities_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## net_profit_over_sales gross_profit_in_3_years_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## equity_minus_share_capital_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## net_profit_plus_depreciation_over_total_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## profit_on_operating_activities_over_financial_expenses
## Length:25121
## Class :character
## Mode :character
##
##
##
## working_capital_over_fixed_assets logarithm_of_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## total_liabilities_minus_cash_over_sales gross_profit_plus_interest_over_sales
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## current_liabilities_times_365_over_cost_of_products_sold
## Length:25121
## Class :character
## Mode :character
##
##
##
## operating_expenses_over_short_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## operating_expenses_over_total_liabilities profit_on_sales_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## total_sales_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## current_assets_minus_inventories_over_long_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## constant_capital_over_total_assets profit_on_sales_over_sales
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## current_assets_minus_inventory_minus_receivables_over_short_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365
## Length:25121
## Class :character
## Mode :character
##
##
##
## profit_on_operating_activities_over_sales
## Length:25121
## Class :character
## Mode :character
##
##
##
## rotation_receivables_plus_inventory_turnover_in_days
## Length:25121
## Class :character
## Mode :character
##
##
##
## receivables_times_365_over_sales net_profit_over_inventory
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## current_assets_minus_inventory_over_short_term_liabilities
## Length:25121
## Class :character
## Mode :character
##
##
##
## inventory_times_365_over_cost_of_products_sold
## Length:25121
## Class :character
## Mode :character
##
##
##
## EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets
## Length:25121
## Class :character
## Mode :character
##
##
##
## EBITDA_profit_on_operating_activities_minus_depreciation_over_sales
## Length:25121
## Class :character
## Mode :character
##
##
##
## current_assets_over_total_liabilities short_term_liabilities_over_total_assets
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## short_term_liabilities_times_365_over_cost_of_products_sold
## Length:25121
## Class :character
## Mode :character
##
##
##
## equity_over_fixed_assets constant_capital_over_fixed_assets working_capital
## Length:25121 Length:25121 Length:25121
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## sales_minus_cost_of_products_sold_over_sales
## Length:25121
## Class :character
## Mode :character
##
##
##
## current_assets_minus_inventory_minus_short_term_liabilities_over_sales_minus_gross_profit_minus_depreciation
## Length:25121
## Class :character
## Mode :character
##
##
##
## total_costs_overtotal_sales long_term_liabilities_over_equity
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## sales_over_inventory sales_over_receivables
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## short_term_liabilities_times_365_over_sales sales_over_short_term_liabilities
## Length:25121 Length:25121
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## sales_over_fixed_assets class
## Length:25121 Min. :0.00000
## Class :character 1st Qu.:0.00000
## Mode :character Median :0.00000
## Mean :0.04482
## 3rd Qu.:0.00000
## Max. :1.00000
head(df, 8)
tail(df, 6)
library(knitr)
# Basic type table
var_table <- data.frame(
variable = names(df),
R_inferred_class = sapply(df, function(x) class(x)[1]),
n_unique = sapply(df, function(x) length(unique(x))),
n_missing = sapply(df, function(x) sum(is.na(x))),
stringsAsFactors = FALSE
)
kable(head(var_table, 30))
| variable | R_inferred_class | n_unique | n_missing | |
|---|---|---|---|---|
| id | id | integer | 25121 | 0 |
| forecasting.period | forecasting.period | integer | 4 | 0 |
| net_profit_over_total_assets | net_profit_over_total_assets | character | 25114 | 0 |
| total_liabilities_over_total_assets | total_liabilities_over_total_assets | character | 25118 | 0 |
| working_capital_over_total_assets | working_capital_over_total_assets | character | 25119 | 0 |
| current_assets_over_short_term_liabilities | current_assets_over_short_term_liabilities | character | 25044 | 0 |
| cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 | cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 | character | 25065 | 0 |
| retained_earnings_over_total_assets | retained_earnings_over_total_assets | character | 17459 | 0 |
| EBIT_over_total_assets | EBIT_over_total_assets | character | 25116 | 0 |
| book_value_of_equity_over_total_liabilities | book_value_of_equity_over_total_liabilities | character | 25072 | 0 |
| sales_over_total_assets | sales_over_total_assets | character | 25115 | 0 |
| equity_over_total_assets | equity_over_total_assets | character | 25119 | 0 |
| gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets | gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets | character | 25096 | 0 |
| gross_profit_over_short_term_liabilities | gross_profit_over_short_term_liabilities | character | 25044 | 0 |
| gross_profit_plus_depreciation_over_sales | gross_profit_plus_depreciation_over_sales | character | 25045 | 0 |
| gross_profit_plus_interest_over_total_assets | gross_profit_plus_interest_over_total_assets | character | 25117 | 0 |
| total_liabilities_times_365_over_gross_profit_plus_depreciation | total_liabilities_times_365_over_gross_profit_plus_depreciation | character | 25100 | 0 |
| gross_profit_plus_depreciation_over_total_liabilities | gross_profit_plus_depreciation_over_total_liabilities | character | 25072 | 0 |
| total_assets_over_total_liabilities | total_assets_over_total_liabilities | character | 25073 | 0 |
| gross_profit_over_total_assets | gross_profit_over_total_assets | character | 25118 | 0 |
| gross_profit_over_sales | gross_profit_over_sales | character | 25040 | 0 |
| inventory_times_365_over_sales | inventory_times_365_over_sales | character | 25045 | 0 |
| sales_n_over_sales_n_minus_1 | sales_n_over_sales_n_minus_1 | character | 21210 | 0 |
| profit_on_operating_activities_over_total_assets | profit_on_operating_activities_over_total_assets | character | 24243 | 0 |
| net_profit_over_sales | net_profit_over_sales | character | 25037 | 0 |
| gross_profit_in_3_years_over_total_assets | gross_profit_in_3_years_over_total_assets | character | 24598 | 0 |
| equity_minus_share_capital_over_total_assets | equity_minus_share_capital_over_total_assets | character | 25119 | 0 |
| net_profit_plus_depreciation_over_total_liabilities | net_profit_plus_depreciation_over_total_liabilities | character | 25071 | 0 |
| profit_on_operating_activities_over_financial_expenses | profit_on_operating_activities_over_financial_expenses | character | 23522 | 0 |
| working_capital_over_fixed_assets | working_capital_over_fixed_assets | character | 24631 | 0 |
# Save for manual review if desired
write.csv(var_table, "initial_variable_table.csv", row.names = FALSE)
df$id <- as.character(df$id)
saveRDS(df, "df_after_recoding.rds")
suggest_type <- function(x){
if(is.numeric(x)){
nuniq <- length(unique(na.omit(x)))
disc_or_cont <- ifelse(nuniq <= 10, "discrete", "continuous")
# assume ratio if non-negative and has meaningful zero
if(all(na.omit(x) >= 0)) stevens <- "ratio (probable)" else stevens <- "interval (possible)"
return(c("quantitative", stevens, disc_or_cont, nuniq))
} else {
# categorical
uniqvals <- tolower(as.character(unique(na.omit(x))))
ord_keywords <- c("low","medium","high","very low","very high","grade","freshman","sophomore","junior","senior","agree","disagree")
if(any(sapply(ord_keywords, function(k) any(grepl(k, uniqvals))))) stevens <- "ordinal (probable)" else stevens <- "nominal (probable)"
return(c("qualitative", stevens, "discrete", length(uniqvals)))
}
}
suggestions <- t(sapply(df, suggest_type))
colnames(suggestions) <- c("quant_vs_qual", "stevens_typology", "discrete_vs_continuous", "n_unique")
suggestions <- data.frame(variable=rownames(suggestions), suggestions, row.names = NULL, stringsAsFactors = FALSE)
kable(head(suggestions, 50))
| variable | quant_vs_qual | stevens_typology | discrete_vs_continuous | n_unique |
|---|---|---|---|---|
| id | qualitative | nominal (probable) | discrete | 25121 |
| forecasting.period | quantitative | ratio (probable) | discrete | 4 |
| net_profit_over_total_assets | qualitative | nominal (probable) | discrete | 25114 |
| total_liabilities_over_total_assets | qualitative | nominal (probable) | discrete | 25118 |
| working_capital_over_total_assets | qualitative | nominal (probable) | discrete | 25119 |
| current_assets_over_short_term_liabilities | qualitative | nominal (probable) | discrete | 25044 |
| cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 | qualitative | nominal (probable) | discrete | 25065 |
| retained_earnings_over_total_assets | qualitative | nominal (probable) | discrete | 17459 |
| EBIT_over_total_assets | qualitative | nominal (probable) | discrete | 25116 |
| book_value_of_equity_over_total_liabilities | qualitative | nominal (probable) | discrete | 25072 |
| sales_over_total_assets | qualitative | nominal (probable) | discrete | 25115 |
| equity_over_total_assets | qualitative | nominal (probable) | discrete | 25119 |
| gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets | qualitative | nominal (probable) | discrete | 25096 |
| gross_profit_over_short_term_liabilities | qualitative | nominal (probable) | discrete | 25044 |
| gross_profit_plus_depreciation_over_sales | qualitative | nominal (probable) | discrete | 25045 |
| gross_profit_plus_interest_over_total_assets | qualitative | nominal (probable) | discrete | 25117 |
| total_liabilities_times_365_over_gross_profit_plus_depreciation | qualitative | nominal (probable) | discrete | 25100 |
| gross_profit_plus_depreciation_over_total_liabilities | qualitative | nominal (probable) | discrete | 25072 |
| total_assets_over_total_liabilities | qualitative | nominal (probable) | discrete | 25073 |
| gross_profit_over_total_assets | qualitative | nominal (probable) | discrete | 25118 |
| gross_profit_over_sales | qualitative | nominal (probable) | discrete | 25040 |
| inventory_times_365_over_sales | qualitative | nominal (probable) | discrete | 25045 |
| sales_n_over_sales_n_minus_1 | qualitative | nominal (probable) | discrete | 21210 |
| profit_on_operating_activities_over_total_assets | qualitative | nominal (probable) | discrete | 24243 |
| net_profit_over_sales | qualitative | nominal (probable) | discrete | 25037 |
| gross_profit_in_3_years_over_total_assets | qualitative | nominal (probable) | discrete | 24598 |
| equity_minus_share_capital_over_total_assets | qualitative | nominal (probable) | discrete | 25119 |
| net_profit_plus_depreciation_over_total_liabilities | qualitative | nominal (probable) | discrete | 25071 |
| profit_on_operating_activities_over_financial_expenses | qualitative | nominal (probable) | discrete | 23522 |
| working_capital_over_fixed_assets | qualitative | nominal (probable) | discrete | 24631 |
| logarithm_of_total_assets | qualitative | nominal (probable) | discrete | 25111 |
| total_liabilities_minus_cash_over_sales | qualitative | nominal (probable) | discrete | 25045 |
| gross_profit_plus_interest_over_sales | qualitative | nominal (probable) | discrete | 25044 |
| current_liabilities_times_365_over_cost_of_products_sold | qualitative | nominal (probable) | discrete | 24898 |
| operating_expenses_over_short_term_liabilities | qualitative | nominal (probable) | discrete | 25040 |
| operating_expenses_over_total_liabilities | qualitative | nominal (probable) | discrete | 25070 |
| profit_on_sales_over_total_assets | qualitative | nominal (probable) | discrete | 25106 |
| total_sales_over_total_assets | qualitative | nominal (probable) | discrete | 25119 |
| current_assets_minus_inventories_over_long_term_liabilities | qualitative | nominal (probable) | discrete | 13969 |
| constant_capital_over_total_assets | qualitative | nominal (probable) | discrete | 25119 |
| profit_on_sales_over_sales | qualitative | nominal (probable) | discrete | 25024 |
| current_assets_minus_inventory_minus_receivables_over_short_term_liabilities | qualitative | nominal (probable) | discrete | 25044 |
| total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365 | qualitative | nominal (probable) | discrete | 24665 |
| profit_on_operating_activities_over_sales | qualitative | nominal (probable) | discrete | 24007 |
| rotation_receivables_plus_inventory_turnover_in_days | qualitative | nominal (probable) | discrete | 25045 |
| receivables_times_365_over_sales | qualitative | nominal (probable) | discrete | 25045 |
| net_profit_over_inventory | qualitative | nominal (probable) | discrete | 23843 |
| current_assets_minus_inventory_over_short_term_liabilities | qualitative | nominal (probable) | discrete | 25043 |
| inventory_times_365_over_cost_of_products_sold | qualitative | nominal (probable) | discrete | 24938 |
| EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets | qualitative | nominal (probable) | discrete | 25050 |
write.csv(suggestions, "suggested_variable_classifications_from_R.csv", row.names = FALSE)
Why Understanding Variable Types Matters
Knowing your variable types is one of the most important parts of EDA because it shapes every decision you make in analysis. Each type—nominal, ordinal, interval, or ratio—tells you what kind of math, visuals, and models make sense. For example, you summarize categorical variables with counts or percentages, not averages, while numeric variables can use means and standard deviations.
When it comes to visuals, variable type guides how you show data: bar charts for categories, histograms or scatterplots for numbers. Getting this wrong can hide real patterns or make the data look misleading.
In modeling, knowing which variables are categorical or continuous helps decide how to prepare the data—like creating dummy variables or scaling numeric ones. Overall, understanding variable types helps keep your analysis accurate, your visuals meaningful, and your models reliable.
write.csv(df, "df_prepared_for_EDA.csv", row.names = FALSE)