# Load the training data ----
# Relative paths below resolve against this directory.
setwd("C:/Users/cdaniels/Downloads")
file_path <- "train 14(in).csv"

# Copy read with read.csv() defaults apart from the explicit header flag.
my_data <- read.csv(file_path, header = TRUE)

# Working copy: keep strings as character and treat "" and "NA" as missing.
# NOTE(review): the recorded str() output shows "?" among the values, which
# presumably marks missing data and keeps those columns as character --
# confirm whether "?" should also be listed in na.strings.
df <- read.csv(
  file = file_path,
  stringsAsFactors = FALSE,
  na.strings = c("", "NA")
)

# Report the dimensions: row count first, then column count.
nrow(df)
ncol(df)
## [1] 25121
## [1] 67
# Show the structure of df: each column's name, type and first few values.
str(df)
## 'data.frame':    25121 obs. of  67 variables:
##  $ id                                                                                                                                : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ forecasting.period                                                                                                                : int  4 4 1 2 4 4 2 2 4 2 ...
##  $ net_profit_over_total_assets                                                                                                      : chr  "0.141939282" "0.147560479" "-0.024542174" "0.15931717" ...
##  $ total_liabilities_over_total_assets                                                                                               : chr  "0.450292264" "0.234839979" "0.311544325" "0.777016886" ...
##  $ working_capital_over_total_assets                                                                                                 : chr  "0.021421962" "0.708334845" "0.067830648" "0.207430847" ...
##  $ current_assets_over_short_term_liabilities                                                                                        : chr  "1.052986556" "4.016018367" "1.255247103" "1.267310049" ...
##  $ cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365: chr  "-28.52908379" "119.4456107" "-15.01992269" "-4.892917756" ...
##  $ retained_earnings_over_total_assets                                                                                               : chr  "0.141933515" "0.457089625" "-0.657002778" "-5.19E-06" ...
##  $ EBIT_over_total_assets                                                                                                            : chr  "0.172629003" "0.186069671" "-0.024536711" "0.159321164" ...
##  $ book_value_of_equity_over_total_liabilities                                                                                       : chr  "0.990337006" "3.206444965" "2.209000358" "0.288177363" ...
##  $ sales_over_total_assets                                                                                                           : chr  "1.087355901" "1.093404091" "0.82557841" "2.283531974" ...
##  $ equity_over_total_assets                                                                                                          : chr  "0.445947831" "0.753147272" "0.68845271" "0.222975074" ...
##  $ gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets                                                   : chr  "0.172639901" "0.186074127" "-0.024536117" "0.401454214" ...
##  $ gross_profit_over_short_term_liabilities                                                                                          : chr  "0.428585346" "0.792273513" "-0.092083189" "0.205514237" ...
##  $ gross_profit_plus_depreciation_over_sales                                                                                         : chr  "0.098482024" "0.086799659" "0.001798631" "0.070944185" ...
##  $ gross_profit_plus_interest_over_total_assets                                                                                      : chr  "0.172623756" "0.186064405" "-0.024538477" "0.159313154" ...
##  $ total_liabilities_times_365_over_gross_profit_plus_depreciation                                                                   : chr  "737.5983611" "427.990535" "79930.04047" "1750.629791" ...
##  $ gross_profit_plus_depreciation_over_total_liabilities                                                                             : chr  "0.495021686" "0.852970183" "0.004578722" "0.208547924" ...
##  $ total_assets_over_total_liabilities                                                                                               : chr  "2.220461536" "4.257780528" "3.209402349" "1.286706862" ...
##  $ gross_profit_over_total_assets                                                                                                    : chr  "0.172636989" "0.186069814" "-0.024531282" "0.159326426" ...
##  $ gross_profit_over_sales                                                                                                           : chr  "0.076197623" "0.080696018" "-0.029641659" "0.069795748" ...
##  $ inventory_times_365_over_sales                                                                                                    : chr  "28.29928484" "1.542870004" "42.33579188" "36.81410415" ...
##  $ sales_n_over_sales_n_minus_1                                                                                                      : chr  "1.028425054" "1.051071834" "1.369948775" "?" ...
##  $ profit_on_operating_activities_over_total_assets                                                                                  : chr  "0.189883384" "0.184396365" "3.03E-06" "0.219399533" ...
##  $ net_profit_over_sales                                                                                                             : chr  "0.062674528" "0.064051056" "-0.029717478" "0.069806285" ...
##  $ gross_profit_in_3_years_over_total_assets                                                                                         : chr  "0.172627309" "0.576977247" "-0.105845027" "?" ...
##  $ equity_minus_share_capital_over_total_assets                                                                                      : chr  "0.44597852" "0.753142175" "-0.681545104" "0.126421763" ...
##  $ net_profit_plus_depreciation_over_total_liabilities                                                                               : chr  "0.426728805" "0.688728876" "0.004576887" "0.208290256" ...
##  $ profit_on_operating_activities_over_financial_expenses                                                                            : chr  "?" "0.839642289" "?" "0.829707025" ...
##  $ working_capital_over_fixed_assets                                                                                                 : chr  "0.037082538" "12.4681881" "0.101929962" "11.82908241" ...
##  $ logarithm_of_total_assets                                                                                                         : chr  "4.572500412" "4.531599778" "4.098599267" "4.094300228" ...
##  $ total_liabilities_minus_cash_over_sales                                                                                           : chr  "0.192113491" "0.062276517" "0.285022352" "0.181123207" ...
##  $ gross_profit_plus_interest_over_sales                                                                                             : chr  "0.076140262" "0.08074136" "-0.059420243" "0.069739439" ...
##  $ current_liabilities_times_365_over_cost_of_products_sold                                                                          : chr  "70.55540863" "40.79720761" "131.6358562" "137.2042179" ...
##  $ operating_expenses_over_short_term_liabilities                                                                                    : chr  "5.173970681" "8.975367733" "2.773073593" "2.662950931" ...
##  $ operating_expenses_over_total_liabilities                                                                                         : chr  "0.421744751" "0.785422271" "2.371519482" "2.656136523" ...
##  $ profit_on_sales_over_total_assets                                                                                                 : chr  "0.189890183" "0.184388267" "0.004913423" "0.219739891" ...
##  $ total_sales_over_total_assets                                                                                                     : chr  "2.290576424" "2.314395629" "0.825596769" "2.28351007" ...
##  $ current_assets_minus_inventories_over_long_term_liabilities                                                                       : chr  "5.224748458" "?" "6.267797656" "?" ...
##  $ constant_capital_over_total_assets                                                                                                : chr  "0.493564626" "0.753162904" "0.72649338" "0.222963457" ...
##  $ profit_on_sales_over_sales                                                                                                        : chr  "0.083730273" "0.079966362" "0.005919707" "0.09620642" ...
##  $ current_assets_minus_inventory_minus_receivables_over_short_term_liabilities                                                      : chr  "0.057484471" "0.40272699" "0.295161911" "0.471850147" ...
##  $ total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365                                         : chr  "0.059289849" "0.039095725" "0.400987929" "0.116465754" ...
##  $ profit_on_operating_activities_over_sales                                                                                         : chr  "0.083764648" "0.079992035" "-1.00E-05" "0.096058248" ...
##  $ rotation_receivables_plus_inventory_turnover_in_days                                                                              : chr  "64.960411" "134.2431965" "112.7638256" "98.73102072" ...
##  $ receivables_times_365_over_sales                                                                                                  : chr  "36.35313301" "132.9435579" "70.77571731" "61.64946552" ...
##  $ net_profit_over_inventory                                                                                                         : chr  "0.809084542" "14.9372214" "-0.256301524" "0.689734318" ...
##  $ current_assets_minus_inventory_over_short_term_liabilities                                                                        : chr  "0.617366834" "3.974228113" "0.894991135" "0.970343764" ...
##  $ inventory_times_365_over_cost_of_products_sold                                                                                    : chr  "30.73059918" "1.685797747" "47.24909088" "40.80691807" ...
##  $ EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets                                                        : chr  "0.139657816" "0.170146135" "-0.025969977" "0.216729403" ...
##  $ EBITDA_profit_on_operating_activities_minus_depreciation_over_sales                                                               : chr  "0.06163928" "0.073831252" "-0.031458484" "0.094920716" ...
##  $ current_assets_over_total_liabilities                                                                                             : chr  "0.941651229" "4.015087615" "1.073177262" "1.264002613" ...
##  $ short_term_liabilities_over_total_assets                                                                                          : chr  "0.402678044" "0.234846036" "0.266457526" "0.775033011" ...
##  $ short_term_liabilities_times_365_over_cost_of_products_sold                                                                       : chr  "0.191955456" "0.111752055" "0.361056749" "0.375547235" ...
##  $ equity_over_fixed_assets                                                                                                          : chr  "0.775922489" "13.26017504" "1.034522543" "12.71667874" ...
##  $ constant_capital_over_fixed_assets                                                                                                : chr  "0.856485847" "13.25665936" "1.092347601" "12.71483821" ...
##  $ working_capital                                                                                                                   : chr  "800.5424075" "24093.00049" "851.1791996" "2577.61484" ...
##  $ sales_minus_cost_of_products_sold_over_sales                                                                                      : chr  "0.079571878" "0.095217706" "0.109034526" "0.10297114" ...
##  $ current_assets_minus_inventory_minus_short_term_liabilities_over_sales_minus_gross_profit_minus_depreciation                      : chr  "0.318285559" "0.195943466" "-0.035634075" "0.714527717" ...
##  $ total_costs_overtotal_sales                                                                                                       : chr  "0.927686967" "0.907056134" "0.929825197" "0.937895435" ...
##  $ long_term_liabilities_over_equity                                                                                                 : chr  "0.106818145" "-2.93E-05" "0.055331375" "-7.68E-05" ...
##  $ sales_over_inventory                                                                                                              : chr  "12.94179729" "233.3016883" "8.589990831" "9.928221186" ...
##  $ sales_over_receivables                                                                                                            : chr  "10.04458489" "2.747415818" "5.164737918" "5.912786248" ...
##  $ short_term_liabilities_times_365_over_sales                                                                                       : chr  "64.71369972" "36.96743452" "117.9240397" "123.7526101" ...
##  $ sales_over_short_term_liabilities                                                                                                 : chr  "5.626894265" "9.813419478" "3.098272636" "2.946008382" ...
##  $ sales_over_fixed_assets                                                                                                           : chr  "3.937064101" "40.56773918" "1.239710503" "130.2083212" ...
##  $ class                                                                                                                             : int  1 0 0 0 0 0 0 0 0 0 ...
# Per-column summaries: quantiles and mean for numeric columns;
# length, class and mode for character columns.
summary(df)
##        id        forecasting.period net_profit_over_total_assets
##  Min.   :    0   Min.   :1.000      Length:25121                
##  1st Qu.: 6280   1st Qu.:2.000      Class :character            
##  Median :12560   Median :3.000      Mode  :character            
##  Mean   :12560   Mean   :2.609                                  
##  3rd Qu.:18840   3rd Qu.:4.000                                  
##  Max.   :25120   Max.   :4.000                                  
##  total_liabilities_over_total_assets working_capital_over_total_assets
##  Length:25121                        Length:25121                     
##  Class :character                    Class :character                 
##  Mode  :character                    Mode  :character                 
##                                                                       
##                                                                       
##                                                                       
##  current_assets_over_short_term_liabilities
##  Length:25121                              
##  Class :character                          
##  Mode  :character                          
##                                            
##                                            
##                                            
##  cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365
##  Length:25121                                                                                                                      
##  Class :character                                                                                                                  
##  Mode  :character                                                                                                                  
##                                                                                                                                    
##                                                                                                                                    
##                                                                                                                                    
##  retained_earnings_over_total_assets EBIT_over_total_assets
##  Length:25121                        Length:25121          
##  Class :character                    Class :character      
##  Mode  :character                    Mode  :character      
##                                                            
##                                                            
##                                                            
##  book_value_of_equity_over_total_liabilities sales_over_total_assets
##  Length:25121                                Length:25121           
##  Class :character                            Class :character       
##  Mode  :character                            Mode  :character       
##                                                                     
##                                                                     
##                                                                     
##  equity_over_total_assets
##  Length:25121            
##  Class :character        
##  Mode  :character        
##                          
##                          
##                          
##  gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets
##  Length:25121                                                                   
##  Class :character                                                               
##  Mode  :character                                                               
##                                                                                 
##                                                                                 
##                                                                                 
##  gross_profit_over_short_term_liabilities
##  Length:25121                            
##  Class :character                        
##  Mode  :character                        
##                                          
##                                          
##                                          
##  gross_profit_plus_depreciation_over_sales
##  Length:25121                             
##  Class :character                         
##  Mode  :character                         
##                                           
##                                           
##                                           
##  gross_profit_plus_interest_over_total_assets
##  Length:25121                                
##  Class :character                            
##  Mode  :character                            
##                                              
##                                              
##                                              
##  total_liabilities_times_365_over_gross_profit_plus_depreciation
##  Length:25121                                                   
##  Class :character                                               
##  Mode  :character                                               
##                                                                 
##                                                                 
##                                                                 
##  gross_profit_plus_depreciation_over_total_liabilities
##  Length:25121                                         
##  Class :character                                     
##  Mode  :character                                     
##                                                       
##                                                       
##                                                       
##  total_assets_over_total_liabilities gross_profit_over_total_assets
##  Length:25121                        Length:25121                  
##  Class :character                    Class :character              
##  Mode  :character                    Mode  :character              
##                                                                    
##                                                                    
##                                                                    
##  gross_profit_over_sales inventory_times_365_over_sales
##  Length:25121            Length:25121                  
##  Class :character        Class :character              
##  Mode  :character        Mode  :character              
##                                                        
##                                                        
##                                                        
##  sales_n_over_sales_n_minus_1 profit_on_operating_activities_over_total_assets
##  Length:25121                 Length:25121                                    
##  Class :character             Class :character                                
##  Mode  :character             Mode  :character                                
##                                                                               
##                                                                               
##                                                                               
##  net_profit_over_sales gross_profit_in_3_years_over_total_assets
##  Length:25121          Length:25121                             
##  Class :character      Class :character                         
##  Mode  :character      Mode  :character                         
##                                                                 
##                                                                 
##                                                                 
##  equity_minus_share_capital_over_total_assets
##  Length:25121                                
##  Class :character                            
##  Mode  :character                            
##                                              
##                                              
##                                              
##  net_profit_plus_depreciation_over_total_liabilities
##  Length:25121                                       
##  Class :character                                   
##  Mode  :character                                   
##                                                     
##                                                     
##                                                     
##  profit_on_operating_activities_over_financial_expenses
##  Length:25121                                          
##  Class :character                                      
##  Mode  :character                                      
##                                                        
##                                                        
##                                                        
##  working_capital_over_fixed_assets logarithm_of_total_assets
##  Length:25121                      Length:25121             
##  Class :character                  Class :character         
##  Mode  :character                  Mode  :character         
##                                                             
##                                                             
##                                                             
##  total_liabilities_minus_cash_over_sales gross_profit_plus_interest_over_sales
##  Length:25121                            Length:25121                         
##  Class :character                        Class :character                     
##  Mode  :character                        Mode  :character                     
##                                                                               
##                                                                               
##                                                                               
##  current_liabilities_times_365_over_cost_of_products_sold
##  Length:25121                                            
##  Class :character                                        
##  Mode  :character                                        
##                                                          
##                                                          
##                                                          
##  operating_expenses_over_short_term_liabilities
##  Length:25121                                  
##  Class :character                              
##  Mode  :character                              
##                                                
##                                                
##                                                
##  operating_expenses_over_total_liabilities profit_on_sales_over_total_assets
##  Length:25121                              Length:25121                     
##  Class :character                          Class :character                 
##  Mode  :character                          Mode  :character                 
##                                                                             
##                                                                             
##                                                                             
##  total_sales_over_total_assets
##  Length:25121                 
##  Class :character             
##  Mode  :character             
##                               
##                               
##                               
##  current_assets_minus_inventories_over_long_term_liabilities
##  Length:25121                                               
##  Class :character                                           
##  Mode  :character                                           
##                                                             
##                                                             
##                                                             
##  constant_capital_over_total_assets profit_on_sales_over_sales
##  Length:25121                       Length:25121              
##  Class :character                   Class :character          
##  Mode  :character                   Mode  :character          
##                                                               
##                                                               
##                                                               
##  current_assets_minus_inventory_minus_receivables_over_short_term_liabilities
##  Length:25121                                                                
##  Class :character                                                            
##  Mode  :character                                                            
##                                                                              
##                                                                              
##                                                                              
##  total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365
##  Length:25121                                                                             
##  Class :character                                                                         
##  Mode  :character                                                                         
##                                                                                           
##                                                                                           
##                                                                                           
##  profit_on_operating_activities_over_sales
##  Length:25121                             
##  Class :character                         
##  Mode  :character                         
##                                           
##                                           
##                                           
##  rotation_receivables_plus_inventory_turnover_in_days
##  Length:25121                                        
##  Class :character                                    
##  Mode  :character                                    
##                                                      
##                                                      
##                                                      
##  receivables_times_365_over_sales net_profit_over_inventory
##  Length:25121                     Length:25121             
##  Class :character                 Class :character         
##  Mode  :character                 Mode  :character         
##                                                            
##                                                            
##                                                            
##  current_assets_minus_inventory_over_short_term_liabilities
##  Length:25121                                              
##  Class :character                                          
##  Mode  :character                                          
##                                                            
##                                                            
##                                                            
##  inventory_times_365_over_cost_of_products_sold
##  Length:25121                                  
##  Class :character                              
##  Mode  :character                              
##                                                
##                                                
##                                                
##  EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets
##  Length:25121                                                              
##  Class :character                                                          
##  Mode  :character                                                          
##                                                                            
##                                                                            
##                                                                            
##  EBITDA_profit_on_operating_activities_minus_depreciation_over_sales
##  Length:25121                                                       
##  Class :character                                                   
##  Mode  :character                                                   
##                                                                     
##                                                                     
##                                                                     
##  current_assets_over_total_liabilities short_term_liabilities_over_total_assets
##  Length:25121                          Length:25121                            
##  Class :character                      Class :character                        
##  Mode  :character                      Mode  :character                        
##                                                                                
##                                                                                
##                                                                                
##  short_term_liabilities_times_365_over_cost_of_products_sold
##  Length:25121                                               
##  Class :character                                           
##  Mode  :character                                           
##                                                             
##                                                             
##                                                             
##  equity_over_fixed_assets constant_capital_over_fixed_assets working_capital   
##  Length:25121             Length:25121                       Length:25121      
##  Class :character         Class :character                   Class :character  
##  Mode  :character         Mode  :character                   Mode  :character  
##                                                                                
##                                                                                
##                                                                                
##  sales_minus_cost_of_products_sold_over_sales
##  Length:25121                                
##  Class :character                            
##  Mode  :character                            
##                                              
##                                              
##                                              
##  current_assets_minus_inventory_minus_short_term_liabilities_over_sales_minus_gross_profit_minus_depreciation
##  Length:25121                                                                                                
##  Class :character                                                                                            
##  Mode  :character                                                                                            
##                                                                                                              
##                                                                                                              
##                                                                                                              
##  total_costs_overtotal_sales long_term_liabilities_over_equity
##  Length:25121                Length:25121                     
##  Class :character            Class :character                 
##  Mode  :character            Mode  :character                 
##                                                               
##                                                               
##                                                               
##  sales_over_inventory sales_over_receivables
##  Length:25121         Length:25121          
##  Class :character     Class :character      
##  Mode  :character     Mode  :character      
##                                             
##                                             
##                                             
##  short_term_liabilities_times_365_over_sales sales_over_short_term_liabilities
##  Length:25121                                Length:25121                     
##  Class :character                            Class :character                 
##  Mode  :character                            Mode  :character                 
##                                                                               
##                                                                               
##                                                                               
##  sales_over_fixed_assets     class        
##  Length:25121            Min.   :0.00000  
##  Class :character        1st Qu.:0.00000  
##  Mode  :character        Median :0.00000  
##                          Mean   :0.04482  
##                          3rd Qu.:0.00000  
##                          Max.   :1.00000
# Preview the first 8 and last 6 rows of the raw data.
head(df, 8)
tail(df, 6)
library(knitr)
# Basic type table: one row per column of `df` with the class R inferred on
# import, the number of distinct values and the number of missing values.
# vapply() is used instead of sapply() so each column of the table is
# guaranteed to be a character/integer vector of length ncol(df).
# Note: n_unique uses unique(x) directly, so NA (if present) counts as one
# distinct value. The named vapply() results also become the row names.
var_table <- data.frame(
  variable = names(df),
  R_inferred_class = vapply(df, function(x) class(x)[1], character(1)),
  n_unique = vapply(df, function(x) length(unique(x)), integer(1)),
  n_missing = vapply(df, function(x) sum(is.na(x)), integer(1)),
  stringsAsFactors = FALSE
)
# Render only the first 30 variables.
kable(head(var_table, 30))
variable R_inferred_class n_unique n_missing
id id integer 25121 0
forecasting.period forecasting.period integer 4 0
net_profit_over_total_assets net_profit_over_total_assets character 25114 0
total_liabilities_over_total_assets total_liabilities_over_total_assets character 25118 0
working_capital_over_total_assets working_capital_over_total_assets character 25119 0
current_assets_over_short_term_liabilities current_assets_over_short_term_liabilities character 25044 0
cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 character 25065 0
retained_earnings_over_total_assets retained_earnings_over_total_assets character 17459 0
EBIT_over_total_assets EBIT_over_total_assets character 25116 0
book_value_of_equity_over_total_liabilities book_value_of_equity_over_total_liabilities character 25072 0
sales_over_total_assets sales_over_total_assets character 25115 0
equity_over_total_assets equity_over_total_assets character 25119 0
gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets character 25096 0
gross_profit_over_short_term_liabilities gross_profit_over_short_term_liabilities character 25044 0
gross_profit_plus_depreciation_over_sales gross_profit_plus_depreciation_over_sales character 25045 0
gross_profit_plus_interest_over_total_assets gross_profit_plus_interest_over_total_assets character 25117 0
total_liabilities_times_365_over_gross_profit_plus_depreciation total_liabilities_times_365_over_gross_profit_plus_depreciation character 25100 0
gross_profit_plus_depreciation_over_total_liabilities gross_profit_plus_depreciation_over_total_liabilities character 25072 0
total_assets_over_total_liabilities total_assets_over_total_liabilities character 25073 0
gross_profit_over_total_assets gross_profit_over_total_assets character 25118 0
gross_profit_over_sales gross_profit_over_sales character 25040 0
inventory_times_365_over_sales inventory_times_365_over_sales character 25045 0
sales_n_over_sales_n_minus_1 sales_n_over_sales_n_minus_1 character 21210 0
profit_on_operating_activities_over_total_assets profit_on_operating_activities_over_total_assets character 24243 0
net_profit_over_sales net_profit_over_sales character 25037 0
gross_profit_in_3_years_over_total_assets gross_profit_in_3_years_over_total_assets character 24598 0
equity_minus_share_capital_over_total_assets equity_minus_share_capital_over_total_assets character 25119 0
net_profit_plus_depreciation_over_total_liabilities net_profit_plus_depreciation_over_total_liabilities character 25071 0
profit_on_operating_activities_over_financial_expenses profit_on_operating_activities_over_financial_expenses character 23522 0
working_capital_over_fixed_assets working_capital_over_fixed_assets character 24631 0
# Export the variable overview so it can be reviewed outside of R.
write.csv(x = var_table, file = "initial_variable_table.csv", row.names = FALSE)
# Store the identifier as text (presumably so it is treated as a label rather
# than a quantity in later summaries).
df[["id"]] <- as.character(df[["id"]])
# Snapshot the data frame after the recode above.
# NOTE(review): `id` is the only column recoded here; columns that were read
# as character stay character in this snapshot.
saveRDS(object = df, file = "df_after_recoding.rds")
# Suggest a measurement-level classification for a single column.
#
# Args:
#   x: A vector, typically one column of a data frame.
#
# Returns:
#   An unnamed character vector of length 4:
#     [1] "quantitative" or "qualitative"
#     [2] a Stevens-typology guess ("ratio (probable)", "interval (possible)",
#         "ordinal (probable)" or "nominal (probable)")
#     [3] "discrete" or "continuous"
#     [4] the number of distinct non-missing values, as a string
#
# The result is a heuristic meant for manual review. Only is.numeric() decides
# between the two branches, so numbers stored as text are reported as
# qualitative.
suggest_type <- function(x) {
  observed <- x[!is.na(x)]
  n_distinct <- length(unique(observed))

  if (is.numeric(x)) {
    # Ten or fewer distinct values is treated as discrete.
    disc_or_cont <- if (n_distinct <= 10) "discrete" else "continuous"
    # Assume ratio scale when no observed value is negative.
    stevens <- if (all(observed >= 0)) {
      "ratio (probable)"
    } else {
      "interval (possible)"
    }
    return(c("quantitative", stevens, disc_or_cont, as.character(n_distinct)))
  }

  # Categorical branch: look for vocabulary that usually implies an ordering.
  uniqvals <- tolower(as.character(unique(observed)))
  ord_keywords <- c(
    "low", "medium", "high", "very low", "very high", "grade",
    "freshman", "sophomore", "junior", "senior", "agree", "disagree"
  )
  # Match keywords as whole words only. A bare substring match would flag
  # values such as "yellow" (contains "low") or "highway" (contains "high")
  # as ordinal.
  ord_pattern <- paste0("\\b(", paste(ord_keywords, collapse = "|"), ")\\b")
  stevens <- if (any(grepl(ord_pattern, uniqvals))) {
    "ordinal (probable)"
  } else {
    "nominal (probable)"
  }
  c("qualitative", stevens, "discrete", as.character(n_distinct))
}

# Classify every column of `df` and collect one row per variable.
# vapply() with FUN.VALUE = character(4) enforces that suggest_type() returns
# exactly four strings per column; sapply() would silently change the result
# type if that contract were ever broken.
suggestions <- t(vapply(df, suggest_type, FUN.VALUE = character(4)))
colnames(suggestions) <- c(
  "quant_vs_qual", "stevens_typology", "discrete_vs_continuous", "n_unique"
)
# Move the variable names from the matrix row names into a regular column.
suggestions <- data.frame(
  variable = rownames(suggestions),
  suggestions,
  row.names = NULL,
  stringsAsFactors = FALSE
)
# Render only the first 50 variables.
kable(head(suggestions, 50))
variable quant_vs_qual stevens_typology discrete_vs_continuous n_unique
id qualitative nominal (probable) discrete 25121
forecasting.period quantitative ratio (probable) discrete 4
net_profit_over_total_assets qualitative nominal (probable) discrete 25114
total_liabilities_over_total_assets qualitative nominal (probable) discrete 25118
working_capital_over_total_assets qualitative nominal (probable) discrete 25119
current_assets_over_short_term_liabilities qualitative nominal (probable) discrete 25044
cash_plus_short_term_securities_plus_receivables_minus_short_term_liabilities_over_operating_expenses_minus_depreciation_times_365 qualitative nominal (probable) discrete 25065
retained_earnings_over_total_assets qualitative nominal (probable) discrete 17459
EBIT_over_total_assets qualitative nominal (probable) discrete 25116
book_value_of_equity_over_total_liabilities qualitative nominal (probable) discrete 25072
sales_over_total_assets qualitative nominal (probable) discrete 25115
equity_over_total_assets qualitative nominal (probable) discrete 25119
gross_profit_plus_extraordinary_items_plus_financial_expenses_over_total_assets qualitative nominal (probable) discrete 25096
gross_profit_over_short_term_liabilities qualitative nominal (probable) discrete 25044
gross_profit_plus_depreciation_over_sales qualitative nominal (probable) discrete 25045
gross_profit_plus_interest_over_total_assets qualitative nominal (probable) discrete 25117
total_liabilities_times_365_over_gross_profit_plus_depreciation qualitative nominal (probable) discrete 25100
gross_profit_plus_depreciation_over_total_liabilities qualitative nominal (probable) discrete 25072
total_assets_over_total_liabilities qualitative nominal (probable) discrete 25073
gross_profit_over_total_assets qualitative nominal (probable) discrete 25118
gross_profit_over_sales qualitative nominal (probable) discrete 25040
inventory_times_365_over_sales qualitative nominal (probable) discrete 25045
sales_n_over_sales_n_minus_1 qualitative nominal (probable) discrete 21210
profit_on_operating_activities_over_total_assets qualitative nominal (probable) discrete 24243
net_profit_over_sales qualitative nominal (probable) discrete 25037
gross_profit_in_3_years_over_total_assets qualitative nominal (probable) discrete 24598
equity_minus_share_capital_over_total_assets qualitative nominal (probable) discrete 25119
net_profit_plus_depreciation_over_total_liabilities qualitative nominal (probable) discrete 25071
profit_on_operating_activities_over_financial_expenses qualitative nominal (probable) discrete 23522
working_capital_over_fixed_assets qualitative nominal (probable) discrete 24631
logarithm_of_total_assets qualitative nominal (probable) discrete 25111
total_liabilities_minus_cash_over_sales qualitative nominal (probable) discrete 25045
gross_profit_plus_interest_over_sales qualitative nominal (probable) discrete 25044
current_liabilities_times_365_over_cost_of_products_sold qualitative nominal (probable) discrete 24898
operating_expenses_over_short_term_liabilities qualitative nominal (probable) discrete 25040
operating_expenses_over_total_liabilities qualitative nominal (probable) discrete 25070
profit_on_sales_over_total_assets qualitative nominal (probable) discrete 25106
total_sales_over_total_assets qualitative nominal (probable) discrete 25119
current_assets_minus_inventories_over_long_term_liabilities qualitative nominal (probable) discrete 13969
constant_capital_over_total_assets qualitative nominal (probable) discrete 25119
profit_on_sales_over_sales qualitative nominal (probable) discrete 25024
current_assets_minus_inventory_minus_receivables_over_short_term_liabilities qualitative nominal (probable) discrete 25044
total_liabilities_over_profit_on_operating_activities_plus_depreciation_times_12_over_365 qualitative nominal (probable) discrete 24665
profit_on_operating_activities_over_sales qualitative nominal (probable) discrete 24007
rotation_receivables_plus_inventory_turnover_in_days qualitative nominal (probable) discrete 25045
receivables_times_365_over_sales qualitative nominal (probable) discrete 25045
net_profit_over_inventory qualitative nominal (probable) discrete 23843
current_assets_minus_inventory_over_short_term_liabilities qualitative nominal (probable) discrete 25043
inventory_times_365_over_cost_of_products_sold qualitative nominal (probable) discrete 24938
EBITDA_profit_on_operating_activities_minus_depreciation_over_total_assets qualitative nominal (probable) discrete 25050
# Export the heuristic classifications for manual review.
write.csv(
  x = suggestions,
  file = "suggested_variable_classifications_from_R.csv",
  row.names = FALSE
)

Why Understanding Variable Types Matters

Knowing your variable types is one of the most important parts of EDA because it shapes every later decision in the analysis. Each measurement level—nominal, ordinal, interval, or ratio—determines which arithmetic, visuals, and models make sense. For example, categorical variables are summarized with counts or percentages, not averages, while numeric variables can be summarized with means and standard deviations.

When it comes to visuals, variable type guides how you show data: bar charts for categories, histograms or scatterplots for numbers. Getting this wrong can hide real patterns or make the data look misleading.

In modeling, knowing which variables are categorical and which are continuous helps you decide how to prepare the data—for example, creating dummy variables for categories or scaling numeric predictors. Overall, understanding variable types helps keep your analysis accurate, your visuals meaningful, and your models reliable.

# Write the working data frame to disk for the EDA step.
# NOTE(review): in the visible code only `id` was recoded before this export;
# columns that were read as character appear to still be character here.
# Confirm that this is intended.
write.csv(x = df, file = "df_prepared_for_EDA.csv", row.names = FALSE)