I am aware of some discussion in the literature regarding the so-called base rate fallacy and similar issues wherein a statistic is conflated with its inverse. This is typically demonstrated with dichotomized variables and their probabilities. However, similar issues apply to continuous variables. Even if the expected value of B given some value of A is the same across groups in a true linear relationship, that does not imply that the expected value of A given some value of B is the same between groups. Indeed, if we know the distributions of A differ between groups, we should expect A to differ conditional on B. I will briefly demonstrate this with simulated data.

Function definition to generate the correlated variables

# Attach every dependency with library() so that a missing package stops the
# script immediately; require() would only return FALSE and let a later call
# fail with a less helpful error.
# MASS stays ahead of dplyr so the original attach (and masking) order is kept.
library(MASS)
library(dplyr)
library(ggplot2)
library(stargazer)

# make this exactly reproducible: fix the RNG seed before any data are simulated
set.seed(2)

#' Simulate two correlated variables (A and B) for a single group
#'
#' Draws `n` observations from a bivariate normal distribution in which both
#' variables have unit variance and covariance `corv`. The mean of A is `gap`
#' and the mean of B is `gap * corv`.
#'
#' @param gap Mean of A for this group (expressed in SDs).
#' @param corv Covariance between A and B; with unit variances this is also
#'   their correlation.
#' @param labelv Value written to the `group` column of every row.
#' @param n Number of observations to simulate.
#' @return A data frame with columns `A`, `B` and `group`.
gen_correlated <- function(gap, corv, labelv, n = 1e4) {
  # centre of the distribution: A at `gap`, B at `gap * corv`
  centre <- c(gap, gap * corv)

  # unit variances on the diagonal, `corv` off the diagonal
  covariance <- matrix(c(1, corv, corv, 1), nrow = 2)

  # empirical = TRUE makes mvrnorm() (MASS) treat `mu` and `Sigma` as the
  # empirical rather than the population moments of the simulated sample
  draws <- mvrnorm(n = n, mu = centre, Sigma = covariance, empirical = TRUE)

  data.frame(
    A = draws[, 1],
    B = draws[, 2],
    group = labelv,
    stringsAsFactors = FALSE
  )
}

Generate data for 4 groups with different means in the predictor

Create 4 groups with different means in the predictor variable A (0.2, 0, -.5, -1 standard deviations above the mean) with identical expected values for B conditional on A (defined as correlation of 0.2)

# correlation between A and B, shared by every group
r <- 0.2

# Simulate the four groups in a fixed order (mean of A paired with its label)
# and stack the resulting data frames into one.
all_groups <- bind_rows(
  Map(
    function(gap, label) gen_correlated(gap, r, label),
    c(0.2, 0, -0.5, -1),
    c("0.2 SD", "0 SD", "-0.5 SD", "-1 SD")
  )
)

# order the levels of `group` by each group's mean of A (aesthetic preference)
all_groups$group <- reorder(all_groups$group, all_groups$A, FUN = mean)

Plot the data for all groups as a function of A

The regression lines for each group overlap nearly perfectly (which you can’t see directly, but the lack of difference is itself apparent). This means that the expected value for B given A for all groups is essentially identical in this data (no regression towards the mean etc. by design)

# B against A for every group: semi-transparent points plus one linear fit
# per group, zoomed to +/- 3 on both axes
ggplot(data = all_groups, mapping = aes(x = A, y = B, colour = group)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = lm) +
  coord_cartesian(xlim = c(-3, 3), ylim = c(-3, 3))

Facet by group as a function of A

Facet the plots by group and overlay reference lines corresponding to the known slope and intercept of the regressions to make this more obvious.

# B against A, one panel per group.
# The orange reference line uses the simulated slope `r` (intercept 0) instead
# of a hard-coded copy of its value, so it always matches the simulation.
# The black line is the linear fit estimated within each panel.
ggplot(data = all_groups, mapping = aes(x = A, y = B, colour = group)) +
  geom_point(alpha = 0.1) +
  geom_abline(slope = r, intercept = 0, colour = "orange", size = 2) +
  geom_smooth(method = lm, colour = "black", size = 1) +
  coord_cartesian(xlim = c(-3, 3), ylim = c(-2, 2)) +
  facet_wrap(~ group, ncol = 2) +
  # dotted axes through the origin
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted")

Regression table for B given value of A

# Regressions of B on A: column (1) pools all groups, columns (2)-(5) fit each
# group separately. The extra "Subset?" row labels which data each column used.
stargazer(
  lm(B ~ A, data = all_groups),
  lm(B ~ A, data = filter(all_groups, group == "0.2 SD")),
  lm(B ~ A, data = filter(all_groups, group == "0 SD")),
  lm(B ~ A, data = filter(all_groups, group == "-0.5 SD")),
  lm(B ~ A, data = filter(all_groups, group == "-1 SD")),
  type = "text",
  omit.stat = c("f", "ser"),
  add.lines = list(
    c("Subset?", "No", "0.2 SD", "0 SD", "-0.5 SD", "-1 SD")
  )
)
## 
## =========================================================
##                          Dependent variable:             
##              --------------------------------------------
##                                   B                      
##                (1)      (2)      (3)      (4)      (5)   
## ---------------------------------------------------------
## A            0.200*** 0.200*** 0.200*** 0.200*** 0.200***
##              (0.004)  (0.010)  (0.010)  (0.010)  (0.010) 
##                                                          
## Constant      -0.000   0.000    0.000    0.000    -0.000 
##              (0.005)  (0.010)  (0.010)  (0.011)  (0.014) 
##                                                          
## ---------------------------------------------------------
## Subset?         No     0.2 SD    0 SD   -0.5 SD   -1 SD  
## Observations  40,000   10,000   10,000   10,000   10,000 
## R2            0.048    0.040    0.040    0.040    0.040  
## Adjusted R2   0.048    0.040    0.040    0.040    0.040  
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01

Now let’s plot A as a function of B

As you can see here, even though the simulated data are exactly the same as what we plotted earlier and the expected value of B is effectively identical for all groups for any given value of A (as defined), the expected value of A given B is very different for each group.

# Same data with the axes swapped: A against B, one linear fit per group
ggplot(data = all_groups, mapping = aes(x = B, y = A, colour = group)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = lm)

Drop the points to illustrate the magnitude of the difference

# A against B without the points: only the per-group linear fits, with dotted
# axes through the origin for reference
ggplot(data = all_groups, mapping = aes(x = B, y = A, colour = group)) +
  geom_smooth(method = lm) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted")

# A against B, one panel per group, with the within-panel linear fit in black
# and dotted axes through the origin
ggplot(data = all_groups, mapping = aes(x = B, y = A, colour = group)) +
  geom_point(alpha = 0.1) +
  # reference line left disabled, as in the original:
  # geom_abline(slope=0.2,intercept=0,color='orange',size=2) +
  geom_smooth(method = lm, colour = "black", size = 1) +
  coord_cartesian(xlim = c(-3, 3), ylim = c(-2, 2)) +
  facet_wrap(~ group, ncol = 2) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted")

Indeed, even though the inverted slopes are effectively identical for each group, the intercepts are different. Moreover, the intercepts correspond quite well to differences in the means of variable A (our original predictor).

# Regressions of A on B: column (1) pools all groups, columns (2)-(5) fit each
# group separately. The extra "Subset?" row labels which data each column used.
stargazer(
  lm(A ~ B, data = all_groups),
  lm(A ~ B, data = filter(all_groups, group == "0.2 SD")),
  lm(A ~ B, data = filter(all_groups, group == "0 SD")),
  lm(A ~ B, data = filter(all_groups, group == "-0.5 SD")),
  lm(A ~ B, data = filter(all_groups, group == "-1 SD")),
  type = "text",
  omit.stat = c("f", "ser"),
  add.lines = list(
    c("Subset?", "No", "0.2 SD", "0 SD", "-0.5 SD", "-1 SD")
  )
)
## 
## ============================================================
##                            Dependent variable:              
##              -----------------------------------------------
##                                     A                       
##                 (1)      (2)      (3)       (4)       (5)   
## ------------------------------------------------------------
## B            0.241***  0.200*** 0.200*** 0.200***  0.200*** 
##               (0.005)  (0.010)  (0.010)   (0.010)   (0.010) 
##                                                             
## Constant     -0.309*** 0.192***  0.000   -0.480*** -0.960***
##               (0.005)  (0.010)  (0.010)   (0.010)   (0.010) 
##                                                             
## ------------------------------------------------------------
## Subset?         No      0.2 SD    0 SD    -0.5 SD    -1 SD  
## Observations  40,000    10,000   10,000   10,000    10,000  
## R2             0.048    0.040    0.040     0.040     0.040  
## Adjusted R2    0.048    0.040    0.040     0.040     0.040  
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01