—

Gelman Chapter 6 examples

Tim Hofer

10 Sep 2020

Chapter 6

Section 6.2 - Fitting simple regression to fake data

// page 82
// Simulate 20 observations from y = a + b*x + sigma*error with known
// parameters, then fit the regression to check that they are recovered.
clear
set seed 399323
qui set obs 20
gen x=_n
local a=0.2
local b=0.3
local sigma=0.5
gen y=`a' + `b'*x + `sigma'*rnormal()
qui regress y x
// We are going to use this output format a lot, so store the esttab call in a
// local once and display every table through it. Calling the macro here
// (instead of a second, hand-typed esttab with a different b() format) keeps
// this table in the same one-decimal format as all the later ones.
// The table shows coefficients with standard errors beside them (wide) and
// e(rmse) relabelled as "sigma", the residual standard deviation.
local gelman_output esttab,b(1) se wide mtitle("Coef.") scalars("rmse sigma") ///
    coef(_cons "Intercept" rmse "sigma") nonum noobs nostar var(15)
`gelman_output'

─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
x                        0.3        (0.0)
Intercept                0.4        (0.2)
─────────────────────────────────────────
sigma                    0.5             
─────────────────────────────────────────
Standard errors in parentheses

// Plot the simulated data with the fitted regression line.
predict yhat
// Build the annotation from the stored coefficients rather than typing the
// numbers in by hand, so the label always agrees with the line that is drawn.
local b0 : display %4.2f _b[_cons]
local b1 : display %4.2f _b[x]
twoway (scatter y x, ) ///
    (line yhat x), xtitle("fake x") ytitle("fake y") ///
    legend(off) text(3 10 "y=`b0' + `b1'*x", place(e))

Section 6.3 - Interpret coefficients as comparisons, not effects

// page 84
// Read the earnings data directly from the ROS-Examples repository, regress
// earnings (in thousands) on height and the male indicator, and print the
// result in the shared table format held in the gelman_output local.
import delimited using "https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Earnings/data/earnings.csv", clear
quietly regress earnk height male
`gelman_output'

(15 vars, 1,816 obs)

─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
height                   0.6        (0.2)
male                    10.6        (1.5)
Intercept              -25.9       (12.0)
─────────────────────────────────────────
sigma                   21.4             
─────────────────────────────────────────
Standard errors in parentheses

. * Compare the residual standard deviation to the standard deviation of the data
. * the residual standard deviation is stored as the root mean squared error e(rmse)
. sum earnk        // obtain the standard deviation of the y variable, left in r(sd)

    Variable │        Obs        Mean    Std. Dev.       Min        Max
─────────────┼─────────────────────────────────────────────────────────
       earnk │      1,816     21.1473    22.53177          0        400

. di round((e(rmse))^2/r(sd)^2,.01)    // calculate proportion of variance not explained
.9

. di round(1-(e(rmse))^2/r(sd)^2,.01)    // calculate proportion of variance explained
.1

Section 6.4 - Historical origins of regression

// page 86-87
// Read the space-delimited mother/daughter heights file, list the first five
// rows, and regress daughter's height on mother's height.
import delimited https://raw.githubusercontent.com/avehtari/ROS-Examples/master/PearsonLee/data/Heights.txt,clear delim(" ")
li in 1/5
// Variable names are spelled out in full: abbreviations such as "mother" only
// resolve while -set varabbrev on- is in effect and the prefix stays unique.
qui regress daughter_height mother_height
`gelman_output'

     ┌─────────────────────┐
     │ daught~t   mother~t │
     ├─────────────────────┤
  1. │     52.5       59.5 │
  2. │     52.5       59.5 │
  3. │     53.5       59.5 │
  4. │     53.5       59.5 │
  5. │     55.5       59.5 │
     └─────────────────────┘

─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
mother_height            0.5        (0.0)
Intercept               29.8        (0.8)
─────────────────────────────────────────
sigma                    2.3             
─────────────────────────────────────────
Standard errors in parentheses

// Jittered scatter of the height pairs with the fitted regression line drawn
// from the stored coefficients over mothers' heights of 54 to 71 inches.
// Full variable names are used so the plot does not depend on -varabbrev-.
twoway (scatter daughter_height mother_height,jitter(10) msize(tiny)) ///
        (function _b[mother_height]*x+_b[_cons],range(54 71)) ///
        , xtitle("Mother's height (inches)") legend(off) ///
        ytitle("Adult daughter's height (inches)")

Section 6.5 - The paradox of regression to the mean

// Simulate 1000 students whose midterm and final scores are the same true
// ability plus independent noise, then regress the final on the midterm.
clear
set seed 3293
quietly set obs 1000

// Random draws are made in a fixed order (ability, then the two noise terms)
// so the seed reproduces the same data.
generate true_ability = rnormal(50, 10)
generate noise_1      = rnormal(0, 10)
generate noise_2      = rnormal(0, 10)

// Observed scores: ability measured with independent error on each exam.
generate midterm = true_ability + noise_1
generate final   = true_ability + noise_2

quietly regress final midterm
`gelman_output'

─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
midterm                  0.5        (0.0)
Intercept               25.7        (1.5)
─────────────────────────────────────────
sigma                   12.3             
─────────────────────────────────────────
Standard errors in parentheses

. * Scatter of final vs. midterm scores with the fitted regression line drawn
. * from the stored coefficients over the range 0 to 100, on a square 5x5 plot
. * with grid lines on both axes.
. twoway (scatter final midterm,msize(tiny)) ///
>     (function _b[midterm]*x+_b[_cons],range(0 100)) ///
>     , xtitle("Midterm exam score") xlabel(,grid glstyle(minor_grid) ) ///
>     ytitle("Final exam score")  ylabel(,grid glstyle(minor_grid) ) ///
>     ysize(5) xsize(5) legend(off) scheme(s1mono)

. * Save the figure as SVG, overwriting any existing file.
. quietly graph export img/gr6_5.svg, replace