Gelman Chapter 7 examples

Gelman Chapter 7 examples

Tim Hofer

10 Sep 2020

local gelman_output esttab,b(1) se wide mtitle("Coef.") scalars("rmse sigma") ///
    coef(_cons "Intercept" rmse "sigma") nonum noobs nostar var(15)

Chapter 7

Section 7.1

// page 94-97
import delimited https://raw.githubusercontent.com/avehtari/ROS-Examples/master/ElectionsEconomy/data/hibbs.dat,clear delim(" ")
li in 1/5
scatter vote growth, xtitle("Economic growth") ///
    ytitle("Incumbent party's vote share'")
(5 vars, 16 obs)

     ┌─────────────────────────────────────────────────┐
     │ year   growth    vote   inc_part~e   other_ca~e │
     ├─────────────────────────────────────────────────┤
  1. │ 1952      2.4    44.6    Stevenson   Eisenhower │
  2. │ 1956     2.89   57.76   Eisenhower    Stevenson │
  3. │ 1960      .85   49.91        Nixon      Kennedy │
  4. │ 1964     4.21   61.34      Johnson    Goldwater │
  5. │ 1968     3.02    49.6     Humphrey        Nixon │
     └─────────────────────────────────────────────────┘

qui regress vote growth
// to fit a line with no constant (which does not make sense here)
// regress vote growth,nocons
`gelman_output'
─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
growth                   3.1        (0.7)
Intercept               46.2        (1.6)
─────────────────────────────────────────
sigma                    3.8             
─────────────────────────────────────────
Standard errors in parentheses
// another way to plot the fitted line
twoway (scatter vote growth) (lfit vote growth), legend(off)  ///
    xtitle("Economic growth") ytitle("Incumbent party's vote share") ///
    ylab(45 "45%" 50 "50%" 55 "55%" 60 "60%")  ///
    xlab(0 "0%" 1 "1%" 2 "2%" 3 "3%" 4 "4%")  ///
    ysize(5) xsize(5) yline(50,lw(vthin)) scheme(s1mono)

. regress vote growth

      Source │       SS           df       MS      Number of obs   =        16
─────────────┼──────────────────────────────────   F(1, 14)        =     19.32
       Model │  273.632269         1  273.632269   Prob > F        =    0.0006
    Residual │  198.272733        14  14.1623381   R-squared       =    0.5798
─────────────┼──────────────────────────────────   Adj R-squared   =    0.5498
       Total │  471.905002        15  31.4603335   Root MSE        =    3.7633

─────────────┬────────────────────────────────────────────────────────────────
        vote │      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
─────────────┼────────────────────────────────────────────────────────────────
      growth │   3.060528   .6962739     4.40   0.001     1.567169    4.553887
       _cons │   46.24765   1.621932    28.51   0.000     42.76895    49.72635
─────────────┴────────────────────────────────────────────────────────────────

. margins ,at(growth=1.98)

Adjusted predictions                            Number of obs     =         16
Model VCE    : OLS

Expression   : Linear prediction, predict()
at           : growth          =        1.98

─────────────┬────────────────────────────────────────────────────────────────
             │            Delta-method
             │     Margin   Std. Err.      t    P>|t|     [95% Conf. Interval]
─────────────┼────────────────────────────────────────────────────────────────
       _cons │   52.30749    .942574    55.49   0.000     50.28587    54.32911
─────────────┴────────────────────────────────────────────────────────────────

. // That gives the predicted mean and the standard error *of the mean*
. // the standard deviation of the prediction is given as the rmse 
. //         in our regression table 
local above50=`:di %2.0f (1-normal((50-52.3)/3.8))*100'
twoway  (function y=normalden(x,52.3,3.8), range(40 65) ) ///
        (function y=normalden(x,52.3,3.8), ///
            range(50 65) color(gs12) recast(area) ) ///
        , yscale(off)  scheme(s1mono)  plotregion(style(none)) legend(off) ///
        xtitle("Clinton share of the two party vote") xline(`xline') ///
        text(.03 54 "Predicted" "`above50'% Chance" "of Clinton Victory")

Section 7.2

Step 1-3

//  p 97-99
import delimited https://raw.githubusercontent.com/avehtari/ROS-Examples/master/ElectionsEconomy/data/hibbs.dat,clear delim(" ")
set seed 3292
local a=46.3
local b=3.0
local sigma=3.9
gen x=growth
gen y=`a' + `b'*x + rnormal(0,`sigma')
qui regress y x
`gelman_output'
─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
x                        3.3        (0.8)
Intercept               45.0        (2.0)
─────────────────────────────────────────
sigma                    4.5             
─────────────────────────────────────────
Standard errors in parentheses

Step 4

local b_hat=_b[x]
local b_se=_se[x]
local cover_68=abs(`b'-`b_hat')<`b_se'
local cover_95=abs(`b'-`b_hat')<2*`b_se'
di `cover_68' 
di `cover_95'

keep growth
qui save sim_data,replace

cap program drop fakesim
program define fakesim, rclass
        version 16.1
        args a b sigma
        drop _all
        use sim_data
        gen x=growth
        gen y=`a' + `b'*x + rnormal(0,`sigma')
        qui regress y x
        local b_hat=_b[x]
        local b_se=_se[x]
        return scalar cover_68=abs(`b'-`b_hat')<`b_se'
        return scalar cover_95=abs(`b'-`b_hat')<2*`b_se'
    end
simulate cover_68=r(cover_68) cover_95=r(cover_95),reps(1000) dots(100): ///
    fakesim `a' `b' `sigma' 
sum
      command:  fakesim 46.3 3 3.9
     cover_68:  r(cover_68)
     cover_95:  r(cover_95)

Simulations (1000)
────┼─── 1 ───┼─── 2 ───┼─── 3 ───┼─── 4 ───┼─── 5 
..........

    Variable │        Obs        Mean    Std. Dev.       Min        Max
─────────────┼─────────────────────────────────────────────────────────
    cover_68 │      1,000        .651    .4768925          0          1
    cover_95 │      1,000         .94    .2376057          0          1

Step 4 (t distribution instead of z)

cap program drop fakesim
program define fakesim, rclass
        version 16.1
        args a b sigma
        drop _all
        use sim_data
        gen x=growth
        gen y=`a' + `b'*x + rnormal(0,`sigma')
        qui regress y x
        local b_hat=_b[x]
        local b_se=_se[x]
        return scalar cover_68=abs(`b'-`b_hat')<invt(14,.84)*`b_se'
        return scalar cover_95=abs(`b'-`b_hat')<invt(14,.975)*`b_se'
    end
simulate cover_68=r(cover_68) cover_95=r(cover_95),reps(1000) dots(100): ///
    fakesim `a' `b' `sigma'
sum
    Variable │        Obs        Mean    Std. Dev.       Min        Max
─────────────┼─────────────────────────────────────────────────────────
    cover_68 │      1,000         .67     .470448          0          1
    cover_95 │      1,000        .948    .2221381          0          1

Section 7.3

// p 99-
import delimited https://raw.githubusercontent.com/avehtari/ROS-Examples/master/ElectionsEconomy/data/hibbs.dat,clear delim(" ")
// Section 7.3 p 99-
clear
set seed 1034492
qui set obs 20
gen y_0=rnormal(2.0, 5.0)

cap prog drop listw
prog define listw
    args var
    local end=_N
    di ">  " _c
    forval i=1/`end'    {
        di `:di %2.1f `var'[`i']' "  " _c
}
end

listw y_0
qui regress y_0
`gelman_output'
. noi listw y_0
>  4  5.8  -2  4.4  5.3  9.7  7.7  7  -6.9  -.5  11.8  1.7  6.6  1.5  .1  -5.8  6.9  4.7  .5  4.8  
─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
Intercept                3.4        (1.1)
─────────────────────────────────────────
sigma                    4.8             
─────────────────────────────────────────
Standard errors in parentheses
qui set obs 30
set seed 33392
gen y_1=rnormal(8,5)

    sum y_0
    local y_0_mean=r(mean)
    local y_0_sem=r(sd)/sqrt(r(N))
    sum y_1
    local y_1_mean=r(mean)
    local y_1_sem=r(sd)/sqrt(r(N))
    di "Mean: "`y_1_mean'-`y_0_mean'
    di "SE: " sqrt(`y_0_sem'^2+`y_1_sem^2')
    di "True population difference is 8-2=6"
    Variable │        Obs        Mean    Std. Dev.       Min        Max
─────────────┼─────────────────────────────────────────────────────────
         y_0 │         20    3.364381    4.812439  -6.938446   11.75695

    Variable │        Obs        Mean    Std. Dev.       Min        Max
─────────────┼─────────────────────────────────────────────────────────
         y_1 │         30    7.786442    3.730021   2.245245    15.6591

Diff in means: 4.4220614

SE of difference: 1.3560914

True population difference is 8-2=6
// Now use regression to estimate the difference in means
gen id=_n
qui reshape long y_@,j(x) i(id)
qui regress y x
`gelman_output'
─────────────────────────────────────────
                       Coef.             
─────────────────────────────────────────
x                        4.4        (1.2)
Intercept                3.4        (0.9)
─────────────────────────────────────────
sigma                    4.2             
─────────────────────────────────────────
Standard errors in parentheses