{.tabset}

This page provides starter code for replication of the following readings: https://macartan.github.io/teaching/ds-hu-2023#3-the-readings

Macro processes {.tabset}

The first readings focus on longer-term historical processes and on order and violence. They include studies of the impact of slavery and colonization, and studies of the drivers of civil conflict and democratization.

AJR

@acemoglu2001colonial

Main result is in Table 4:

# Data behind Table 4 (file maketable4.dta).
ajr_data_4 <- haven::read_dta(
  "1_1_acemoglu_johnson_robinson/in/maketable4.dta"
)

# Both specifications are estimated on the rows flagged baseco == 1.
ajr_base_sample <- ajr_data_4 |>
  filter(baseco == 1)

# IV regressions of logpgp95 on avexpr, with avexpr instrumented by logem4.
# base_2 additionally includes lat_abst on both sides of the `|`, i.e. as a
# regressor that also serves as its own instrument.
ajr_table_4 <- list(
  base_1 = iv_robust(
    logpgp95 ~ avexpr | logem4,
    data = ajr_base_sample
  ),
  base_2 = iv_robust(
    logpgp95 ~ avexpr + lat_abst | logem4 + lat_abst,
    data = ajr_base_sample
  )
)

htmlreg(ajr_table_4)

Statistical models
	base_1	base_2
(Intercept)	1.91	1.69
	[-0.52; 4.34]	[-1.34; 4.73]
avexpr	0.94^*	1.00^*
	[ 0.58; 1.31]	[ 0.49; 1.50]
lat_abst		-0.65
		[-3.24; 1.95]
R²	0.19	0.10
Adj. R²	0.17	0.07
Num. obs.	64	64
RMSE	0.95	1.00
^* Null hypothesis value outside the confidence interval.

FL

@fearon2003ethnicity

The data are on Dataverse, but the original code is not. The original Stata code for Table 1 is short, so I have saved it here:

/* .do File to produce Table 1 results from replication data set */
/* Every model below is a logit; the nolog option suppresses the iteration log. */
/* NOTE(review): the R code in this document uses lpopl1 where this file writes
   lpopl -- presumably this relies on Stata's variable-name abbreviation; confirm. */

/* Load repdata, replacing whatever data set is currently in memory */
use repdata ,clear

/* Model #1: outcome is onset */
logit onset warl gdpenl lpopl lmtnest ncontig Oil nwstate instab polity2l ethfrac relfrac ,nolog

/* Model #2: outcome is ethonset, same regressors as Model #1,
   estimated only on observations with second > .049999 */
logit ethonset warl gdpenl lpopl lmtnest ncontig Oil nwstate instab polity2l ethfrac relfrac if second > .049999,nolog

/* Model #3: as Model #1, with anocl and deml in place of polity2l */
logit onset warl gdpenl lpopl lmtnest ncontig Oil nwstate instab anocl deml ethfrac relfrac ,nolog

/* Model #4: outcome is emponset; uses the emp-prefixed versions of several
   regressors and includes neither a polity term nor relfrac */
logit emponset empwarl empgdpenl emplpopl emplmtnest empncontig Oil nwstate instab empethfrac ,nolog

/* Model #5: as Model #3, with cowonset as the outcome and cowwarl in place of warl */
logit cowonset cowwarl gdpenl lpopl lmtnest ncontig Oil nwstate instab anocl deml ethfrac relfrac ,nolog

Main result is in Table 1:

# Load the Fearon & Laitin replication data set.
fl_data <- haven::read_dta("1_2_fearon_laitin/in/repdata.dta")

# One odd observation (Russia 1947) is coded 4 rather than 0/1.
# To inspect it: with(fl_data, table(onset, ethonset))
# Recode any 4 to 1 in both onset indicators before fitting the logits.
fl_data <- fl_data |>
  mutate(across(c(onset, ethonset), \(v) ifelse(v == 4, 1, v)))

# The ethnic-war model is restricted to second > 0.049999, mirroring the
# `if second > .049999` condition in Model #2 of the Stata code.
fl_ethnic_sample <- fl_data |>
  filter(second > 0.049999)

# Logit models corresponding to Models #1 and #2 of the Stata do-file.
fl_table_1 <- list(
  all = glm(
    onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate +
      instab + polity2l + ethfrac + relfrac,
    family = "binomial",
    data = fl_data
  ),
  ethnic = glm(
    ethonset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate +
      instab + polity2l + ethfrac + relfrac,
    family = "binomial",
    data = fl_ethnic_sample
  )
)

htmlreg(fl_table_1)

Statistical models
	all	ethnic
(Intercept)	-6.73^***	-8.45^***
	(0.74)	(1.09)
warl	-0.95^**	-0.85^*
	(0.31)	(0.39)
gdpenl	-0.34^***	-0.38^***
	(0.07)	(0.10)
lpopl1	0.26^***	0.39^***
	(0.07)	(0.11)
lmtnest	0.22^**	0.12
	(0.08)	(0.11)
ncontig	0.44	0.48
	(0.27)	(0.40)
Oil	0.86^**	0.81^*
	(0.28)	(0.35)
nwstate	1.71^***	1.78^***
	(0.34)	(0.41)
instab	0.62^**	0.38
	(0.24)	(0.32)
polity2l	0.02	0.01
	(0.02)	(0.02)
ethfrac	0.17	0.15
	(0.37)	(0.58)
relfrac	0.29	1.53^*
	(0.51)	(0.72)
AIC	984.80	630.35
BIC	1065.83	709.00
Log Likelihood	-480.40	-303.18
Deviance	960.80	606.35
Num. obs.	6327	5186
^***p < 0.001; ^**p < 0.01; ^*p < 0.05

Nunn

@nunn2008long

The Nunn archive includes data but not replication code.

OLS

# Data from the Nunn archive (no replication code is provided there).
df_Nunn_2008 <- haven::read_dta("1_3_Nunn/in/slave_trade_QJE.dta")

# OLS of ln_maddison_pcgdp2000 on ln_export_area with Stata-style robust
# standard errors:
#   base     - the seven colony indicators only
#   controls - adds the latitude, longitude, climate and coastline variables
nunn_table_2 <- list(
  base = lm_robust(
    ln_maddison_pcgdp2000 ~ ln_export_area +
      colony1 + colony2 + colony3 + colony4 + colony5 + colony6 + colony7,
    data = df_Nunn_2008,
    se_type = "stata"
  ),
  controls = lm_robust(
    ln_maddison_pcgdp2000 ~ ln_export_area +
      colony1 + colony2 + colony3 + colony4 + colony5 + colony6 + colony7 +
      abs_latitude + longitude + rain_min + humid_max + low_temp +
      ln_coastline_area,
    data = df_Nunn_2008,
    se_type = "stata"
  )
)

htmlreg(nunn_table_2)

Statistical models
	base	controls
(Intercept)	7.22^*	5.94^*
	[ 6.91; 7.52]	[ 4.24; 7.65]
ln_export_area	-0.11^*	-0.08^*
	[-0.17; -0.06]	[-0.13; -0.02]
colony1	0.36^*	0.84^*
	[ 0.06; 0.66]	[ 0.09; 1.59]
colony2	0.31	0.77
	[-0.01; 0.63]	[-0.13; 1.66]
colony3	0.21	0.45
	[-0.18; 0.60]	[-0.38; 1.28]
colony4	-0.89^*	0.08
	[-1.24; -0.54]	[-1.14; 1.30]
colony5	1.66^*	2.14^*
	[ 1.30; 2.01]	[ 1.16; 3.12]
colony6	0.86^*	1.34^*
	[ 0.48; 1.24]	[ 0.41; 2.27]
colony7	0.72^*	0.75
	[ 0.50; 0.93]	[-0.07; 1.57]
abs_latitude		0.02
		[-0.02; 0.05]
longitude		0.00
		[-0.01; 0.01]
rain_min		-0.00
		[-0.02; 0.02]
humid_max		0.01
		[-0.01; 0.03]
low_temp		-0.02
		[-0.08; 0.04]
ln_coastline_area		0.08^*
		[ 0.01; 0.16]
R²	0.50	0.60
Adj. R²	0.41	0.45
Num. obs.	52	52
RMSE	0.63	0.61
^* Null hypothesis value outside the confidence interval.

First stage:

# First stage: ln_export_area regressed on the four minimum-distance
# variables that serve as instruments in the IV models.
nunn_first_stage <- lm_robust(
  ln_export_area ~ atlantic_distance_minimum + indian_distance_minimum +
    saharan_distance_minimum + red_sea_distance_minimum,
  data = df_Nunn_2008,
  se_type = "stata"
)

htmlreg(nunn_first_stage)

Statistical models
	Model 1
(Intercept)	29.11^*
	[15.56; 42.66]
atlantic_distance_minimum	-1.31^*
	[-2.02; -0.61]
indian_distance_minimum	-1.10^*
	[-1.86; -0.33]
saharan_distance_minimum	-2.43^*
	[-4.07; -0.80]
red_sea_distance_minimum	-0.00
	[-1.46; 1.46]
R²	0.28
Adj. R²	0.22
Num. obs.	52
RMSE	3.45
^* 0 outside the confidence interval.

IV

# IV estimates: ln_export_area is instrumented by the four minimum-distance
# variables. In `controls` the colony indicators appear on both sides of the
# `|`, so they enter as regressors that instrument for themselves.
nunn_table_4 <- list(
  base = iv_robust(
    ln_maddison_pcgdp2000 ~ ln_export_area |
      atlantic_distance_minimum + indian_distance_minimum +
        saharan_distance_minimum + red_sea_distance_minimum,
    data = df_Nunn_2008,
    se_type = "stata"
  ),
  controls = iv_robust(
    ln_maddison_pcgdp2000 ~ ln_export_area +
      colony1 + colony2 + colony3 + colony4 + colony5 + colony6 + colony7 |
      atlantic_distance_minimum + indian_distance_minimum +
        saharan_distance_minimum + red_sea_distance_minimum +
        colony1 + colony2 + colony3 + colony4 + colony5 + colony6 + colony7,
    data = df_Nunn_2008,
    se_type = "stata"
  )
)

htmlreg(nunn_table_4)

Statistical models
	base	controls
(Intercept)	7.81^*	7.72^*
	[ 7.46; 8.16]	[ 7.07; 8.36]
ln_export_area	-0.21^*	-0.20^*
	[-0.30; -0.12]	[-0.31; -0.09]
colony1		0.15
		[-0.31; 0.61]
colony2		0.14
		[-0.33; 0.62]
colony3		0.04
		[-0.79; 0.87]
colony4		-1.25^*
		[-1.78; -0.72]
colony5		1.07^*
		[ 0.32; 1.81]
colony6		0.23
		[-0.57; 1.03]
colony7		0.36
		[-0.13; 0.85]
R²	0.13	0.34
Adj. R²	0.11	0.22
Num. obs.	52	52
RMSE	0.78	0.73
^* Null hypothesis value outside the confidence interval.

AJRY

@acemoglu2008income

# Five-year panel for the AJRY replication.
df_AJRY_5_yr <- read_csv(
  "1_4_acemoglu_johnson_robinson_yared/in/five_year_panel.csv",
  show_col_types = FALSE
)

# Build within-country lags of lrgdpch and fhpolrigaug, ordered by
# year_numeric.
# NOTE(review): lag() takes the previous *row* within a country, which is the
# previous five-year period only if the panel has no gaps -- confirm.
df_AJRY_5_yr <- df_AJRY_5_yr |>
  group_by(code_numeric) |>
  mutate(
    l_lrgdpch = lag(lrgdpch, order_by = year_numeric),
    l_fhpolrigaug = lag(fhpolrigaug, order_by = year_numeric)
  ) |>
  # Drop the grouping so that later verbs on this data frame do not silently
  # operate country by country.
  ungroup()

# Both models are estimated on the rows flagged sample == 1.
ajry_sample <- df_AJRY_5_yr |>
  filter(sample == 1)

# fhpolrigaug regressed on lagged lrgdpch and its own lag, with standard
# errors clustered on code_numeric:
#   pooled - year fixed effects only
#   fe     - year and country (code_numeric) fixed effects
ajry_table_2 <- list(
  pooled = lm_robust(
    fhpolrigaug ~ l_lrgdpch + l_fhpolrigaug,
    fixed_effects = ~ year_numeric,
    data = ajry_sample,
    cluster = code_numeric
  ),
  fe = lm_robust(
    fhpolrigaug ~ l_lrgdpch + l_fhpolrigaug,
    fixed_effects = ~ year_numeric + code_numeric,
    data = ajry_sample,
    cluster = code_numeric
  )
)

htmlreg(ajry_table_2)

Statistical models
	pooled	fe
l_lrgdpch	0.07^*	0.01
	[0.05; 0.09]	[-0.05; 0.07]
l_fhpolrigaug	0.71^*	0.38^*
	[0.64; 0.78]	[ 0.28; 0.47]
R²	0.73	0.80
Adj. R²	0.72	0.75
Num. obs.	945	945
RMSE	0.19	0.18
N Clusters	150	150
^* Null hypothesis value outside the confidence interval.

Group politics {.tabset}

Readings focus on between-group politics, gender-based inequalities and ways to address these.

AGN

@alesina2013origins

CD

@chattopadhyay2004women

Mousa

@mousa2020building

GHP

@gulzar2020does

Replication of Table 2

# Data for the replication of Table 2.
df_india <- haven::read_dta("2_4_gulzer_haas_pasquale/in/Data.dta")

# Estimation sample shared by both models: km_to_nid of at most 10 and
# samp == 1.
gulzar_sample <- df_india |>
  filter(km_to_nid <= 10 & samp == 1)

# Both models regress a job-card outcome on sch plus the 2001 index controls
# and the sharesc/sharest variables, with state fixed effects and Stata-style
# standard errors clustered on idgp.
# NOTE(review): the second model (jobcards_st) includes only the yv
# polynomial terms, whereas the first (jobcards) also includes the xv and
# xv-by-yv terms -- confirm this difference is intentional.
Gulzar_Table_2 <- list(
  lm_robust(
    jobcards ~ sch +
      yv + yv2 + yv3 + yv4 +
      xv + xv2 + xv3 + xvyv + xvyv2 + xv2yv + xv4 + xv3yv + xv2yv2 + xvyv3 +
      pop_index_2001 + min_index_2001 + vul_index_2001 + educ_index_2001 +
      med_index_2001 + water_index_2001 + comm_index_2001 + bank_index_2001 +
      road_index_2001 + urban_index_2001 + irrigation_index_2001 +
      agworker_index_2001 + margworker_index_2001 + nonagworker_index_2001 +
      sharesc91 + sharesc01 + sharest91 + sharest01,
    data = gulzar_sample,
    fixed_effects = ~ state,
    cluster = idgp,
    se_type = "stata"
  ),
  lm_robust(
    jobcards_st ~ sch +
      yv + yv2 + yv3 + yv4 +
      pop_index_2001 + min_index_2001 + vul_index_2001 + educ_index_2001 +
      med_index_2001 + water_index_2001 + comm_index_2001 + bank_index_2001 +
      road_index_2001 + urban_index_2001 + irrigation_index_2001 +
      agworker_index_2001 + margworker_index_2001 + nonagworker_index_2001 +
      sharesc91 + sharesc01 + sharest91 + sharest01,
    data = gulzar_sample,
    fixed_effects = ~ state,
    cluster = idgp,
    se_type = "stata"
  )
)

htmlreg(Gulzar_Table_2)

Statistical models
	Model 1	Model 2
sch	0.00	0.19^*
	[ -0.03; 0.03]	[ 0.14; 0.24]
yv	-8064.17^*	797.61^*
	[-11652.80; -4475.53]	[ 398.80; 1196.41]
yv2	1098.25	-1193.66^*
	[ -1226.70; 3423.19]	[-1875.69; -511.62]
yv3	-965.97	797.31^*
	[ -1954.80; 22.86]	[ 280.53; 1314.09]
yv4	-11.02	-202.07^*
	[ -95.07; 73.03]	[ -348.57; -55.57]
xv	-180.28
	[ -3205.37; 2844.82]
xv2	-862.77
	[ -2106.70; 381.16]
xv3	384.44^*
	[ 137.84; 631.04]
xvyv	6966.26^*
	[ 4417.47; 9515.04]
xvyv2	170.24
	[ -695.90; 1036.37]
xv2yv	-2237.55^*
	[ -2944.82; -1530.28]
xv4	-48.61^*
	[ -68.86; -28.35]
xv3yv	267.73^*
	[ 193.29; 342.17]
xv2yv2	-172.35^*
	[ -299.76; -44.95]
xvyv3	322.88^*
	[ 95.84; 549.92]
pop_index_2001	-0.04^*	-0.31^*
	[ -0.07; -0.01]	[ -0.39; -0.22]
min_index_2001	0.07^*	0.44^*
	[ 0.05; 0.09]	[ 0.39; 0.49]
vul_index_2001	0.02^*	-0.02
	[ 0.01; 0.04]	[ -0.05; 0.00]
educ_index_2001	0.04^*	0.12^*
	[ 0.02; 0.05]	[ 0.08; 0.15]
med_index_2001	0.00	0.04
	[ -0.01; 0.02]	[ -0.00; 0.08]
water_index_2001	-0.04^*	-0.02
	[ -0.06; -0.03]	[ -0.05; 0.01]
comm_index_2001	-0.06^*	-0.12^*
	[ -0.07; -0.05]	[ -0.15; -0.09]
bank_index_2001	0.03^*	0.05^*
	[ 0.02; 0.04]	[ 0.03; 0.08]
road_index_2001	0.03^*	0.07^*
	[ 0.01; 0.04]	[ 0.05; 0.10]
urban_index_2001	-0.01^*	0.01
	[ -0.02; -0.00]	[ -0.01; 0.03]
irrigation_index_2001	-0.02	-0.08^*
	[ -0.04; 0.00]	[ -0.13; -0.04]
agworker_index_2001	0.15^*	0.02
	[ 0.13; 0.17]	[ -0.03; 0.06]
margworker_index_2001	-0.00	0.02^*
	[ -0.01; 0.01]	[ 0.00; 0.04]
nonagworker_index_2001	-0.00	-0.04
	[ -0.02; 0.02]	[ -0.08; 0.01]
sharesc91	0.10	0.33^*
	[ -0.03; 0.22]	[ 0.07; 0.59]
sharesc01	-0.11	-0.40^*
	[ -0.25; 0.03]	[ -0.67; -0.12]
sharest91	0.26^*	1.09^*
	[ 0.17; 0.35]	[ 0.86; 1.32]
sharest01	0.00	0.96^*
	[ -0.09; 0.09]	[ 0.73; 1.20]
R²	0.44	0.47
Adj. R²	0.44	0.46
Num. obs.	32641	32641
RMSE	0.70	1.31
N Clusters	14933	14933
^* Null hypothesis value outside the confidence interval.

Accountability {.tabset}

Readings look at drivers of political responsiveness to citizen demands. We look at one observational piece and three experimental interventions.

FF

@ferraz2011electoral

# Ferraz & Finan replication data.
df_ff_corruption <- haven::read_dta(
  "3_2_ferraz_finan/in/corruptiondata_aer.dta"
)

# Both models use the rows flagged esample2 == 1.
ff_sample <- df_ff_corruption |>
  filter(esample2 == 1)

# pcorrupt regressed on first:
#   base     - no controls
#   controls - adds the pref_* variables and the party dummies
# NOTE(review): party_d2 is absent from the dummy list -- presumably the
# omitted reference category; confirm.
ff_table_4 <- list(
  base = lm_robust(
    pcorrupt ~ first,
    data = ff_sample
  ),
  controls = lm_robust(
    pcorrupt ~ first + pref_masc + pref_idade_tse + pref_escola +
      party_d1 + party_d3 + party_d4 + party_d5 + party_d6 + party_d7 +
      party_d8 + party_d9 + party_d10 + party_d11 + party_d12 +
      party_d13 + party_d14 + party_d15 + party_d16 + party_d17 + party_d18,
    data = ff_sample
  )
)

htmlreg(ff_table_4)

Statistical models
	base	controls
(Intercept)	0.07^*	0.18^*
	[ 0.06; 0.09]	[ 0.09; 0.27]
first	-0.02^*	-0.02^*
	[-0.04; -0.00]	[-0.04; -0.00]
pref_masc		-0.03
		[-0.09; 0.02]
pref_idade_tse		-0.00
		[-0.00; 0.00]
pref_escola		-0.01^*
		[-0.01; -0.00]
party_d1		-0.03^*
		[-0.06; -0.00]
party_d3		0.03
		[-0.02; 0.08]
party_d4		-0.01
		[-0.04; 0.02]
party_d5		0.00
		[-0.04; 0.04]
party_d6		-0.00
		[-0.05; 0.04]
party_d7		0.01
		[-0.02; 0.03]
party_d8		0.19^*
		[ 0.16; 0.21]
party_d9		0.04
		[-0.03; 0.12]
party_d10		0.01
		[-0.06; 0.08]
party_d11		-0.03
		[-0.07; 0.01]
party_d12		-0.01
		[-0.04; 0.01]
party_d13		0.05
		[-0.08; 0.17]
party_d14		-0.04^*
		[-0.06; -0.01]
party_d15		-0.05^*
		[-0.08; -0.03]
party_d16		0.24^*
		[ 0.22; 0.27]
party_d17		-0.00
		[-0.06; 0.06]
party_d18		0.03
		[-0.00; 0.06]
R²	0.01	0.08
Adj. R²	0.01	0.03
Num. obs.	476	476
RMSE	0.10	0.10
^* Null hypothesis value outside the confidence interval.

Grossman et al

@grossman2021effect

Raffler

@raffler2022does

Fujiwara and Wantchekon

@fujiwara2013can

Interventions {.tabset}

We look at macro and micro interventions including international peacekeeping, development aid, and state-led innovations.

Blair et al

@blair2022peacekeeping

Nunn & Qian

@nunn2014us

Muralidharan et al

@muralidharan2016building

Blattman & Annan

@blattman2016can