**** Replication file for "Can Business Owners Form Accurate Counterfactuals?
**** Eliciting Treatment and Control Beliefs about Their Outcomes in the Alternative Treatment Status"
*** David McKenzie, Journal of Business & Economic Statistics

** Analysis carried out in Stata/MP 14.2

* Pin the Stata release. The file header states the analysis was carried out in
* Stata/MP 14.2; version control keeps the seeded uniform() draw used in the
* heaping check and the old-style merge syntax below behaving as in that release
* when the file is run in a newer Stata. Releases older than 14.2 stop here.
version 14.2

* Set directory (machine-specific path: edit before running elsewhere)
cd "C:\Users\wb200090\Box Sync\otherresearch\Nigeria\PublicUseNigeria\Data\"


*** Merge public use baseline with third follow-up round and second follow-up round (needed for heterogeneity)
* Old-style match merges on uid. The sort option sorts master and using data on
* uid before merging and requires uid to identify observations uniquely.
* The syntax is deliberately left unchanged: the heaping check later assigns
* random numbers observation by observation, so the row order produced here matters.
use "ThirdFollowup.dta", clear
sort uid
merge uid using "BaselineandFirstFollowup.dta", sort
* _merge has to be dropped before the next merge can create it again
drop _merge
sort uid
merge uid using "SecondFollowup.dta", sort
* _merge from this last merge is left in the data

*** Generate variable indicating whether they operate a firm or not
* Three sources are consulted in order; each later source only fills
* observations that are still missing after the earlier ones.

* Source 1: t_sc (1 = operates a firm, any other non-missing value = does not)
gen t_operatefirm = (t_sc == 1) if t_sc != .

* Source 2: t_operate (1 = yes, 2 = no)
replace t_operatefirm = (t_operate == 1) if t_operatefirm == . & inlist(t_operate, 1, 2)

* Source 3: t_a3 (1 = yes, 2 = no)
replace t_operatefirm = (t_a3 == 1) if t_operatefirm == . & inlist(t_a3, 1, 2)

label var t_operatefirm "Operates a firm at time of third follow-up"

********** Table 1: Counterfactuals of the Likelihood of Operating a Business ****************

*** Generate Percent chance would be running a business
* The control group (group==2) answers t_py2a and the treatment group (group==1)
* answers t_py7a; any other group value is left missing.
gen chancerunbusiness = cond(group == 1, t_py7a, cond(group == 2, t_py2a, .))

*** Column 2: Realized Outcomes
* Share operating a firm among respondents who gave a counterfactual answer,
* listed as control new, control existing, treatment new, treatment existing.
foreach arm in 2 1 {
    foreach firmtype in 0 1 {
        sum t_operatefirm if group == `arm' & chancerunbusiness != . & existing == `firmtype'
    }
}

*** Columns 3 and 4: Counterfactual Outcomes
* Detailed summaries of the stated percent chance, in the same cell order as Column 2
foreach arm in 2 1 {
    foreach firmtype in 0 1 {
        sum chancerunbusiness if existing == `firmtype' & group == `arm', de
    }
}

*** Column 5 is calculated from Column 3 and 2
*** Column 6 comes from McKenzie (2015)


****** Figure 1: New Firms
* The reference-line positions in this section are typed in by hand.

*** Panel a: counterfactual beliefs of the Control Group, with lines for Treatment Mean and Counterfactual Mean
histogram chancerunbusiness if group==2 & existing==0, ///
    discrete xline(95.9 90.55)
* edited with graph editor and saved as Figure1a.gph

*** Panel b: counterfactual beliefs of the Treatment Group, with lines for Control Mean and Counterfactual Mean
histogram chancerunbusiness if group==1 & existing==0, ///
    discrete xline(50.4 50.7)
* edited with graph editor and saved as Figure1b.gph
**graph combine "Figure1a.gph" "Figure1b.gph", col(1)
** saved as Figure1.gph


*** Figure 2: Existing Firms
* Panel a: control group
histogram chancerunbusiness if group==2 & existing==1, ///
    discrete xline(90.4 96.6)
* saved as Figure2a
* Panel b: treatment group
histogram chancerunbusiness if group==1 & existing==1, ///
    discrete xline(76.3 55.1)
* saved as Figure2b
*graph combine "Figure2a.gph" "Figure2b.gph", col(1)
** saved as Figure2.gph


*** How sensitive is this to heaping at 50%?
* Randomly remove part of the treated existing-firm respondents who answer exactly
* 50 percent, then recompute the mean counterfactual chance without them.
* NOTE(review): the cutoff of 17 below is hard-coded; presumably it is half of the
* number of such respondents - confirm against the data.
* The draw depends on the seed and on the current order of the observations,
* because one random number is assigned per observation in sequence.
set seed 324
gen randomX=uniform()
* rankX is defined only for treated (group==1) existing firms answering 50
egen rankX=rank(randomX) if existing==1 & group==1 & chancerunbusiness==50
gen chancerunbusiness2=chancerunbusiness
* Blank out the 17 lowest-ranked of those respondents; all others have rankX missing
replace chancerunbusiness2=. if rankX<=17
sum chancerunbusiness2 if existing==1 & group==1


**************** EXPECTATIONS AT THE INTENSIVE MARGIN - SIZE OF FIRM
****** EMPLOYMENT

*** Expectations of Counterfactual Employment
* Control group (group==2) answers come from t_py2b, treatment group (group==1)
* answers from t_py7b. Code 998 is treated as missing in either question.
gen counterfactualemp = t_py2b if group == 2 & t_py2b != 998
replace counterfactualemp = t_py7b if group == 1
replace counterfactualemp = . if t_py7b == 998
* A stated zero chance of running a business (t_py7a) means zero employees
replace counterfactualemp = 0 if t_py7a == 0

*** Generate actual employment
* Wage and salary workers: codes 998 and 999 become missing; zero when no firm is operated
gen t_wagesalaryemps = cond(t_operatefirm == 0, 0, cond(inlist(t_ef3_1, 998, 999), ., t_ef3_1))
label var t_wagesalaryemps "Number of wage and salary employees at third follow-up"

* Casual and daily workers: code 999 is counted as zero; zero when no firm is operated
gen t_casualdaily = cond(t_operatefirm == 0 | t_ef3_2 == 999, 0, t_ef3_2)
label var t_casualdaily "Number of casual and daily employees in third follow-up"

* Total employment: the operate-firm indicator plus both worker counts,
* and zero for respondents who do not operate a firm
gen t_totalemp1 = cond(t_operatefirm == 0, 0, t_operatefirm + t_wagesalaryemps + t_casualdaily)
* Fallbacks for totals that are still missing: first t_ef3_5 plus one for
* operating firms, then t_a4; values of 998 and above are not used
replace t_totalemp1 = t_ef3_5 + 1 if t_operatefirm == 1 & t_totalemp1 == . & t_ef3_5 < 998
replace t_totalemp1 = t_a4 if t_totalemp1 == . & t_a4 < 998
 
 
 
 ******** Table 2: Counterfactuals of Number of Employees Conditional on Going into Business ***************
 ** Column 2: Realized Outcomes
* Realized total employment of firm operators who also gave a counterfactual answer.
* t_totalemp1 is named in full: the shortened name t_totalemp only resolved through
* variable abbreviation, which fails with varabbrev off or when another variable
* shares the prefix.
sum t_totalemp1 if group==2 & counterfactualemp~=. & existing==0 & t_operatefirm==1
sum t_totalemp1 if group==2 & counterfactualemp~=. & existing==1 & t_operatefirm==1
sum t_totalemp1 if group==1 & counterfactualemp~=. & existing==0 & t_operatefirm==1
sum t_totalemp1 if group==1 & counterfactualemp~=. & existing==1 & t_operatefirm==1

** Columns 3,4 and 5: Counterfactuals Expected and Trimmed Counterfactual
* For each cell: detailed summary of the raw counterfactual, then blank out values
* above that cell's 95th percentile in cfactemp_trim and summarize the trimmed
* variable. r(p95) is taken from the summarize that immediately precedes each
* replace, so the order of these statements must not change.

* Control group, new firms
sum counterfactualemp if existing==0 & group==2, de
gen cfactemp_trim=counterfactualemp
replace cfactemp_trim=. if counterfactualemp>r(p95)  &  existing==0 & group==2
sum cfactemp_trim if existing==0 & group==2

* Control group, existing firms
sum counterfactualemp if existing==1 & group==2, de
replace cfactemp_trim=. if counterfactualemp>r(p95)  &  existing==1 & group==2
sum cfactemp_trim if existing==1 & group==2

* Treatment group, new firms: the percentile is computed among answers below 3000;
* answers of 3000 or more exceed that percentile and are therefore trimmed as well
sum counterfactualemp if existing==0 & group==1 & counterfactualemp<3000, de
replace cfactemp_trim=. if counterfactualemp>r(p95)  &  existing==0 & group==1
sum cfactemp_trim if existing==0 & group==1

* Treatment group, existing firms
sum counterfactualemp if existing==1 & group==1, de
replace cfactemp_trim=. if counterfactualemp>r(p95)  &  existing==1 & group==1
sum cfactemp_trim if existing==1 & group==1

*** Column 6 calculated from Column 2 and 3
** Column 7 from McKenzie (2015)



***** Figure 3: Comparison of Actual Distribution of Employment for Winners to Counterfactual Distribution expected by control group had they won
* Each panel builds two empirical CDFs with cumul (equal gives tied values the same
* cumulative value), stacks them into one pair of variables and draws both lines.
* The stack command replaces the data in memory, hence the preserve and restore.
* cumul is applied to t_totalemp1 under its full name so that the CDF is computed
* on exactly the variable it is stacked against, without relying on variable
* abbreviation of the shortened name t_totalemp.

* Panel a: new firms
preserve
cumul t_totalemp1 if group==1  & existing==0 & t_operatefirm==1, equal gen(actualemp1)
cumul counterfactualemp if existing==0 & group==2, equal gen(expectedemp1)
keep if existing==0
* _stack==1 holds the actual pair, _stack==2 the expected pair
stack actualemp1 t_totalemp1 expectedemp1 counterfactualemp, into(cumemp emp)  clear
gen actualemp=cumemp if _stack==1
gen expectedemp=cumemp if _stack==2
sort emp
label var actualemp "Actual employment of winners"
label var expectedemp "Expected employment of control group"
* Only employment values below 500 are drawn
twoway line actualemp expectedemp emp if emp<500, sort
restore
* edited with ExpectationsFigure3 recorder. Saved as ExpectationsFigure3a

* Panel b: existing firms
preserve
cumul t_totalemp1 if group==1 & existing==1 & t_operatefirm==1, equal gen(actualemp1)
cumul counterfactualemp if existing==1 & group==2, equal gen(expectedemp1)
keep if existing==1
stack actualemp1 t_totalemp1 expectedemp1 counterfactualemp, into(cumemp emp)  clear
gen actualemp=cumemp if _stack==1
gen expectedemp=cumemp if _stack==2
sort emp
label var actualemp "Actual employment of winners"
label var expectedemp "Expected employment of control group"
* Only employment values below 100 are drawn
twoway line actualemp expectedemp emp if emp<100, sort
restore
* edited with ExpectationsFigure3 recorder. Saved as ExpectationsFigure3b
 
 

****** Figure 4: Comparison of Actual Distribution of Employment for Control Group to Counterfactual Distribution expected by treatment group had they lost
* Same construction as Figure 3 with the roles of the two groups swapped: the actual
* CDF comes from control-group firm operators (group==2) and the expected CDF from
* the counterfactual answers of the treatment group (group==1).
* cumul is applied to t_totalemp1 under its full name so that the CDF is computed
* on exactly the variable it is stacked against, without relying on variable
* abbreviation of the shortened name t_totalemp.

* Panel a: new firms
preserve
cumul t_totalemp1 if group==2 &  existing==0 & t_operatefirm==1, equal gen(actualemp1)
* Counterfactual answers of 500 or more are left out of the expected CDF
cumul counterfactualemp if existing==0 & group==1 & counterfactualemp<500, equal gen(expectedemp1)
keep if existing==0
* _stack==1 holds the actual pair, _stack==2 the expected pair
stack actualemp1 t_totalemp1 expectedemp1 counterfactualemp, into(cumemp emp)  clear
gen actualemp=cumemp if _stack==1
gen expectedemp=cumemp if _stack==2
sort emp
label var actualemp "Actual employment of control group"
label var expectedemp "Expected employment of winners"
* Only employment values below 100 are drawn
twoway line actualemp expectedemp emp if emp<100, sort
restore
* edited with ExpectationsFigure3 recorder. Saved as ExpectationsFigure4a

* Panel b: existing firms
preserve
cumul t_totalemp1 if group==2 &  existing==1 & t_operatefirm==1, equal gen(actualemp1)
cumul counterfactualemp if existing==1 & group==1, equal gen(expectedemp1)
keep if existing==1
stack actualemp1 t_totalemp1 expectedemp1 counterfactualemp, into(cumemp emp)  clear
gen actualemp=cumemp if _stack==1
gen expectedemp=cumemp if _stack==2
sort emp
label var actualemp "Actual employment of control group"
label var expectedemp "Expected employment of winners"
* Only employment values below 60 are drawn
twoway line actualemp expectedemp emp if emp<60, sort
* saved as ExpectationsFigure4b
restore


 
 **************** SALES
 *** Realized sales
* Copy of t_bf5 with code 998 turned into missing
gen t_sales = t_bf5
replace t_sales = . if t_sales == 998
label var t_sales "Monthly sales for last month in third follow-up"

** counterfactual sales
* Built in two passes that must stay in this order, because the second pass
* overwrites the first for treatment-group respondents.
* Pass 1, control group (group==2): answer t_py2c, codes 998 and 9998 set to
* missing, and zero when the stated chance of running a business (t_py2a) is zero.
gen counterfactualsales = t_py2c if group == 2
replace counterfactualsales = . if inlist(t_py2c, 998, 9998)
replace counterfactualsales = 0 if t_py2a == 0
* Pass 2, treatment group (group==1): the same steps using t_py7c and t_py7a
replace counterfactualsales = t_py7c if group == 1
replace counterfactualsales = . if inlist(t_py7c, 998, 9998)
replace counterfactualsales = 0 if t_py7a == 0


*** Figure 5: Comparison of Actual Distribution of Sales for Winners to Counterfactual Distribution expected by control group had they won
* One pass per applicant type: new firms first (existing==0, panel a), then
* existing firms (existing==1, panel b). The panels differ only in the sample
* and in the upper limit of the sales range that is drawn.
foreach firmtype in 0 1 {
    * Upper limit of plotted sales, in millions
    if `firmtype' == 0 {
        local salescap 100
    }
    else {
        local salescap 60
    }

    * stack replaces the data in memory, so work on a preserved copy
    preserve
    * Empirical CDFs: realized sales of treated firm operators and
    * counterfactual sales stated by the control group
    cumul t_sales if group==1 & existing==`firmtype' & t_operatefirm==1, equal gen(actualsales1)
    cumul counterfactualsales if existing==`firmtype' & group==2, equal gen(expectedsales1)
    keep if existing==`firmtype'
    * After stacking, _stack==1 is the actual pair and _stack==2 the expected pair
    stack actualsales1 t_sales expectedsales1 counterfactualsales, into(cumsales sales)  clear
    gen actualsales = cumsales if _stack==1
    gen expectedsales = cumsales if _stack==2
    * Rescale sales to millions before plotting
    replace sales = sales/1000000
    sort sales
    label var actualsales "Actual sales of treatment group"
    label var expectedsales "Expected sales of control group"
    label var sales "monthly sales (in millions)"
    twoway line actualsales expectedsales sales if sales<`salescap', sort
    restore
}
* graphs saved as ExpectationsFigure5a (new firms) and ExpectationsFigure5b (existing firms)
 
 
*** Figure 6: Comparison of Actual Distribution of Sales for Control Group to Counterfactual Distribution expected by treatment group had they lost
* One pass per applicant type: new firms first (existing==0, panel a), then
* existing firms (existing==1, panel b). The panels differ only in the sample
* and in the upper limit of the sales range that is drawn.
foreach firmtype in 0 1 {
    * Upper limit of plotted sales, in millions
    if `firmtype' == 0 {
        local salescap 20
    }
    else {
        local salescap 15
    }

    * stack replaces the data in memory, so work on a preserved copy
    preserve
    * Empirical CDFs: realized sales of control-group firm operators and
    * counterfactual sales stated by the treatment group
    cumul t_sales if group==2 & existing==`firmtype' & t_operatefirm==1, equal gen(actualsales1)
    cumul counterfactualsales if existing==`firmtype' & group==1, equal gen(expectedsales1)
    keep if existing==`firmtype'
    * After stacking, _stack==1 is the actual pair and _stack==2 the expected pair
    stack actualsales1 t_sales expectedsales1 counterfactualsales, into(cumsales sales)  clear
    gen actualsales = cumsales if _stack==1
    gen expectedsales = cumsales if _stack==2
    * Rescale sales to millions before plotting
    replace sales = sales/1000000
    sort sales
    label var actualsales "Actual sales of control group"
    label var expectedsales "Expected sales of treatment group"
    label var sales "monthly sales (in millions)"
    twoway line actualsales expectedsales sales if sales<`salescap', sort
    restore
}
* graphs saved as ExpectationsFigure6a (new firms) and ExpectationsFigure6b (existing firms)

 
******* Table 3:  Heterogeneity in Control Group New Applicant counterfactuals of the number of employees conditional on operating a business
* For every subgroup of new applicants four results are produced:
*   1. detailed summary of the control group's counterfactual employment
*   2. summary of the counterfactual trimmed at the 95th percentile (cfactemp_trim)
*   3. detailed summary of realized control-group employment among firm operators
*      who gave a counterfactual answer
*   4. regression of realized employment on treatment assignment among firm
*      operators in the experimental sample, absorbing randomization strata,
*      with robust standard errors
* Realized employment is referred to as t_totalemp1 throughout; the shortened name
* t_totalemp only resolved through variable abbreviation.

* Digitspan recall
* Later questions overwrite earlier ones, so the value kept is the one from the
* last question in the a-h sequence that is coded 2; a code of 1 on question h
* gives 11. NOTE(review): presumably 2 marks a failed recall at that length and 1
* a success - confirm against the questionnaire.
gen digitspan = .
local span = 3
foreach q in a b c d e f g h {
    replace digitspan = `span' if s_q13`q'==2
    local ++span
}
replace digitspan = 11 if s_q13h==1
* Indicator for a span of 7 or more, missing when the span is missing
gen highdigit = (digitspan>=7) if digitspan~=.

* randomization strata
* Built from the female variable as it comes with the data, before that variable
* is rebuilt from sex below
egen strata=group(region existing female)
replace strata=. if experimentalsample~=1

*** By gender
drop female
gen female=sex=="Female"

**** By city
gen abujalagos=state_residence=="Lagos"|state_residence=="Abuja"

***Females
* NOTE(review): this is the only regression in the table that also requires a
* non-missing counterfactual answer. The condition is kept exactly as in the
* original file; confirm whether the female sample is meant to differ from the
* other subgroups.
sum counterfactualemp if existing==0 & group==2 & female==1, de
sum cfactemp_trim  if existing==0 & group==2 & female==1
sum t_totalemp1 if group==2 & counterfactualemp~=. & existing==0 & t_operatefirm==1 & female==1, de
areg t_totalemp1 assigntreat if experimentalsample==1 & existing==0 & t_operatefirm==1 & female==1 & counterfactualemp~=., a(strata) robust

*** Males, then by education, by digit span recall and by city
* All remaining subgroups share one specification, so they are run in a loop in
* the original order of the table.
foreach cut in female==0 university==1 university==0 highdigit==1 highdigit==0 abujalagos==1 abujalagos==0 {
    display _newline as text "Table 3 subgroup: `cut'"
    sum counterfactualemp if existing==0 & group==2 & `cut', de
    sum cfactemp_trim  if existing==0 & group==2 & `cut'
    sum t_totalemp1 if group==2 & counterfactualemp~=. & existing==0 & t_operatefirm==1 & `cut', de
    areg t_totalemp1 assigntreat if experimentalsample==1 & existing==0 & t_operatefirm==1 & `cut', a(strata) robust
}




