* Start from an empty session: drop the data set in memory and all Stata matrices.
clear
clear matrix
* Switch off the -more- pause; the permanently option also stores the setting
* for future Stata sessions on this machine.
set more off, permanently
* Memory request. NOTE(review): presumably only relevant for older Stata
* releases that need manual memory allocation - confirm the target version.
set memory 500m


/*******************************************************************************
*
* Replication of Tougher et al (2018)
* 	
* 	DATA MANAGEMENT 
*
*   Last updated 28/05/2018 by ST
*
*******************************************************************************/

/*******************************************************************************
* TABLE OF CONTENTS
*
* 1. APPENDING DATA SETS - MAIN DATA SET
* 2. VARIABLE CREATION - MAIN DATA SET 
*	2.1 Dummy variable set for birth quarter
*	2.2 Study arm (detailed) and period
*	2.3 Study arm (pooled controls) and period 
*	2.4 Household SES
*	2.5 Leads and lags
*	2.6 Indexes 
*	2.7 Rescaled outcomes
* 3. VARIABLE LABELS - MAIN DATA SET
*	3.1 Variable labels for individual outcomes
*	3.2 Variable labels for families
*	3.3 Variable labels for indexes 
*	3.4 Variable labels for controls
* 4. SAVING DATA SET - MAIN
* 5. APPENDING DATA SETS - ALL PREGNANCIES DATA
* 6. VARIABLE CREATION - ALL PREGNANCIES DATA
* 	6.1 Dummy variable set for birth quarter
*	6.2 Study arm (detailed) and period
*	6.3 Study arm (pooled controls) and period 
*	6.4 Household SES 
*	6.5 Indicator for woman's earliest pregnancy
*	6.6 Indexes
*	6.7 Rescaled outcomes
* 7. VARIABLE LABELS - ALL PREGNANCIES
*	7.1 Variable labels for individual outcomes
*	7.2 Variable labels for families
*	7.3 Variable labels for indexes
* 8. SAVING DATA SET - ALL PREGNANCIES
*
*******************************************************************************/

* Work from the project root stored in the global macro master (defined outside this file).
cd "$master"


* SECTION 1 - APPENDING DATA SETS - MAIN DATA SET
/* 	Data for this evaluation were collected in two rounds of data collection. Data 
	sets from each round of data collection are appended here.
	
	The data sets appended here have one observation per eligible woman. Survey 
	questions related to ANC and delivery care pertain to the woman's most recent 
	delivery. These data are used for all of the analysis, except for outcomes 
	related to neonatal mortality (family PP3). The data required for analysis 
	of the outcomes in the PP3 family are appended in Section 5 of this .do file.

	File paths use forward slashes, which Stata accepts on Windows, macOS and 
	Unix alike (backslashes only resolve on Windows). */
	
use "build/output/master_data.dta", clear

* generate(round) marks the source of each observation: 0 = data already in
* memory, 1 = appended file. The recode turns this flag into round numbers 1 and 2.
append using "build/output/master_data_end.dta", generate(round)	
	la var round "Round of data collection"
	recode round (0=1) (1=2)

* SECTION 2 - VARIABLE CREATION - MAIN DATA SET
/* 	Variables related to individual study outcomes and sample characteristics were 
	created in separate data management .do files for each round of data collection, 
	as one of the outputs of this project was cleaned data sets of each data 
	collection round.  
	
	Variables created here required the combined (i.e. appended) data sets. */
	
** 2.1 Dummy variable set for birth quarter
* One indicator per distinct value of birth_qrt: birth_qrt_1, birth_qrt_2, ...
tab birth_qrt, gen(birth_qrt_)

* tabulate stores the number of distinct values (= number of dummies just
* created) in r(r). Looping up to that count, rather than a hard-coded 14,
* labels every dummy and cannot fail if the number of birth quarters changes.
local n_birth_qrt = r(r)

forvalues i=1(1)`n_birth_qrt'{
	local lab "Birth occured in quarter `i' of combined recall period"
	display "`lab'"
	la var birth_qrt_`i' "`lab'"
	}

** 2.2 Study arm (detailed) and period 

/*	Each variable below crosses the type of cluster (intervention, internal 
	control or external control) with whether the birth occurred before or after 
	social franchising started in the intervention cluster of the matched triplet.

	The checks that group on truearm (2.2.2 and 2.2.5) leave out triplets 
	20, 25, 27, 28, 32, 34, 35, 44, 46, 47 and 49. */

*** 2.2.1 Main analysis  
egen arm_period = group(after_ITT study_arm), label
label variable arm_period "Study arm and period (base/end)"

*** 2.2.2 Robustness check - sky placement 
egen arm_period_rob1 = group(after_ITT truearm) ///
	if !inlist(triplet, 20, 25, 27, 28, 32, 34, 35, 44, 46, 47, 49), label
label variable arm_period_rob1 "Study arm and period (base/end) - Robustness chk1"

*** 2.2.3 Robustness check - Internal control only
egen arm_period_rob3 = group(after_ITT study_arm) if study_arm!=1, label
label variable arm_period_rob3 "Study arm and period (base/end) - Robustness chk3"

*** 2.2.4 Robustness check - External control only
egen arm_period_rob4 = group(after_ITT study_arm) if study_arm!=2, label
label variable arm_period_rob4 "Study arm and period (base/end) - Robustness chk4"

*** 2.2.5 Robustness check - self report
egen arm_period_rob5 = group(after_selfbrand truearm) ///
	if !inlist(triplet, 20, 25, 27, 28, 32, 34, 35, 44, 46, 47, 49), label
label variable arm_period_rob5 "Study arm and period (base/end) - Robustness chk5"

** 2.3 Study arm (pooled controls) and period
/*	As in 2.2, but with the control arms pooled: each variable crosses the type 
	of cluster (intervention or control) with whether the birth occurred 
	before/after social franchising started in the intervention cluster of the 
	matched triplet.

	egen group() numbers the cells in sorted order of its arguments, so every 
	egen is followed by a recode that swaps codes 1 and 2 and codes 3 and 4 to 
	line the codes up with the value label interv_periodLabs. 
	NOTE(review): this presumes both grouping variables are coded 0/1 with 
	1 = after / intervention - confirm against the round-level .do files. */

*** 2.3.1 Main analysis
egen interv_period = group(after_ITT intervention), label
label variable interv_period "intervention and period (base/end)"
recode interv_period (1=2) (2=1) (3=4) (4=3)

* Value label shared by all interv_period* variables in this section
label define interv_periodLabs ///
	1 "Intervention - before" ///
	2 "Control - before" ///
	3 "Intervention - after" ///
	4 "Control - after"
label values interv_period interv_periodLabs

*** 2.3.2 Robustness check	
egen interv_period_rob1 = group(after_ITT trueintervention) ///
	if !inlist(triplet, 20, 25, 27, 28, 32, 34, 35, 44, 46, 47, 49), label
label variable interv_period_rob1 "intervention and period (base/end) - Robustness chk1"
recode interv_period_rob1 (1=2) (2=1) (3=4) (4=3)
label values interv_period_rob1 interv_periodLabs

*** 2.3.3 Robustness check	
egen interv_period_rob3 = group(after_ITT intervention) if study_arm!=1, label
label variable interv_period_rob3 "intervention and period (base/end) - Robustness chk3"
recode interv_period_rob3 (1=2) (2=1) (3=4) (4=3)
label values interv_period_rob3 interv_periodLabs

*** 2.3.4 Robustness check	
egen interv_period_rob4 = group(after_ITT intervention) if study_arm!=2, label
label variable interv_period_rob4 "intervention and period (base/end) - Robustness chk4"
recode interv_period_rob4 (1=2) (2=1) (3=4) (4=3)
label values interv_period_rob4 interv_periodLabs

*** 2.3.5 Robustness check	
egen interv_period_rob5 = group(after_selfbrand trueintervention) ///
	if !inlist(triplet, 20, 25, 27, 28, 32, 34, 35, 44, 46, 47, 49), label
label variable interv_period_rob5 "intervention and period (base/end) - Robustness chk5"
recode interv_period_rob5 (1=2) (2=1) (3=4) (4=3)
label values interv_period_rob5 interv_periodLabs


** 2.4 Household SES
/*	A wealth score is built from household items (assets, livestock, 
	electricity, rooms, toilet type, water source, construction material, 
	cooking fuel, internet, nets) and then cut into quintiles. */

*** 2.4.1 Computation of asset weights with principal component analysis 
* Only the first principal component is retained.
pca ///
	hh_mattress hh_pressurecook hh_thermos hh_chair hh_bed hh_table ///
	hh_almirah hh_fan hh_radio hh_tv hh_dvd hh_sewingmachine hh_mobile ///
	hh_telephone hh_computer hh_fridge hh_clock hh_bicycle hh_motorcycle ///
	hh_animalcart hh_car hh_waterpump hh_thresher hh_tractor ///
	hh_cows hh_donkey hh_goat hh_sheep hh_chickens ///
	hh_electricity hh_rooms ///
	toilet1 toilet2 toilet3 toilet4 toilet5 toilet6 toilet7 toilet8 toilet9 toilet10 ///
	water1 water2 water3 water4 water5 water6 water7 water8 water9 ///
	hh_construction1 hh_construction2 hh_construction3 ///
	fuel_elec fuel_lpg fuel_bio fuel_kerosene fuel_coal fuel_wood ///
	fuel_straw fuel_crop fuel_dung ///
	hh_internet hh_nets, components(1)

*** 2.4.2 Assign asset score to each respondent
* Score on the retained (first) component
predict wealthscore

*** 2.4.3 Generate quintiles
xtile quintile = wealthscore, nquantiles(5)
label variable quintile "Household wealth quintile"

* One indicator per quintile: hh_ses1 ... hh_ses5
tabulate quintile, generate(hh_ses)
label variable hh_ses1 "Poorest quintile"
label variable hh_ses2 "2nd quintile"
label variable hh_ses3 "3rd quintile"
label variable hh_ses4 "4th quintile"
label variable hh_ses5 "5th quintile"
	
** 2.5 Leads and lags
/*	Builds indicators F_1-F_8 (leads) and L_1-L_8 (lags) of the timing of the 
	birth relative to provider training. 

	first_date and last_date are quarterly dates in Stata's %tq encoding 
	(quarters since 1960q1), i.e. 211 = 2012q4 and 226 = 2016q3. */
gen first_date = 211
	la var first_date "Quarter of first interview in the dataset"	
	format first_date %tq
	
gen last_date = 226
	la var last_date "Quarter of last interview in the dataset"
	format last_date %tq

gen relative_time=birth_qrt-start_whp_training_ITT_qrt
	la var relative_time "Quarters between birth and training" 
	/* 	Note: We used the date that provider training occurred in a particular 
		cluster as the start of social franchising in the main analysis. 
		
		Relative_time is positive when birth occurred AFTER training; and negative 
		when the birth occurred BEFORE the training */

/*	Stata treats a missing value as larger than any number, so an unguarded 
	comparison such as relative_time>=k evaluates to 1 when relative_time is 
	missing. The indicators are therefore only computed where relative_time is 
	observed and are left missing otherwise. Observations in control areas are 
	still set to 0 by the replace statements below. */
foreach i of numlist 1/8 {
	gen F_`i'=(relative_time>=(-1*`i')) if !missing(relative_time)
	gen L_`i'=(relative_time>=`i') if !missing(relative_time)
	replace F_`i'=. if birth_qrt+0>last_date 
	replace F_`i'=0 if intervention==0 // Added this to make vars 0 for control areas
	replace L_`i'=0 if birth_qrt-`i'<first_date 
	replace L_`i'=0 if intervention==0 // Added this to make vars 0 for control areas
}	
la var F_1 "Birth occured 1 quarter before training or later"
la var F_2 "Birth occured 2 quarters before training or later"
la var F_3 "Birth occured 3 quarters before training or later"
la var F_4 "Birth occured 4 quarters before training or later"
la var F_5 "Birth occured 5 quarters before training or later"
la var F_6 "Birth occured 6 quarters before training or later"
la var F_7 "Birth occured 7 quarters before training or later"
la var F_8 "Birth occured 8 quarters before training or later"

la var L_1 "Birth occured 1 quarter after training or later"
la var L_2 "Birth occured 2 quarters after training or later"
la var L_3 "Birth occured 3 quarters after training or later"
la var L_4 "Birth occured 4 quarters after training or later"
la var L_5 "Birth occured 5 quarters after training or later"
la var L_6 "Birth occured 6 quarters after training or later"
la var L_7 "Birth occured 7 quarters after training or later"
la var L_8 "Birth occured 8 quarters after training or later"

** 2.6 Indexes 
	/* One of the two methods used in the evaluation to adjust for multiple hypothesis 
	testing was to compute an index for each of the 14 families of outcomes. The indexes 
	are created here. 

	The loops below read global macros that are defined outside this file: 
	outcomes_other (list of outcome variables), hypoth_z (names of globals, each 
	holding the z-score variables of one family) and indices (the family-average 
	variables created in step 2). */
	

*** 2.6.1 - Main analysis 

**** 2.6.1.1 Standardizing each outcome
	
	/* First, individual outcomes are standardized with respect to the pooled control 
	group at baseline (i.e. before the franchised clinic in the matched 
	cluster rec'd training).

	The mean and sd are taken straight from the r() results of summarize. 
	Earlier versions of this step first copied them into variables or local 
	macros; a local macro holds a number as text, which can drop trailing digits, 
	so the direct form avoids any rounding of the reference mean and sd. */

foreach var of global outcomes_other {
	qui sum `var' if intervention==0 & after_ITT==0
	gen `var'_z =(`var'-r(mean))/r(sd)
	}

**** 2.6.1.2 Averaging z-scores within families

	/* Next, for each family, we take the average of the z-scores of the 
	outcomes within that family. rowmean() averages over the non-missing 
	z-scores of each observation. */ 

foreach x of global hypoth_z{
	egen ind_`x'= rowmean($`x')
	}
	  
**** 2.6.1.3 Creating the indexes
	/* 	Finally, we standardize the variables created above with reference to the 
		control group at baseline */
foreach ind of global indices {
	qui sum `ind' if intervention==0 & after_ITT==0
	gen `ind'_final =(`ind'-r(mean))/r(sd)
	}

*** 2.6.2 Robustness check (presence of Sky provider)
	/* Same three steps as the main analysis, with the reference group defined by 
	trueintervention==0 before training. Mean and sd are used directly from the 
	r() results of summarize rather than being copied into local macros, which 
	hold numbers as text and can drop trailing digits. */

**** 2.6.2.1 Standardizing each outcome 
foreach var of global outcomes_other {
	qui sum `var' if trueintervention==0 & after_ITT==0
	gen `var'_z_rob1 =(`var'-r(mean))/r(sd)
	}

**** 2.6.2.2 Averaging z-scores within families  
foreach x of global hypoth_z_rob1{
	egen ind_`x'= rowmean($`x')
	}
	  
**** 2.6.2.3 Creating the index
foreach ind of global indices_rob1 {
	qui sum `ind' if trueintervention==0 & after_ITT==0
	gen `ind'_final =(`ind'-r(mean))/r(sd)
	}
	
*** 2.6.3 Robustness check (internal control only)
	/* Same three steps as the main analysis; the reference group is the control 
	group at baseline with study_arm==1 left out. Mean and sd are used directly 
	from the r() results of summarize rather than being copied into local 
	macros, which hold numbers as text and can drop trailing digits. */

**** 2.6.3.1 Standardizing each outcome 
foreach var of global outcomes_other {
	qui sum `var' if intervention==0 & after_ITT==0 & study_arm!=1
	gen `var'_z_rob3 =(`var'-r(mean))/r(sd)
	}

**** 2.6.3.2 Averaging z-scores within each family	  
foreach x of global hypoth_z_rob3{
	egen ind_`x'= rowmean($`x')
	}
	  
**** 2.6.3.3 Creating the indexes
foreach ind of global indices_rob3 {
	qui sum `ind' if intervention==0 & after_ITT==0 & study_arm!=1
	gen `ind'_final =(`ind'-r(mean))/r(sd)
	}

*** 2.6.4  Robustness check (external control only)
	/* Same three steps as the main analysis; the reference group is the control 
	group at baseline with study_arm==2 left out. Mean and sd are used directly 
	from the r() results of summarize rather than being copied into local 
	macros, which hold numbers as text and can drop trailing digits. */

**** 2.6.4.1 Standardizing each outcome 
foreach var of global outcomes_other {
	qui sum `var' if intervention==0 & after_ITT==0 & study_arm!=2
	gen `var'_z_rob4 =(`var'-r(mean))/r(sd)
	}

**** 2.6.4.2 Averaging z-scores within families 
foreach x of global hypoth_z_rob4{
	egen ind_`x'= rowmean($`x')
	}
	  
**** 2.6.4.3  Creating the indexes
foreach ind of global indices_rob4 {
	qui sum `ind' if intervention==0 & after_ITT==0 & study_arm!=2
	gen `ind'_final =(`ind'-r(mean))/r(sd)
	}

*** 2.6.5 Robustness check (start date for branding reported by the provider)
	/* Same three steps as the main analysis; the reference group is 
	trueintervention==0 before the self-reported branding date 
	(after_selfbrand==0). Mean and sd are used directly from the r() results of 
	summarize rather than being copied into local macros, which hold numbers as 
	text and can drop trailing digits. */

**** 2.6.5.1 Standardizing each outcome 
foreach var of global outcomes_other {
	qui sum `var' if trueintervention==0 & after_selfbrand==0
	gen `var'_z_rob5 =(`var'-r(mean))/r(sd)
	}

**** 2.6.5.2 Averaging z-scores within families  
foreach x of global hypoth_z_rob5{
	egen ind_`x'= rowmean($`x')
	}
	  
**** 2.6.5.3 Creating the indexes
foreach ind of global indices_rob5 {
	qui sum `ind' if trueintervention==0 & after_selfbrand==0
	gen `ind'_final =(`ind'-r(mean))/r(sd)
	}
** 2.7. Rescaled outcomes
/* Most outcomes are binary, with the exception of the following:
 
	-Number of antenatal care consultations (visits) 
	-Knowledge of pregnancy complications (0 to 1)
	-Knowledge of delivery complications (0 to 1)
	-Birth preparedness index (0 to 1)
	-Out-of-pocket spending on delivery care (INR)
	-Birthweight (kg)
	-Neonatal mortality (per 1000)
	-One day mortality (per 1000)

In the main tables, we show the percent of women who had the outcome in the intervention 
and control areas before and after the start of social franchising in the intervention
triplet member. For expediency in programming, percentages were calculated as mean*100. 
Treatment effects of the binary outcomes were similarly scaled (all but pp3). Therefore, 
to simplify the production of tables, we similarly multiplied means/treatment effects 
of the non-binary outcomes by 100. The recoding below scales the non-binary outcomes, so 
that means and treatment effects are in the correct scale.

Each variable must be rescaled from its OWN values. In particular the birth 
preparedness index is divided by 100 itself and is not derived from the 
delivery-complications knowledge index. */

replace anc1_numvis =  anc1_numvis/100
replace anc3_knowpregcomp = anc3_knowpregcomp/100 
replace anc3_knowdelcomp = anc3_knowdelcomp/100 
replace anc3_birthprep = anc3_birthprep/100
replace del6_delspend = del6_delspend/100
* Mortality indicators are expressed per 1000
replace pp3_nmr = pp3_nmr*1000 
replace pp3_onedaymort = pp3_onedaymort*1000

 
* SECTION 3. VARIABLE LABELS - MAIN DATA SET

** 3.1. Variable labels for individual outcomes
	/* Variable labels added here become the row headings for the results tables */

*** ANC1
la var anc1_3visits "Received at least 3 ANC visits (%)"
la var anc1_tri "Received ANC visit in first trimester (%)"
la var anc1_numvis "Number of ANC consultations (visits)"
la var anc1_ashavis "Received visit from ASHA (%)"

*** ANC2
la var anc2_tet "Fully immunised with tetanus toxoid (%)"
la var anc2_irongiv "Received iron supplementation (%)"
la var anc2_irontake "Iron supplementation for 100 days  (%)"
la var anc2_syphres "Received test results for syphilis (%)"
la var anc2_abdexam "Abdominal examination during ANC (%)"
la var anc2_intwrm "Received a drug for intestinal worms (%)"
la var anc2_mal "Received a drug to prevent malaria (%)"
la var anc2_mulbirth "Multiple birth pregnancy detected during ANC (%)"
la var anc2_content "ANC content of care score of six items"
	
*** ANC3
* anc3_knowdelcomp is the DELIVERY complications index; it must not share the
* heading of anc3_knowpregcomp, otherwise two table rows are indistinguishable.
la var anc3_knowpregcomp "Mother knowledge of pregnancy complications index (0 to 1)" 
la var anc3_knowdelcomp "Mother knowledge of delivery complications index (0 to 1)"
la var anc3_birthprep "Birth preparedness index (0 to 1)"

*** DEL1
la var del1_fac "Gave birth in a health facility (%)"
la var del1_sba "Gave birth with a doctor, nurse or midwife (%)"
la var del1_csec "Had a caesarean section (%)"

*** DEL2
la var del2_glov "Delivery attendant used gloves (%)"
la var del2_soap "Delivery attendant washed hands with soap (%)"
la var del2_bp "Woman had her BP measured (%)"
la var del2_mob "Mobility during labour (%)"
la var del2_oralfluids "Oral fluids during labour (%)"
la var del2_hrbaby "Heart rate of baby monitored with intermittent or continuous auscultation (%)"
la var del2_nasg "Use of anti-shock garment (%)"

*** DEL3
la var del3_shave "Shaved pubic hair  (%)"
la var del3_enema "Enema given (%)"
la var del3_birthpos "Lithotomy position during labour (%)" 
la var del3_ivlab "Intravenous fluids during labour (%)"

*** DEL4
la var del4_urincath "Urinary catheter (%)"
la var del4_epidural "Pain control by epidural analgesia (%)"
la var del4_oxyaug "Oxytocin augmentation (%)"
la var del4_episiotomy "Episiotomy (%)"

*** DEL5
la var del5_labsup_alt "No support during labour (%)"
la var del5_noconsent "Medical procedure performed without consent (%)"
la var del5_shout "Shouted, scolded or humiliated by health worker (%)"
la var del5_slap "Slapped, pinched or hit by health worker (%)"
la var del5_priv "Gave birth with privacy (%)" 
la var del5_priv_alt "Gave birth without privacy (%)" 
la var del5_refcare "Refused care for inability to pay (%)" 
la var del5_held "Kept in facility for inability to pay (%)"
la var del5_disrespect "Felt disrespected or abused during facility stay (%)"

*** DEL6	
la var del6_delspend "Out-of-pocket spending on delivery care (INR)" 
la var del6_borrow "Borrowed money to pay for delivery care (%)"
la var del6_hhdebt "Household in debt to pay for delivery care (%)"
la var del6_jsy_alt "Did not receive JSY cash (%)"

*** PP1
la var pp1_woman48hrcheck "Received postpartum care within 48hr of birth (%)" 
la var pp1_baby48hrcheck "Newborn received postnatal care within 48hr of birth (%)"

*** PP2
la var pp2_cleancordcare "Clean cord care (%)"
la var pp2_thermcare "Thermal care (%)"
la var pp2_babyweighed "Baby weighed at birth (%)"
la var pp2_birthregistered "Baby registered and received certificate (%)"

*** PP3
* NOTE(review): pp3_nmr and pp3_onedaymort are multiplied by 1000 in section 2.7
* but labelled (%) here - confirm which unit the results tables should show.
la var pp3_nmr_alt "Neonatal survival (%)"
la var pp3_nmr "Neonatal mortality (%)"
la var pp3_onedaymort_alt  "One day survival (%)"
la var pp3_onedaymort  "One day mortality (%)"
la var pp3_birthweight "Birth weight (KG)"

*** PP4
la var pp4_breastfeed "Immediate breastfeeding within 1hr of birth (%)"
la var pp4_colostrum "Colostrum given to baby (%)"
la var pp4_breastfeed3days "Exclusive breastfeeding for 3 days (%)"

*** PP5
la var pp5_modfp "Current modern contraceptive use (%)"

** 3.2. Labels for families of outcomes
	/* Each outcome family gets an all-missing placeholder variable whose only 
	purpose is to carry a label; the labels added here become the row headings 
	for the results tables. */

* Create the 14 placeholders (all values missing)
foreach fam in anc1 anc2 anc3 del1 del2 del3 del4 del5 del6 pp1 pp2 pp3 pp4 pp5 {
	generate `fam' = .
	}

* Family headings
label variable anc1 "Antenatal care utilisation"
label variable anc2 "Antenatal content of care"
label variable anc3 "Antenatal knowledge and preparedness"
label variable del1 "Delivery care utilisation"
label variable del2 "Recommended delivery care practices"
label variable del3 "Harmful or ineffective delivery care practices"
label variable del4 "Delivery care practices frequently over-used"
label variable del5 "Disrespect and abuse"
label variable del6 "Financial consequences"
label variable pp1 "Postpartum care"
label variable pp2 "Newborn content of care"
label variable pp3 "Neonatal health"
label variable pp4 "Breastfeeding"
label variable pp5 "Family planning"

** 3.3 Labelling indexes 

*** 3.3.1 Main analysis 
* Headings for the final (standardized) index of each outcome family
label variable ind_anc1_z_final "ANC utilisation"
label variable ind_anc2_z_final "ANC content of care"
label variable ind_anc3_z_final "ANC knowledge and preparedness"
label variable ind_del1_z_final "Delivery care utilisation"
label variable ind_del2_z_final "Recommended delivery care practices"
label variable ind_del3_z_final "Harmful or ineffective delivery care practices"
label variable ind_del4_z_final "Delivery care practices frequently over used"
label variable ind_del5_z_final "Disrespect and abuse"
label variable ind_del6_z_final "Financial consequences"
label variable ind_pp1_z_final "Postpartum care"
label variable ind_pp2_z_final "Newborn content of care"
label variable ind_pp3_z_final "Neonatal health"
label variable ind_pp4_z_final "Breastfeeding"
label variable ind_pp5_z_final "Family planning"

*** 3.3.2 Robustness checks
	/* The index of every robustness specification (rob1, rob3, rob4, rob5) 
	carries the same family heading as in the main analysis. Applying the 
	headings in one loop keeps the four sets identical (the former hand-copied 
	list had a misspelt "Financial consequences" heading for rob4). */
foreach spec in rob1 rob3 rob4 rob5 {
	la var ind_anc1_z_`spec'_final "ANC utilisation"
	la var ind_anc2_z_`spec'_final "ANC content of care"
	la var ind_anc3_z_`spec'_final "ANC knowledge and preparedness"
	la var ind_del1_z_`spec'_final "Delivery care utilisation"
	la var ind_del2_z_`spec'_final "Recommended delivery care practices"
	la var ind_del3_z_`spec'_final "Harmful or ineffective delivery care practices"
	la var ind_del4_z_`spec'_final "Delivery care practices frequently over used"
	la var ind_del5_z_`spec'_final "Disrespect and abuse"
	la var ind_del6_z_`spec'_final "Financial consequences"
	la var ind_pp1_z_`spec'_final "Postpartum care"
	la var ind_pp2_z_`spec'_final "Newborn content of care"
	la var ind_pp3_z_`spec'_final "Neonatal health"
	la var ind_pp4_z_`spec'_final "Breastfeeding"
	la var ind_pp5_z_`spec'_final "Family planning"
	}

** 3.4 Variable labels for controls
*** 3.4.1 labels for control variables
	/* These labels are the row headings of the individual control variables in 
	the results tables. */

* Below-poverty-line card
label variable hh_bpl "Yes (%)" 
label variable hh_apl "No (%)" 

* Residence
label variable hh_urban "Urban (%)" 
label variable hh_rural "Rural (%)"

* Religion
label variable religion_hindu "Hindu (%)"
label variable religion_other "Other (%)" 

* Caste
label variable hh_gc "General caste (%)"
label variable hh_sc "Scheduled caste (%)" 
label variable hh_st "Scheduled tribe (%)" 
label variable hh_obc "Other backward caste (%)"

* Maternal education
label variable mat_ed_none "No education (%)"
label variable mat_ed_primary "Some primary (%)"
label variable mat_ed_secondary "Some secondary (%)"
label variable mat_ed_above "Secondary or above (%)" 

* Parity
label variable parity1 "First birth (%)"
label variable parity2 "Second birth (%)"
label variable parity3 "Third birth (%)"
label variable parity4 "Fourth birth (%)"
label variable parity5 "Fifth birth or more (%)"

* Multiple birth
label variable hh_mulbirth "Yes (%)"
label variable hh_singbirth "No (%)"

* Time since birth
label variable qrt_since_birth "Quarter since birth (mean (sd))"

* Wealth quintile indicators (these replace the labels assigned when the
* indicators were created)
label variable hh_ses1 "Poorest (%)"
label variable hh_ses2 "Second (%)" 
label variable hh_ses3 "Third (%)"
label variable hh_ses4 "Fourth (%)"
label variable hh_ses5 "Least poor (%)"

*** 3.4.2 labels for groups of controls
	/* All-missing placeholder variables, one per group of controls; their labels 
	become the group headings of the controls in the results tables. */

* Create the placeholders (all values missing)
foreach grp in bpl_card residence religion caste mat_educ ses parity multi {
	generate `grp' = .
	}

* Group headings
label variable bpl_card "Household has below poverty-line card"
label variable residence "Residence"
label variable religion "Religion"
label variable caste "Caste"
label variable mat_educ "Maternal education"
label variable ses "Wealth quintile"
label variable parity "Parity"
label variable multi "Multiple birth"
	

* SECTION 4 - SAVING MAIN DATA SET
* Writes the completed main data set, replacing any existing copy. The path
* uses forward slashes, which Stata accepts on Windows, macOS and Unix alike.
save "build/output/master_data_complete.dta", replace


* SECTION 5 - APPENDING DATA SETS - ALL BIRTHS DATA SET
/* 	Data for this evaluation were collected in two rounds of data collection. Data 
	sets from each round of data collection are appended here.
	
	In addition to the detailed data collected on the eligible woman's most recent,
	delivery the survey also collected data on all of the woman's pregnancies since 
	2010. The data sets appended here have one observation per pregnancy. These 
	data were used for the analysis of indicators related to neonatal mortality 
	(family PP3). */
	

* Load the first all-births file, replacing whatever data are in memory.
* Paths use forward slashes so the do-file runs on Windows, macOS and Linux.
use "build/output/master_data_all_births.dta", clear

* generate(round) adds a marker that is 0 for rows already in memory and 1 for
* rows coming from the appended file.
append using "build/output/master_data_end_all_births.dta", generate(round)
	la var round "Round of data collection"
	* Shift the 0/1 marker to 1/2 (1 = file loaded first, 2 = appended file)
	recode round (0=1) (1=2)

* SECTION 6 - VARIABLE CREATION - ALL PREGNANCIES DATA SET
/* 	Variables related to individual study outcomes and sample characteristics were 
	created in separate data management .do files for each round of data collection, 
	as one of the outputs of this project was cleaned data sets of each data 
	collection round.  
	
	Variables created here required the combined (i.e. appended) data sets. */
	
** 6.1 Dummy variable set for birth quarter
* tabulate creates one indicator per distinct value of birth_qrt, named
* birth_qrt_1, birth_qrt_2, ...
tabulate birth_qrt, generate(birth_qrt_)

* Label the indicators; the loop assumes exactly 14 quarters are present.
forvalues q = 1/14 {
	local qlabel "Birth occured in quarter `q' of combined recall period"
	display "`qlabel'"
	label variable birth_qrt_`q' "`qlabel'"
}

** 6.2 Study arm (detailed) and period 
/*	Each variable below numbers the combinations of a before/after indicator 
	and a study-arm variable, i.e. the type of cluster (intervention, internal 
	control or external control) crossed with whether the birth occurred 
	before/after social franchising started in the intervention cluster of the 
	matched triplet. */

*** 6.2.1 Main analysis  
egen arm_period = group(after_ITT study_arm), label
label variable arm_period "Study arm and period (base/end)"

*** 6.2.2 Robustness check (truearm in place of study_arm)
egen arm_period_rob1 = group(after_ITT truearm), label
label variable arm_period_rob1 "Study arm and period (base/end) - Robustness chk1"

*** 6.2.3 Robustness check (observations with study_arm==1 left out)
egen arm_period_rob3 = group(after_ITT study_arm) if study_arm!=1, label
label variable arm_period_rob3 "Study arm and period (base/end) - Robustness chk3"

*** 6.2.4 Robustness check (observations with study_arm==2 left out)
egen arm_period_rob4 = group(after_ITT study_arm) if study_arm!=2, label
label variable arm_period_rob4 "Study arm and period (base/end) - Robustness chk4"

*** 6.2.5 Robustness check (after_selfbrand in place of after_ITT, truearm in place of study_arm)
egen arm_period_rob5 = group(after_selfbrand truearm), label
label variable arm_period_rob5 "Study arm and period (base/end) - Robustness chk5"


** 6.3 Study arm (pooled) and period 
/*	Each variable below numbers the combinations of a before/after indicator 
	and a pooled arm indicator, i.e. the type of cluster (intervention or 
	control) crossed with whether the birth occurred before/after social 
	franchising started in the intervention cluster of the matched triplet. 

	After egen, codes 1<->2 and 3<->4 are swapped and the value label 
	interv_periodLabs is attached. 
	NOTE(review): interv_periodLabs is not defined in this section; presumably 
	it is stored in the loaded data files - confirm it exists at this point. */

*** 6.3.1 Main analysis
egen interv_period = group(after_ITT intervention), label
recode interv_period (2=1) (1=2) (3=4) (4=3)
label values interv_period interv_periodLabs
label variable interv_period "intervention and period (base/end)"

*** 6.3.2 Robustness checks (trueintervention in place of intervention)
egen interv_period_rob1 = group(after_ITT trueintervention), label
recode interv_period_rob1 (2=1) (1=2) (3=4) (4=3)
label values interv_period_rob1 interv_periodLabs
label variable interv_period_rob1 "intervention and period (base/end) - Robustness chk1"

*** 6.3.3 Robustness checks (observations with study_arm==1 left out)
egen interv_period_rob3 = group(after_ITT intervention) if study_arm!=1, label
recode interv_period_rob3 (2=1) (1=2) (3=4) (4=3)
label values interv_period_rob3 interv_periodLabs
label variable interv_period_rob3 "intervention and period (base/end) - Robustness chk3"

*** 6.3.4 Robustness checks (observations with study_arm==2 left out)
egen interv_period_rob4 = group(after_ITT intervention) if study_arm!=2, label
recode interv_period_rob4 (2=1) (1=2) (3=4) (4=3)
label values interv_period_rob4 interv_periodLabs
label variable interv_period_rob4 "intervention and period (base/end) - Robustness chk4"

*** 6.3.5 Robustness checks (after_selfbrand and trueintervention)
egen interv_period_rob5 = group(after_selfbrand trueintervention), label
recode interv_period_rob5 (2=1) (1=2) (3=4) (4=3)
label values interv_period_rob5 interv_periodLabs
label variable interv_period_rob5 "intervention and period (base/end) - Robustness chk5"

** 6.4 Creating a variable to identify the woman's earliest pregnancy in the recall period
* Within each uid2, rows are ordered by birth_date; the first row gets 
* nUid2 = 1 and every other row is left missing.
bysort uid2 (birth_date): generate nUid2 = 1 if _n == 1
label variable nUid2 "First pregnancy in recall period"
	
	
** 6.5 Household SES
*** 6.5.1 Computation of asset weights with principal component analysis
* Indicators entering the wealth index. The PCA is run on one row per uid2 
* (the row flagged by nUid2) and retains the first component only.
local ses_items ///
	hh_mattress hh_pressurecook hh_thermos hh_chair hh_bed hh_table hh_almirah ///
	hh_fan hh_radio hh_tv hh_dvd hh_sewingmachine hh_mobile hh_telephone hh_computer ///
	hh_fridge hh_clock hh_bicycle hh_motorcycle hh_animalcart hh_car hh_waterpump ///
	hh_thresher hh_tractor hh_cows hh_donkey hh_goat hh_sheep hh_chickens hh_electricity ///
	hh_rooms toilet1 toilet2 toilet3 toilet4 toilet5 toilet6 toilet7 toilet8 toilet9 toilet10 ///
	water1 water2 water3 water4 water5 water6 water7 water8 water9 hh_construction1 ///
	hh_construction2 hh_construction3 fuel_elec fuel_lpg fuel_bio fuel_kerosene ///
	fuel_coal fuel_wood fuel_straw fuel_crop fuel_dung hh_internet hh_nets

pca `ses_items' if nUid2==1, component(1)


*** 6.5.2 Assign asset score to each respondent
* Scores are predicted only for the rows that were used in the PCA.
predict wealthscore if nUid2==1, score

*** 6.5.3 Generate quintiles

**** 6.5.3.1 Create variable
* wealthscore is missing outside the nUid2 rows, so the cut-points are based 
* on one row per uid2.
xtile quintile = wealthscore, nquantiles(5)
label variable quintile "Household wealth quintile"

**** 6.5.3.2 Copying values of wealthscore/quintile to the woman's other births
* NOTE(review): this relies on the nUid2 row still being first within uid2 
* after sorting; ties in birth_date within a uid2 could break that - confirm.
sort uid2 birth_date
by uid2: replace wealthscore = wealthscore[1]
by uid2: replace quintile = quintile[1]

**** 6.5.3.3 Creating indicator variables for each quintile
tabulate quintile, generate(hh_ses)
label variable hh_ses1 "Poorest quintile"
label variable hh_ses2 "2nd quintile"
label variable hh_ses3 "3rd quintile"
label variable hh_ses4 "4th quintile"
label variable hh_ses5 "5th quintile"

	
** 6.6 Indexes 
	/* One of the two methods used in the evaluation to adjust for multiple 
	hypothesis testing was to compute an index for each family of outcomes. 
	The index for family pp3 is created here. */
	
*** 6.6.1 - Main analysis 

**** 6.6.1.1 Standardizing each outcome
	
	/* Each outcome listed in the global pp3_other is converted to a z-score 
	using the mean and SD of the pooled control group at baseline 
	(intervention==0 & after_ITT==0), i.e. before the franchised clinic in the 
	matched cluster received training. */

foreach outcome of global pp3_other {
	quietly summarize `outcome' if intervention==0 & after_ITT==0
	local mean = r(mean)
	local sd = r(sd)
	generate `outcome'_z = (`outcome' - `mean') / `sd'
}

**** 6.6.1.2 Averaging z-scores within the family

	/* Row-wise mean of the variables listed in the global pp3_z. */ 
egen ind_pp3_z = rowmean($pp3_z)
	  
**** 6.6.1.3 Creating the index
* The family average is itself standardized against the same reference group.
quietly summarize ind_pp3_z if intervention==0 & after_ITT==0
local mean = r(mean)
local sd = r(sd)
generate ind_pp3_z_final = (ind_pp3_z - `mean') / `sd'

	
*** 6.6.2 - Robustness check
* Reference group for this variant: trueintervention==0 & after_ITT==0

**** 6.6.2.1 Standardizing each outcome 
* summarize is left un-quieted here, so its output appears in the log.
foreach outcome of global pp3_other {
	summarize `outcome' if trueintervention==0 & after_ITT==0
	local mean = r(mean)
	local sd = r(sd)
	generate `outcome'_z_rob1 = (`outcome' - `mean') / `sd'
}

**** 6.6.2.2 Averaging z-scores within the family  
egen ind_pp3_z_rob1 = rowmean($pp3_z_rob1)
	  
**** 6.6.2.3 Creating the index
quietly summarize ind_pp3_z_rob1 if trueintervention==0 & after_ITT==0
local mean = r(mean)
local sd = r(sd)
generate ind_pp3_z_rob1_final = (ind_pp3_z_rob1 - `mean') / `sd'

	  
*** 6.6.3 - Robustness check
* Reference group for this variant: intervention==0 & after_ITT==0, with 
* observations having study_arm==1 left out.

**** 6.6.3.1 Standardizing each outcome 
* summarize is left un-quieted here, so its output appears in the log.
foreach outcome of global pp3_other {
	summarize `outcome' if intervention==0 & after_ITT==0 & study_arm!=1
	local mean = r(mean)
	local sd = r(sd)
	generate `outcome'_z_rob3 = (`outcome' - `mean') / `sd'
}

**** 6.6.3.2 Averaging z-scores within the family 
egen ind_pp3_z_rob3 = rowmean($pp3_z_rob3)
	  
**** 6.6.3.3 Creating the index
quietly summarize ind_pp3_z_rob3 if intervention==0 & after_ITT==0 & study_arm!=1
local mean = r(mean)
local sd = r(sd)
generate ind_pp3_z_rob3_final = (ind_pp3_z_rob3 - `mean') / `sd'


*** 6.6.4 - Robustness check
* Reference group for this variant: intervention==0 & after_ITT==0, with 
* observations having study_arm==2 left out.

**** 6.6.4.1 Standardizing each outcome 
foreach var of global pp3_other{
	sum `var' if intervention==0 & after_ITT==0 & study_arm!=2
	local mean = r(mean)
	* The SD must be refreshed for every outcome; otherwise the z-score would 
	* be divided by whatever value an earlier section left in the local sd.
	local sd = r(sd)
	gen `var'_z_rob4 =(`var'-`mean')/`sd'
	}
	
**** 6.6.4.2 Averaging z-scores within the family 
egen ind_pp3_z_rob4= rowmean($pp3_z_rob4)
	  
**** 6.6.4.3 Creating the index
* The family average is standardized against the same reference group.
qui sum ind_pp3_z_rob4 if intervention==0 & after_ITT==0 & study_arm!=2
local mean = r(mean)
local sd = r(sd)
gen ind_pp3_z_rob4_final =(ind_pp3_z_rob4-`mean')/`sd'


*** 6.6.5 - Robustness check
* Reference group for this variant: trueintervention==0 & after_selfbrand==0

**** 6.6.5.1 Standardizing each outcome 
* summarize is left un-quieted here, so its output appears in the log.
foreach outcome of global pp3_other {
	summarize `outcome' if trueintervention==0 & after_selfbrand==0
	local mean = r(mean)
	local sd = r(sd)
	generate `outcome'_z_rob5 = (`outcome' - `mean') / `sd'
}


**** 6.6.5.2 Averaging z-scores within the family 
egen ind_pp3_z_rob5 = rowmean($pp3_z_rob5)
	  
**** 6.6.5.3 Creating the index
quietly summarize ind_pp3_z_rob5 if trueintervention==0 & after_selfbrand==0
local mean = r(mean)
local sd = r(sd)
generate ind_pp3_z_rob5_final = (ind_pp3_z_rob5 - `mean') / `sd'

** 6.7. Rescaled outcomes
/* Most outcomes are binary, with the exception of the following:
 
	-Number of antenatal care consultations (visits) 
	-Knowledge of pregnancy complications (0 to 1)
	-Knowledge of delivery complications (0 to 1)
	-Birth preparedness index (0 to 1)
	-Out-of-pocket spending on delivery care (INR)
	-Birthweight (kg)
	-Neonatal mortality (per 1000)
	-One day mortality (per 1000)

In the main tables, we show the percent of women who had the outcome in the 
intervention and control areas before and after the start of social franchising 
in the intervention triplet member. For expediency in programming, percentages 
were calculated as mean*100. Treatment effects of the binary outcomes were 
similarly scaled (all but pp3). Therefore, to simplify the production of tables, 
we similarly multiplied means/treatment effects of the non-binary outcomes by 
100. The recoding below scales the non-binary outcomes, so that means and 
treatment effects are in the correct scale. */

* Outcomes divided by 100. Each one is rescaled from its own value.
replace anc1_numvis =  anc1_numvis/100
replace anc3_knowpregcomp = anc3_knowpregcomp/100 
replace anc3_knowdelcomp = anc3_knowdelcomp/100 
* The index must be computed from anc3_birthprep itself; deriving it from 
* anc3_knowdelcomp (already rescaled above) would overwrite it with a 
* different outcome.
replace anc3_birthprep = anc3_birthprep/100
replace del6_delspend = del6_delspend/100

* Mortality outcomes are multiplied by 1000.
replace pp3_nmr = pp3_nmr*1000 
replace pp3_onedaymort = pp3_onedaymort*1000


* SECTION 7. VARIABLE LABELS - ALL PREGNANCIES DATA SET

** 7.1 Variable labels for individual outcome
label variable pp3_nmr_alt        "Neonatal survival (%)"
label variable pp3_nmr            "Neonatal mortality (%)"
label variable pp3_onedaymort_alt "One day survival (%)"
label variable pp3_onedaymort     "One day mortality (%)"
label variable pp3_birthweight    "Birth weight (KG)"

** 7.2 Variable labels for families
* pp3 is created entirely missing; it only carries the family heading.
generate pp3 = .
label variable pp3 "Neonatal health"

** 7.3 Variable labels for indexes 
* The main index and every robustness variant share the same label.
foreach idx of varlist ind_pp3_z_final ind_pp3_z_rob1_final ///
		ind_pp3_z_rob3_final ind_pp3_z_rob4_final ind_pp3_z_rob5_final {
	label variable `idx' "Neonatal health"
}


* SECTION 8. SAVING DATA SET - ALL PREGNANCIES
* The path is relative to the working directory set at the top of this file.
* Forward slashes are used because Stata accepts them on every platform,
* whereas backslash-separated paths only work on Windows.
save "build/output/master_data_complete_all_births.dta", replace


