/******************************************************************************
PROJECT: ERC SES DATASET
READ IN FILE, PRODUCES ERC_SES_COMPLETE.DTA
AUTHOR: NICOLA FOSTER
DATE: 2 OCTOBER 2020
******************************************************************************/

* Purpose of this section: build a unique person identifier, derive the cough
* and any-symptom indicators, clean the household asset variables, impute
* missing assets from other household members, and create the PCA inputs.

duplicates report survey_id
lab var survey "0=Survey_1 1=Survey_2"

* MAKES UNIQUE IDENTIFIER
* k is a constant "-" separator column used by the egen concat() calls below
gen k = "-"
egen newID = concat(survey k survey_id)
duplicates report newID

tab survey
lab var survey "0 survey1 1 survey2"
tab sex_final survey, missing
tab survey

* the cough categories include cough_2w, cough_CAT (generated), cough_c, cough_clean, coughgt2w
tab fever

* separates out the household id from newID
* NOTE(review): assumes newID splits into exactly four "-" separated parts
* (survey, cluster, household, individual) - confirm against survey_id format
desc newID
split newID, p("-")
rename newID1 NewID_survey
rename newID2 NewID_cluster
rename newID3 NewID_HH
rename newID4 NewID_ind

********************************************************************************
* TB CASE DEFINITIONS
* generating the cough variable: 1 if any of the cough/sputum sources flags it,
* otherwise 0 (rows with no information are also set to 0)
gen cough_cleannf = cough_clean
gen coughgt2wnf = coughgt2w

gen cough = 1 if cough_2w == 1
replace cough = 1 if cough_c == 1 & cough == .
replace cough = 1 if cough_cleannf == 1 & cough == .
replace cough = 1 if coughgt2wnf == 1 & cough == .
replace cough = 1 if coughgt2wnf == 2 & cough == .
replace cough = 1 if sputum_c == 1 & cough == .
replace cough = 0 if cough == .
tab cough survey if tbcase==1

*******************************************************************************
* Any-symptom indicator.
* In Stata "&" binds tighter than "|", so the "still missing" guard has to be
* applied to the whole OR-list with parentheses. Without them, a person with
* e.g. cough==1 and fever==0 was reset from 1 to 0.
gen symptoms_any = 1 if fever==1 | night_sweats==1 | weight_loss==1 | cough==1 | blood_sputum_c==1
replace symptoms_any = 0 if (fever==0 | night_sweats==0 | weight_loss==0 | cough==0 | blood_sputum_c==0) & symptoms_any==.
replace symptoms_any = 0 if symptoms_any==.
tab symptoms_any survey, missing

* Subclinical TB defined as patients with microbiological confirmed TB but no cough
tab cough survey if tbcase==1

*******************************************************************************
* first clean the asset variables: codes 9 and 99 are set to missing
* The per-survey lists are the same lists used by the imputation loops below,
* so that a 9/99 code can never be copied to another household member.
local assets_2007 floor_clay_F1 wood_cook_F1 stereo_cassette_F1 telly_F1 motorbike_F1 car_F1 ethnic_F1
local assets_2017 floor_clay_F1 wood_cook_F1 car_F1 motorbike_F1 refrigerator_F1 colorTV_F1 stereo_sys_F1 computer_F1 air_cond_F1 washing_m_F1 water_heater_F1 ethnic_F1

* 2007
* inlist() keeps the survey restriction attached to both codes; the previous
* form "survey == 0 & x == 99 | x == 9" blanked code 9 in every survey.
foreach v of local assets_2007 {
    replace `v' = . if survey == 0 & inlist(`v', 9, 99)
}

* 2017
foreach v of local assets_2017 {
    replace `v' = . if survey == 1 & inlist(`v', 9, 99)
}

******************************************************************************
* IMPUTATION
* Cycles through all SES variables and imputes them for all HH members.
* Grouping is done on a temporary household key (survey + cluster + household).
* Within a household the values are sorted so that missing comes last, and each
* missing value takes the value of the row before it, i.e. the largest
* non-missing value reported in that household.
tempvar hh_key
egen `hh_key' = concat(NewID_survey k NewID_cluster k NewID_HH)

* for survey1
* cluster_hh_id0 is kept exactly as before (it includes NewID_ind), but it is
* no longer used as the by-key: it is empty for every survey-2 row, which put
* all of those rows into a single group and filled their missing values from
* unrelated people.
egen cluster_hh_id0 = concat(NewID_cluster k NewID_HH k NewID_ind) if survey==0
local dvs `assets_2007'
foreach DV in `dvs' {
    bysort `hh_key' (`DV'): replace `DV' = `DV'[_n-1] if missing(`DV') & survey == 0
}

* for survey 2
egen cluster_hh_id1 = concat(NewID_cluster k NewID_HH k NewID_ind) if survey==1
local dvs `assets_2017'
foreach DV in `dvs' {
    bysort `hh_key' (`DV'): replace `DV' = `DV'[_n-1] if missing(`DV') & survey == 1
}

******************************************************************************
* PCA
* survey variable is coded 0 for the 1st survey and 1 for the 2nd survey
* (Option 1) PCA keep missings
* Each pca_*_miss variable is a copy of its source with codes 9 and 99 set to
* missing (applied to all rows, regardless of survey).
gen pca_ClayFloor_miss = floor_clay_F1 if !inlist(floor_clay_F1, 9, 99)
gen pca_WoodCook_miss = wood_cook_F1 if !inlist(wood_cook_F1, 9, 99)

* combines StereoCassette (survey 1) and StereoSystem (survey 2)
gen pca_Stereo_miss = cond(survey == 1, stereo_sys_F1, stereo_cassette_F1)
replace pca_Stereo_miss = . if inlist(pca_Stereo_miss, 9, 99)

* combines TV (survey 1) and ColorTV (survey 2)
gen pca_TV_miss = cond(survey == 1, colorTV_F1, telly_F1)
replace pca_TV_miss = . if inlist(pca_TV_miss, 9, 99)

gen pca_MotorBike_miss = motorbike_F1 if !inlist(motorbike_F1, 9, 99)
gen pca_Car_miss = car_F1 if !inlist(car_F1, 9, 99)
gen pca_Fridge_miss = refrigerator_F1 if !inlist(refrigerator_F1, 9, 99)
gen pca_Computer_miss = computer_F1 if !inlist(computer_F1, 9, 99)
gen pca_AirConditioner_miss = air_cond_F1 if !inlist(air_cond_F1, 9, 99)
gen pca_WashingMachine_miss = washing_m_F1 if !inlist(washing_m_F1, 9, 99)
gen pca_WaterHeater_miss = water_heater_F1 if !inlist(water_heater_F1, 9, 99)

* reviewing the PCA data.
* Check that all ones relate to an improvement in wealth
* The *_inv variables flip the 0/1 coding of clay floor and wood cooking so
* that 1 means the household does NOT have the item; any other value stays
* missing.
gen pca_FloorClay_inv = 1 - pca_ClayFloor_miss if inlist(pca_ClayFloor_miss, 0, 1)
gen pca_WoodCook_inv = 1 - pca_WoodCook_miss if inlist(pca_WoodCook_miss, 0, 1)

* review asset indices
foreach v of varlist pca_FloorClay_inv pca_WoodCook_inv pca_Stereo_miss pca_TV_miss pca_MotorBike_miss pca_Car_miss pca_Fridge_miss pca_Computer_miss pca_AirConditioner_miss pca_WashingMachine_miss pca_WaterHeater_miss {
    tab `v' survey
}

* PCA1: MAXIMISING SAMPLE SIZE
*******************************************************************************
* One index over both surveys, cut into quartiles (pca1_CAT).
* NOTE(review): the original had "* pca_Stereo_miss pca_TV_miss" directly after
* this varlist; it is treated here as a commented-out line (those two variables
* are NOT in PCA1), not as a "*" wildcard - confirm.
pca pca_ClayFloor_miss pca_WoodCook_miss pca_MotorBike_miss
* pca_Stereo_miss pca_TV_miss
predict pca1, score
hist pca1, frequency
xtile pca1_CAT = pca1, nq(4)
tab pca1_CAT
hist pca1_CAT, frequency

* PCA2: SEPARATE ASSET INDICES FOR EACH SURVEY YEAR
*******************************************************************************
* Survey 1 index. predict is restricted to survey==0: without the restriction
* the survey-1 loadings were also scored onto survey-2 rows, so pca2A_CAT was
* non-missing for them and pca2_CAT below never picked up the survey-2 index.
pca pca_ClayFloor_miss pca_WoodCook_miss pca_Stereo_miss pca_TV_miss pca_MotorBike_miss pca_Car_miss if survey==0
predict pca2A if survey==0, score
hist pca2A, frequency
* quintiles of the survey-1 score (previously referenced "pca2", which is not a
* variable and only resolved through variable-name abbreviation)
xtile pca2A_CAT = pca2A, nq(5)

* Survey 2 index.
* NOTE(review): this varlist contains both pca_ClayFloor_miss / pca_WoodCook_miss
* and their exact inverses pca_FloorClay_inv / pca_WoodCook_inv, which are
* perfectly collinear - confirm that both codings are meant to enter the PCA.
pca pca_ClayFloor_miss pca_WoodCook_miss pca_Stereo_miss pca_TV_miss pca_MotorBike_miss pca_Car_miss pca_Fridge_miss pca_Computer_miss pca_AirConditioner_miss pca_WashingMachine_miss pca_WaterHeater_miss pca_FloorClay_inv pca_WoodCook_inv if survey==1
predict pca2B if survey==1, score
xtile pca2B_CAT = pca2B, nq(5)

* Combined category: survey-1 quintile where available, otherwise survey-2
gen pca2_CAT = pca2A_CAT
replace pca2_CAT = pca2B_CAT if pca2_CAT == .
* DIFFERENT TYPES OF TB DISEASE
********************************************************************************
* Distribution of the wealth categories among TB cases, one panel per survey
foreach wealth_cat in pca1_CAT pca2_CAT {
    histogram `wealth_cat' if tbcase==1, discrete by(survey) yscale(r(0 0.5)) graphregion(color(white))
}

* Same distribution of pca1_CAT for each subgroup flag, with an extra "total"
* panel; each flag is tabulated first
foreach flag in tbsub symptoms_any {
    tab `flag'
    histogram pca1_CAT if `flag'==1, discrete by(survey, total) yscale(r(0 0.5)) graphregion(color(white))
}

* HOUSEHOLD LEVEL DATASET
********************************************************************************
* awe_imp takes one fixed value per pca1_CAT quartile (1 to 4, in that order)
* and stays missing where pca1_CAT is missing
local awe_by_quartile 2374.99 2389.79 2412.55 2448.18
gen awe_imp = .
forvalues q = 1/4 {
    local awe_value : word `q' of `awe_by_quartile'
    replace awe_imp = `awe_value' if pca1_CAT == `q'
}
sum awe_imp