/*Program 1: Compiling all translated survey responses into one working dataset -Download responses from Typeform.com -Import responses to Stata with no variable header and save raw data as Stata file -Seperate the languages into seperate Stata files and save as seperate Excel files -Upload each language Excel file to Google Sheets to create common variable names, confirm presence of all variables, and translate variables where necesseary. -Download each Google Sheet as Excel File and import each language file into Stata -Rename variables with common names and append all languages together -Save working doc. for use in Program 2 : "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC analysis\Datasets by language\all languages appended.dta" 1. Delete all documents in C:\Users\rache\Desktop\CCC Analysis and all XLSX files in C:\Users\rache\Downloads 2. Unblock all "export" lines in this Do-File 3. Clear google drive CCC-analysis folder */ //Downloading all Typeform responses - converting into Stata file - saving to LSHTM OneDrive cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets" clear import excel "C:\Users\rache\Downloads\responses (4).xlsx", sheet("tm6bWbP2") save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analys> is\Built Datasets\raw.dta", clear //Seperating the languages into seperate Stata files //En use raw.dta, clear sort B keep if B=="Hello" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\English.dta", replace //Es use raw.dta, clear sort B keep if B=="Hola" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Spanish.dta", replace //Fr use raw.dta, clear sort B keep if B=="Bonjour" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\French.dta", replace //Sh use raw.dta, clear sort B keep if B=="Niaje" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Sheng.dta", replace //Hi use raw.dta, clear sort B keep if B=="नमस्ते" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Hindi.dta", replace //Ur use raw.dta, clear sort B keep if B=="ہیلو" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Urdu.dta", replace //It use raw.dta, clear sort B keep if B=="Ciao" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Italian.dta", replace //Bn use raw.dta, clear sort B keep if B=="হ্যালো" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Bangla.dta", replace //Ar use raw.dta, clear sort B keep if B=="أهلا" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Arabic.dta", replace use raw.dta, clear sort B keep if B=="你好" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages\Chinese.dta", replace //Creating Excel files by language for translation - renaming and consolidating 'Live' with 'Live - other' /*make sure to delete all CCC Analysis files before running 1. Check that each language file has all variables at this stage (add in blank columns where needed) */ cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Languages" //En use "English.dta", clear list //verify that variables match list below drop BE-TU rename (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT AU AV AW AX AY AZ BA BB BC BD TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign variable_0 utm_term utm_content Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "English.dta", replace export excel ID-Network_ID using "English.xls", replace //Es use "Spanish.dta", clear list drop C-BD //drop En survey drop DG-TU //drop all other surveys rename (A B BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP CQ CR CS CT CU CV CW CX CY CZ DA DB DC DD DE DF TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Spanish.dta", replace export excel ID-Network_ID using "Spanish.xls", replace //Fr use "French.dta", clear list drop C-DF //drop En Es drop FI-TU //drop all other surveys rename (A B DG DH DI DJ DK DL DM DN DO DP DQ DR DS DT DU DV DW DX DY DZ EA EB EC ED EE EF EG EH EI EJ EK EL EM EN EO EP EQ ER ES ET EU EV EW EX EY EZ FA FB FC FD FE FF FG FH TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "French.dta", replace export excel ID-Network_ID using "French.xls", replace //Sh //will have to add in GU as air_quality if anyone answers with this use "Sheng.dta", clear list drop C-FH //drop all other surveys drop HJ-TU //drop all other surveys rename (A B FI FJ FK FL FM FN FO FP FQ FR FS FT FU FV FW FX FY FZ GA GB GC GD GE GF GG GH GI GJ GK GL GM GN GO GP GQ GR GS GT GU GV GW GX GY GZ HA HB HC HD HE HF HG HH HI TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Sheng.dta", replace export excel ID-Network_ID using "Sheng.xls", replace //Hi use "Hindi.dta", clear list drop C-HI //drop all other surveys drop JL-TU //drop all other surveys rename (A B HJ HK HL HM HN HO HP HQ HR HS HT HU HV HW HX HY HZ IA IB IC ID IE IF IG IH II IJ IK IL IM IN IO IP IQ IR IS IT IU IV IW IX IY IZ JA JB JC JD JE JF JG JH JI JJ JK TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Hindi.dta", replace export excel ID-Network_ID using "Hindi.xls", replace //Ur use "Urdu.dta", clear list drop C-JK //drop all other surveys drop LN-TU //drop all other surveys rename (A B JL JM JN JO JP JQ JR JS JT JU JV JW JX JY JZ KA KB KC KD KE KF KG KH KI KJ KK KL KM KN KO KP KQ KR KS KT KU KV KW KX KY KZ LA LB LC LD LE LF LG LH LI LJ LK LL LM TV TW TX TY TZ UA UB UC UD) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Urdu.dta", replace export excel ID-Network_ID using "Urdu.xls", replace //It use "Italian.dta", clear list drop C-LM //drop all other surveys drop NP-TU //drop all other surveys rename (A B LN LO LP LQ LR LS LT LU LV LW LX LY LZ MA MB MC MD ME MF MG MH MI MJ MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NB NC ND NE NF NG NH NI NJ NK NL NM NN NO TV TW TX TY TZ UA UB UC UD ) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Italian.dta", replace export excel ID-Network_ID using "Italian.xls", replace //Bn use "Bangla.dta", clear list drop C-NO //drop all other surveys drop PR-TU //drop all other surveys rename (A B NP NQ NR NS NT NU NV NW NX NY NZ OA OB OC OD OE OF OG OH OI OJ OK OL OM ON OO OP OQ OR OS OT OU OV OW OX OY OZ PA PB PC PD PE PF PG PH PI PJ PK PL PM PN PO PP PQ TV TW TX TY TZ UA UB UC UD ) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Bangla.dta", replace export excel ID-Network_ID using "Bangla.xls", replace //Ar use "Arabic.dta", clear list drop C-PQ //drop all other surveys drop RU-TU //drop all other surveys rename (A B PR PS PT PU PV PW PX PY PZ QA QB QC QD QE QF QG QH QI QJ QK QL QM QN QO QP QQ QR QS QT QU QV QW QX QY QZ RA RB RC RD RE RF RG RH RI RJ RK RL RM RN RO RP RQ RR RS RT TV TW TX TY TZ UA UB UC UD ) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality choice_1 pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Arabic.dta", replace export excel ID-Network_ID using "Arabic.xls", replace //Zh use "Chinese.dta", clear list drop C-RS //drop all other surveys rename (A B RT RU RV RW RX RY RZ SA SB SC SD SE SF SG SH SI SJ SK SL SM SN SO SP SQ SR SS ST SU SV SW SX SY SZ TA TB TC TD TE TF TG TH TI TJ TK TL TM TN TO TP TQ TR TS TT TU TV TW TX TY TZ UA UB UC UD ) (ID lang eligibility consent1 consent2 consent3 consent4 consent5 consent6 q age preg live1 live2 best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) replace live1 = live2 if live1=="" drop live2 rename live1 live save "Chinese.dta", replace export excel ID-Network_ID using "Chinese.xls", replace */ /*Variables that need to be translated in each excel dataset: Upload each language to Google Sheets and replace each column with the translated following (retain variable names): 1. To each language google sheet: add row 2 from https://docs.google.com/spreadsheets/d/12xpkgkrK8uW4EPK97eUPHuba12-AHEfJYGuGmAL3OEE/edit#gid=0 2. add column, and the equation: =GOOGLETRANSLATE(B32, "en", "it") in the following locations live (add M) best_a12 (add after Z) worst_a13 (add after AN) gen_city_comments (add after AP) pollution_causes_a9 (add after BA) gen_air_comments (add after BC) nicer_comments (add after BF) mayor_ans1 (add after BH) mayor_ans2 (add after BJ) final_comments (add after BD) 3. Copy paste translated column as special>"values only" 3. Delete original columns for each variable above 4. Find and replace "#VALUE!" with "" 5. Delete Row one variable names 5. Download each language sheet and import to Stata 6.Save as .dta for appending: Notes: *For Sheng, had to add a column for "other" for the 'worst things' question - no one had submitted any response for that. *For urdu - some people responded in Hindi, I only translated the above variables into Hindi, so in the qual any untranslated script will need to be manually translated. */ import excel "C:\Users\rache\Downloads\English.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "English.dta", replace import excel "C:\Users\rache\Downloads\Spanish.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Spanish.dta", replace import excel "C:\Users\rache\Downloads\French.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "French.dta", replace import excel "C:\Users\rache\Downloads\Sheng.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Sheng.dta", replace import excel "C:\Users\rache\Downloads\Hindi.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Hindi.dta", replace import excel "C:\Users\rache\Downloads\Urdu.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Urdu.dta", replace import excel "C:\Users\rache\Downloads\Italian.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Italian.dta", replace import excel "C:\Users\rache\Downloads\Bangla.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Bangla.dta", replace import excel "C:\Users\rache\Downloads\Arabic.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Arabic.dta", replace import excel "C:\Users\rache\Downloads\Chinese.xlsx", sheet("Sheet1") clear rename A-BL (ID lang eligibility consent1 consent2 consent3 consent4 consent5 q FAQs age preg live best_a1 best_a2 best_a3 best_a4 best_a5 best_a6 best_a7 best_a8 best_a9 best_a10 best_a11 best_a12 worst_a1 worst_a2 worst_a3 worst_a4 worst_a5 worst_a6 worst_a7 worst_a8 worst_a9 worst_a10 worst_a11 worst_a12 worst_a13 gen_city_comments air_quality pollution_causes_a1 pollution_causes_a2 pollution_causes_a3 pollution_causes_a4 pollution_causes_a5 pollution_causes_a6 pollution_causes_a7 pollution_causes_a8 pollution_causes_a9 gen_air_comments nicer_city nicer_comments mayor_ans1 mayor_ans2 final_comments utm_source utm_medium utm_campaign utm_term utm_content variable_0 Start_Date Submit_Date Network_ID) save "Chinese.dta", replace //appending and work on full document /*Check that each is in string formatting tostring pollution_causes_a9, gen(pollution_causes_a10) drop pollution_causes_a9 rename pollution_causes_a10 pollution_causes_a9*/ use English.dta, clear append using Spanish.dta, force append using French.dta, force append using Sheng.dta, force append using Hindi.dta, force append using Urdu.dta, force append using Italian.dta, force append using Bangla.dta, force append using Arabic.dta, force append using Chinese.dta, force drop if ID=="" drop utm_term save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\1_languages appended.dta", replace **************************************FINAL FILE n=5,158 use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\1_languages appended.dta", clear clear /*Program 2: -Generalize all demographic variables (age buckets, live: town and region) -Create eligible/consenting dataset and ineligible/nonconsenting dataset -Remove all outlier data (age) */ cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets" use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\1_all languages appended.dta", clear //Consolidating demographics variables //Pregnant replace preg="pregnant" if strpos(preg, "Yes") replace preg="pregnant" if strpos(preg, "sí") replace preg="pregnant" if strpos(preg, "sì") replace preg="pregnant" if strpos(preg, "Oui") replace preg="pregnant" if strpos(preg, "हाँ") replace preg="pregnant" if strpos(preg, "جی ہاں") replace preg="pregnant" if strpos(preg, "হ্যাঁ") replace preg="pregnant" if strpos(preg, "نعم") replace preg="pregnant" if strpos(preg, "是") replace preg="not pregnant" if strpos(preg, "No") replace preg="not pregnant" if strpos(preg, "no") replace preg="not pregnant" if strpos(preg, "Non") replace preg="not pregnant" if strpos(preg, "नहीं") replace preg="not pregnant" if strpos(preg, "نہیں") replace preg="not pregnant" if strpos(preg, "না") replace preg="not pregnant" if strpos(preg, "رقم") replace preg="not pregnant" if strpos(preg, "否") replace preg="Not asked" if preg=="" tab preg //Age tab age gen age_real = real(age) drop age rename age_real age replace age=37 if age==1984 replace age=34 if age==1987 replace age=33 if age==1988 replace age=31 if age==1990 replace age=27 if age==1994 replace age=25 if age==1996 replace age=21 if age==2000 replace age=20 if age==2001 replace age=18 if age==2003 replace age=18 if age==20003 replace age=16 if age==2005 replace age=. if age>100 gen age_bucket=1 if age<13 replace age_bucket=2 if inrange(age, 13, 16) replace age_bucket=3 if inrange(age, 17, 19) replace age_bucket=4 if inrange(age, 20, 25) replace age_bucket=5 if age>25 replace age_bucket=6 if age==. label define age_bucket_label 1 "Unknown" 2 "13-16" 3 "17-19" 4 "20-25" 5 "25+" 6 "missing or unrealistic" label values age_bucket age_bucket_label tab age_bucket //Prepping location imputation variable gen utm_campaign_location="Bhubaneswar" if strpos(utm_campaign, "Bhubaneswar") replace utm_campaign_location="Dar es Salaam" if strpos(utm_campaign, "Dar es Salaam") replace utm_campaign_location="Dhaka" if strpos(utm_campaign, "Dhaka") replace utm_campaign_location="Freetown" if strpos(utm_campaign, "Freetown") replace utm_campaign_location="Glasgow" if strpos(utm_campaign, "Glasgow") replace utm_campaign_location="Harare" if strpos(utm_campaign, "Harare") replace utm_campaign_location="Jaipur" if strpos(utm_campaign, "Jaipur") replace utm_campaign_location="Lahore" if strpos(utm_campaign, "Lahore") replace utm_campaign_location="London" if strpos(utm_campaign, "London") replace utm_campaign_location="Los Angeles" if strpos(utm_campaign, "Los Angeles") replace utm_campaign_location="Mexico City" if strpos(utm_campaign, "Mexico City") replace utm_campaign_location="Milan" if strpos(utm_campaign, "Milan") replace utm_campaign_location="Nairobi" if strpos(utm_campaign, "Nairobi") replace utm_campaign_location="Quezon City" if strpos(utm_campaign, "Quezon") replace utm_campaign_location="Quito" if strpos(utm_campaign, "Quito") replace utm_campaign_location="Tamale" if strpos(utm_campaign, "Tamale") replace utm_campaign_location="missing" if utm_campaign=="" replace utm_campaign_location="missing" if utm_campaign=="xxxxx" replace utm_campaign_location="CCC Youth Survey - Test Campaign V2" if utm_campaign=="CCC Youth Survey - Test Campaign V2" tab utm_campaign_location //identifies all organic and non-campaign reponses n=341 //Live generate town=utm_campaign_location replace town="Alwar" if strpos(live, "Alwar") replace town="Atrai" if strpos(live, "atrai") replace town="Antipolo" if strpos(live, "Antipolo") replace town="Bahawalnagar" if strpos(live, "Bahawal") replace town="Baripada" if strpos(live, "Baripada") replace town="Bomet" if strpos(live, "Bomet") replace town="Bhubaneswar" if strpos(live, "Bubaneswar") replace town="Busia" if strpos(live, "Busia") replace town="Caloocan" if strpos(live, "Caloocan") replace town="Chitral" if strpos(live, "Chitral") replace town="Chitungwiza" if strpos(live, "Chitungwiza") replace town="Churu" if strpos(live, "Churu") replace town="Cobham" if strpos(live, "Cobham") replace town="Cover" if strpos(live, "Cover") replace town="Dhaka" if strpos(live, "haka") | strpos(live, "KAKA") replace town="Dar es Salaam" if strpos(live, "Dar es Salaam") replace town="Dhani Kalera" if strpos(live, "Dhani Kalera") replace town="Echague" if strpos(live, "Echague") replace town="Entebbe" if strpos(live, "Entebbe") replace town="Firozabad" if strpos(live, "Firo") replace town="Freetown" if strpos(live, "Freetown") replace town="Glasgow" if strpos(live, "Glasgow") replace town="Groningen" if strpos(live, "Groningen") replace town="Harare" if strpos(live, "arare") | strpos(live, "ARARE") replace town="Haroonabad" if strpos(live, "Haroonabad") replace town="Jaipur" if strpos(live, "Jaipur") replace town="Jhang" if strpos(live, "Jhang") replace town="Kajiado" if strpos(live, "Kajiado") replace town="Khulna" if strpos(live, "Khulna") replace town="Kuchaman" if strpos(live, "Kuchaman") replace town="Lahore" if strpos(live, "Lahore") |strpos(live, "LAHORE") replace town="London" if strpos(live, "London") replace town="Los Angeles" if strpos(live, "Los Angeles") replace town="Manila" if strpos(live, "anila") replace town="Mardan" if strpos(live, "Mardan") replace town="Muranga" if strpos(live, "Muranga") replace town="Marikina" if strpos(live, "Marikina") replace town="Masvingo" if strpos(live, "Masvingo") replace town="Mexico City" if strpos(live, "CDMX") |strpos(live, "CIUDAD DE MEXICO") | strpos(live, "City of Mexico")| strpos(live, "Mexico City")| strpos(live, "CD Mexico") replace town="Milan" if strpos(live, "Milan") replace town="Monza" if strpos(live, "Monza") replace town="Mymensingh" if strpos(live, "Mymensingh") replace town="Nairobi" if strpos(live, "airobi") replace town="Narayanganj" if strpos(live, "Narayanganj") replace town="Narowal" if strpos(live, "Narowal") replace town="New York" if strpos(live, "New York") replace town="Okara" if strpos(live, "Okara") replace town="Quezon City" if strpos(live, "Quezon")| strpos(live, "Queson") | strpos(live, "QUEZON") replace town="Quito" if strpos(live, "Quito") replace town="Rourkela" if strpos(live, "rourkela") replace town="Sargodha" if strpos(live, "SARGODHA") replace town="San Jose Del Monte" if strpos(live, "San Jose Del Monte") replace town="San Juan" if strpos(live, "San Juan") replace town="Sialkot" if strpos(live, "Sialkot") replace town="Sirsa" if strpos(live, "Sirsa") replace town="Tamale" if strpos(live, "amal") replace town="Tamale" if strpos(live, "AMAL") replace town="Valenzuela" if strpos(live, "Valenzuela") replace town="Victoria Falls" if strpos(live, "Vic Falls") replace town="Victoria Falls" if strpos(live, "Victoria falls") replace town="Yeji" if strpos(live, "Yeji") //True indication of where respondents are from missing only 236 //City sort town quietly by town: gen dup = cond(_N==1,0,_n) generate city=utm_campaign_location if dup==0 replace city=town if city=="missing" | city=="" | strpos(city, "CCC Youth") replace city="Nairobi" if city=="Bomet" | city=="Busia" | city=="Kajiado" replace city="Jaipur" if city=="Churu" replace city="Dhaka" if city=="Narayanganj" replace city="Lahore" if city=="Okara" | city=="Sialkot" replace city="Quezon City" if city=="San Jose Del Monte" |city=="Manila" replace city="Harare" if city=="Victoria Falls" //Region generate region="" replace region="India" if city=="Bhubaneswar" | city=="Jaipur" replace region="Tanzania" if city=="Dar es Salaam" replace region="Bangladesh" if city=="Dhaka" replace region="Sierra Leone" if city=="Freetown" | live=="SIERRA Leone" replace region="United Kingdom" if city=="Glasgow" | city=="London" replace region="Zimbabwe" if city=="Harare" | live=="Zimbabwe" replace region="Pakistan" if city=="Lahore" replace region="United States of America" if city=="Los Angeles" replace region="Mexico" if city=="Mexico City" | live=="Mexico" replace region="Italy" if city=="Milan" replace region="Kenya" if city=="Nairobi" replace region="Philippines" if city=="Quezon City" | live=="Philippines" replace region="Ecuador" if city=="Quito" replace region="Ghana" if city=="Tamale" replace region="missing" if city=="missing" & region=="" | strpos(city, "CCC Youth") //For those who did not report a town and utm_campaign was not available, but DID report a region replace city="Mexico City" if town=="missing" & region=="Mexico" replace city="Quezon City" if town=="missing" & region=="Philippines" replace city="Freetown" if town=="missing" & region=="Sierra Leone" replace city="Harare" if town=="missing" & region=="Zimbabwe" //Geoscheme Sub regions generate geoscheme_subregion="" replace geoscheme_subregion="Eastern Africa" if region=="Zimbabwe" | region=="Kenya" | region=="Tanzania" replace geoscheme_subregion="Western Africa" if region=="Ghana" | region=="Sierra Leone" replace geoscheme_subregion="North America" if region=="United States of America" replace geoscheme_subregion="Latin America and the Caribbean" if region=="Ecuador" | region=="Mexico" replace geoscheme_subregion="Western Europe" if region=="United Kingdom" | region=="Italy" replace geoscheme_subregion="Southeastern Asia" if region=="Philippines" replace geoscheme_subregion="Southern Asia" if region=="Bangladesh" | region=="Pakistan" | region=="India" generate geoscheme_region="" replace geoscheme_region="Afria" if geoscheme_subregion=="Eastern Africa" | geoscheme_subregion=="Western Africa" replace geoscheme_region="Americas" if geoscheme_subregion=="North America" | geoscheme_subregion=="Latin America and the Caribbean" replace geoscheme_region="Asia" if geoscheme_subregion=="Southeastern Asia" | geoscheme_subregion=="Southern Asia" replace geoscheme_region="Europe" if geoscheme_subregion=="Western Europe" //OECD generate OECD="" replace OECD="LMIC" if region=="Bangladesh" | region=="Ecuador" | region=="Ghana" | region=="India" | region=="Kenya" | region=="Mexico" | region=="Pakistan" | region=="Philippines" | region=="Sierra Leone" | region=="Tanzania" | region=="Zimbabwe" replace OECD="HIC" if region=="Italy" | region=="United Kingdom" | region=="United States of America" /* drop dup by city, sort : generate dup = _n == 1 keep if dup==1 keep OECD city region pm25 pm25_quartile tab OECD save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\City & country indicators.dta", replace */ *******Generating incomplete response datasets //Mistake generate mistake=1 if eligibility=="A parent of someone under 13 years old" & age>25 & preg=="not pregnant" //Eligibility replace eligibility="Aged between 13-25 years old" if eligibility=="Tiene entre 13 y 25 años" replace eligibility="Aged between 13-25 years old" if eligibility=="Gé de 13 à 25 ans" replace eligibility="Aged between 13-25 years old" if eligibility=="Niko na miaka kati ya 13-25" replace eligibility="Aged between 13-25 years old" if eligibility=="उम्र 13-25 साल के बीच" replace eligibility="Aged between 13-25 years old" if eligibility=="میری عمر 13 سے 15 سال کے درمیان ہے۔" replace eligibility="Aged between 13-25 years old" if eligibility=="Hai un'età compresa tra 13 e 25 anni" replace eligibility="Aged between 13-25 years old" if eligibility=="13-25 বছর বয়সী" replace eligibility="Aged between 13-25 years old" if eligibility=="تتراوح أعمارهم بين 13-25 سنة" replace eligibility="Aged between 13-25 years old" if eligibility=="年龄在 13-25 岁之间" replace eligibility="A parent of someone under 13 years old" if eligibility=="Es un padre de alguien menor de 13 años" replace eligibility="A parent of someone under 13 years old" if eligibility=="Un parent d'une personne de moins de 13 ans" replace eligibility="A parent of someone under 13 years old" if eligibility=="Mimi ni mzazi wa mtoi ako chini ya miaka 13" replace eligibility="A parent of someone under 13 years old" if eligibility=="13 वर्ष से कम आयु के व्यक्ति के माता-पिता" replace eligibility="A parent of someone under 13 years old" if eligibility=="میں 13 سال سے کم عمر کے بچے کا والدین ہوں۔" replace eligibility="A parent of someone under 13 years old" if eligibility=="Sei genitore di un bambino di età inferiore ai 13 anni" replace eligibility="A parent of someone under 13 years old" if eligibility=="13 বছরের কম বয়সী কারো বাবা -মা, অথবা" replace eligibility="A parent of someone under 13 years old" if eligibility=="والد لشخص أقل من 13 عامًا" replace eligibility="A parent of someone under 13 years old" if eligibility=="孩子未满13岁" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="Tiene más de 18 años, usted o su pareja están esperando un bebé" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="Gé de plus de 18 ans, vous ou votre partenaire attendez un bébé" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="Niko over 18 na mimi ama partner wangu niko/ako pregnant" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="18 वर्ष से अधिक और आप या आपका साथी एक बच्चे की उम्मीद कर रहे हैं" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="میری عمر 18 سال سے زیادہ ہے اور میں ، یا میرا ساتھی بچے کی توقع کر رہا ہے۔" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="Hai più di 18 anni e tu e il tuo partner aspettate un bambino" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="18 বছর বয়সে, আপনি বা আপনার সঙ্গী একটি শিশুর প্রত্যাশা করছেন" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="إذا كان عمرك أكثر من 18 عامًا وكنت أنت أو شريكك في انتظار مولود" replace eligibility="Aged over 18, you or your partner are expecting a baby" if eligibility=="18岁以上的孕妇(或伴侣)" replace eligibility="None of these describe me" if eligibility=="Ninguno de estos me describe" replace eligibility="None of these describe me" if eligibility=="Aucun de ceux-ci ne me ressemble" replace eligibility="None of these describe me" if eligibility=="Hakuna kati ya hizi inani-represent" replace eligibility="None of these describe me" if eligibility=="इनमें से कोई भी मेरा वर्णन नहीं करता" replace eligibility="None of these describe me" if eligibility=="ان میں سے کوئی بھی اختیار مجھے بیان نہیں کرتا۔" replace eligibility="None of these describe me" if eligibility=="Nessuna delle opzioni precedenti" replace eligibility="None of these describe me" if eligibility=="এই বিকল্পগুলির কোনটিই আমাকে বর্ণনা করে না" replace eligibility="None of these describe me" if eligibility=="لا شيء من هؤلاء يصفني" replace eligibility="None of these describe me" if eligibility=="以上都不符合" replace eligibility="missing" if eligibility=="" generate eligible="eligible" if eligibility=="Aged between 13-25 years old" replace eligible="eligible" if eligibility=="Aged over 18, you or your partner are expecting a baby" replace eligible="eligible" if eligibility=="A parent of someone under 13 years old" replace eligible="not eligible" if eligibility=="None of these describe me" replace eligible="missing" if eligibility=="missing" replace eligible="not eligible" if age>25 & preg=="not pregnant" gen nonconsent="missing" if consent1=="" & eligible=="eligible" replace nonconsent="missing" if consent2=="" & eligible=="eligible" replace nonconsent="missing" if consent3=="" & eligible=="eligible" replace nonconsent="missing" if consent4=="" & eligible=="eligible" replace nonconsent="missing" if consent5=="" & eligible=="eligible" tab eligibility eligible gen target_population="young person" if eligibility=="Aged between 13-25 years old" replace target_population="Parent or expecting" if eligibility=="A parent of someone under 13 years old" | eligibility=="Aged over 18, you or your partner are expecting a baby" //Nonconsent replace nonconsent="C1_participation is voluntary" if consent1=="No, I don't want to complete the survey" replace nonconsent="C1_participation is voluntary" if consent1=="No, no quiero completar la encuesta" replace nonconsent="C1_participation is voluntary" if consent1=="Non, je ne veux pas répondre au sondage" replace nonconsent="C1_participation is voluntary" if consent1=="Hapana, sitaki kumaliza survey" replace nonconsent="C1_participation is voluntary" if consent1=="नहीं, मैं सर्वेक्षण पूरा नहीं करना चाहता" replace nonconsent="C1_participation is voluntary" if consent1=="نہیں ، میں سروے مکمل نہیں کرنا چاہتا۔" replace nonconsent="C1_participation is voluntary" if consent1=="No, non voglio completare il sondaggio" replace nonconsent="C1_participation is voluntary" if consent1=="না, আমি জরিপ সম্পূর্ণ করতে চাই না" replace nonconsent="C1_participation is voluntary" if consent1=="لا ، لا أرغب في إكمال الاستبيان" replace nonconsent="C1_participation is voluntary" if consent1=="不,我不想参加调查" replace nonconsent="C2_your answers are anonymous" if consent2=="No, I don't want to complete the survey" replace nonconsent="C2_your answers are anonymous" if consent2=="No, no quiero completar la encuesta" replace nonconsent="C2_your answers are anonymous" if consent2=="Non, je ne veux pas répondre au sondage" replace nonconsent="C2_your answers are anonymous" if consent2=="Hapana, sitaki kumaliza survey" replace nonconsent="C2_your answers are anonymous" if consent2=="नहीं, मैं सर्वेक्षण पूरा नहीं करना चाहता" replace nonconsent="C2_your answers are anonymous" if consent2=="نہیں ، میں سروے مکمل نہیں کرنا چاہتا۔" replace nonconsent="C2_your answers are anonymous" if consent2=="No, non voglio completare il sondaggio" replace nonconsent="C2_your answers are anonymous" if consent2=="না, আমি জরিপ সম্পূর্ণ করতে চাই না" replace nonconsent="C2_your answers are anonymous" if consent2=="لا ، لا أرغب في إكمال الاستبيان" replace nonconsent="C2_your answers are anonymous" if consent2=="不,我不想参加调查" replace nonconsent="C3_we will write research papers etc." if consent3=="No, I don't want to complete the survey" replace nonconsent="C3_we will write research papers etc." if consent3=="No, no quiero completar la encuesta" replace nonconsent="C3_we will write research papers etc." if consent3=="Non, je ne veux pas répondre au sondage" replace nonconsent="C3_we will write research papers etc." if consent3=="Hapana, sitaki kumaliza survey" replace nonconsent="C3_we will write research papers etc." if consent3=="नहीं, मैं सर्वेक्षण पूरा नहीं करना चाहता" replace nonconsent="C3_we will write research papers etc." if consent3=="نہیں ، میں سروے مکمل نہیں کرنا چاہتا۔" replace nonconsent="C3_we will write research papers etc." if consent3=="No, non voglio completare il sondaggio" replace nonconsent="C3_we will write research papers etc." if consent3=="না, আমি জরিপ সম্পূর্ণ করতে চাই না" replace nonconsent="C3_we will write research papers etc." if consent3=="لا ، لا أرغب في إكمال الاستبيان" replace nonconsent="C3_we will write research papers etc." if consent3=="不,我不想参加调查" replace nonconsent="C4_your data is secure" if consent4=="No, I don't want to complete the survey" replace nonconsent="C4_your data is secure" if consent4=="No, no quiero completar la encuesta" replace nonconsent="C4_your data is secure" if consent4=="Non, je ne veux pas répondre au sondage" replace nonconsent="C4_your data is secure" if consent4=="Hapana, sitaki kumaliza survey" replace nonconsent="C4_your data is secure" if consent4=="नहीं, मैं सर्वेक्षण पूरा नहीं करना चाहता" replace nonconsent="C4_your data is secure" if consent4=="نہیں ، میں سروے مکمل نہیں کرنا چاہتا۔" replace nonconsent="C4_your data is secure" if consent4=="No, non voglio completare il sondaggio" replace nonconsent="C4_your data is secure" if consent4=="না, আমি জরিপ সম্পূর্ণ করতে চাই না" replace nonconsent="C4_your data is secure" if consent4=="لا ، لا أرغب في إكمال الاستبيان" replace nonconsent="C4_your data is secure" if consent4=="不,我不想参加调查" replace nonconsent="C5_no prize" if consent5=="No, I don't want to complete the survey" replace nonconsent="C5_no prize" if consent5=="No, no quiero completar la encuesta" replace nonconsent="C5_no prize" if consent5=="Non, je ne veux pas répondre au sondage" replace nonconsent="C5_no prize" if consent5=="Hapana, sitaki kumaliza survey" replace nonconsent="C5_no prize" if consent5=="नहीं, मैं सर्वेक्षण पूरा नहीं करना चाहता" replace nonconsent="C5_no prize" if consent5=="نہیں ، میں سروے مکمل نہیں کرنا چاہتا۔" replace nonconsent="C5_no prize" if consent5=="No, non voglio completare il sondaggio" replace nonconsent="C5_no prize" if consent5=="না, আমি জরিপ সম্পূর্ণ করতে চাই না" replace nonconsent="C5_no prize" if consent5=="لا ، لا أرغب في إكمال الاستبيان" replace nonconsent="C5_no prize" if consent5=="不,我不想参加调查" replace nonconsent="consented" if nonconsent=="" generate consent="nonconsent" if nonconsent=="C1_participation is voluntary" | nonconsent=="C2_your answers are anonymous" | nonconsent=="C3_we will write research papers etc." | nonconsent=="C4_your data is secure" | nonconsent=="C5_no prize" | nonconsent=="C5_no prize" replace consent="consented" if nonconsent=="consented" replace consent="missing" if nonconsent=="missing" tab nonconsent consent //Time to completion //remove everything up to and including the last space (greedy match)gen var1 = regexr(Submit_Date,".+ ","") gen submit_time=clock(Submit_Date, "YMD hms") format submit_time %tc gen start_time=clock(Start_Date, "YMD hms") format start_time %tc gen completion_time_seconds = seconds(submit_time - start_time) gen completion_time_minutes = minutes(submit_time - start_time) list Start_Date Submit_Date if completion_time_seconds==0 //hand calculate the time of completion gen submit_hms=regexr(Submit_Date,".+ ","") if completion_time_minutes==0 gen start_hms=regexr(Start_Date,".+ ","") if completion_time_minutes==0 gen submit_hms_time=clock(submit_hms, "hms") gen start_hms_time=clock(start_hms, "hms") format submit_hms_time %tc format start_hms_time %tc gen completion_time_seconds_missing = seconds(submit_hms_time-start_hms_time) gen completion_time_minutes_missing = minutes(submit_hms_time-start_hms_time) replace completion_time_seconds=completion_time_seconds_missing if completion_time_seconds==0 replace completion_time_minutes=completion_time_minutes_missing if completion_time_minutes==0 drop completion_time_minutes_missing save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\2_complete dataset.dta", replace ********************Making Final Dataset use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\2_complete dataset.dta", clear gen submit_date = dofc(submit_time) drop if submit_date<22509 drop submit_date //n=32 (24 of these were utm_campaign=="CCC Youth Survey - Test Campaign V2", 2 were "xxxxx", 68 were pretest responses) drop if eligibility=="A parent of someone under 13 years old" & age>25 & preg=="not pregnant" //n=523 drop if eligible=="missing" //n=79 drop if eligible=="not eligible" //n=2,829 drop if consent=="missing" //n= 45 drop if consent=="nonconsent" //n=1,698 drop if FAQs=="Yes - get in touch" | FAQs=="Sí, ponme en contacto" | FAQs=="Oui - contactez-nous" | FAQs=="Ndio - Wasiliana na sisi" | FAQs=="हाँ - संपर्क करें" | FAQs=="ہاں - رابطہ کریں۔"| FAQs== "Sì, contattaci" | FAQs=="হ্যাঁ - যোগাযোগ করুন" | FAQs=="نعم - تواصل معنا" | FAQs=="是-联系我们" //n=560 save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\3_complete eligible consenting dataset.dta", replace ************** Must save iteratively to create a dataset for each: ineligible, nonconsent, and complete eligible and consenting. //Test campaign use "2_complete dataset.dta", clear keep if utm_campaign=="CCC Youth Survey - Test Campaign V2" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Missing data\responses from test campaign.dta", replace //Not eligible use "2_complete dataset.dta", clear keep if eligible=="not eligible" | eligible=="missing" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Missing data\ineligible dataset.dta", replace //if nonconsent=="consented" they DID consent! use "2_complete dataset.dta", clear drop if nonconsent=="consented" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Missing data\eligible nonconsent dataset.dta", replace //FAQs kicked them out use "2_complete dataset.dta", clear keep if FAQs=="Yes - get in touch" | FAQs=="Sí, ponme en contacto" | FAQs=="Oui - contactez-nous" | FAQs=="Ndio - Wasiliana na sisi" | FAQs=="हाँ - संपर्क करें" | FAQs=="ہاں - رابطہ کریں۔"| FAQs== "Sì, contattaci" | FAQs=="হ্যাঁ - যোগাযোগ করুন" | FAQs=="نعم - تواصل معنا" | FAQs=="是-联系我们" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Missing data\FAQs noncomplete.dta", replace //>25 not pregnant kicked them out use "2_complete dataset.dta", clear keep if age>25 & preg=="not pregnant" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Missing data\25+ not pregnant", replace *************Cleaning survey questions use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\3_complete eligible consenting dataset.dta", clear //air_quality gen air_quality_real=real(air_quality) //best gen best_a1_vote="no vote" if best_a1=="" gen best_a2_vote="no vote" if best_a2=="" gen best_a3_vote="no vote" if best_a3=="" gen best_a4_vote="no vote" if best_a4=="" gen best_a5_vote="no vote" if best_a5=="" gen best_a6_vote="no vote" if best_a6=="" gen best_a7_vote="no vote" if best_a7=="" gen best_a8_vote="no vote" if best_a8=="" gen best_a9_vote="no vote" if best_a9=="" gen best_a10_vote="no vote" if best_a10=="" gen best_a11_vote="no vote" if best_a11=="" gen best_a12_vote="no vote" if best_a12=="" replace best_a1_vote="vote" if best_a1_vote=="" replace best_a2_vote="vote" if best_a2_vote=="" replace best_a3_vote="vote" if best_a3_vote=="" replace best_a4_vote="vote" if best_a4_vote=="" replace best_a5_vote="vote" if best_a5_vote=="" replace best_a6_vote="vote" if best_a6_vote=="" replace best_a7_vote="vote" if best_a7_vote=="" replace best_a8_vote="vote" if best_a8_vote=="" replace best_a9_vote="vote" if best_a9_vote=="" replace best_a10_vote="vote" if best_a10_vote=="" replace best_a11_vote="vote" if best_a11_vote=="" replace best_a12_vote="vote" if best_a12_vote=="" gen best_a1_count=1 if best_a1_vote=="vote" gen best_a2_count=2 if best_a2_vote=="vote" gen best_a3_count=3 if best_a3_vote=="vote" gen best_a4_count=4 if best_a4_vote=="vote" gen best_a5_count=5 if best_a5_vote=="vote" gen best_a6_count=6 if best_a6_vote=="vote" gen best_a7_count=7 if best_a7_vote=="vote" gen best_a8_count=8 if best_a8_vote=="vote" gen best_a9_count=9 if best_a9_vote=="vote" gen best_a10_count=10 if best_a10_vote=="vote" gen best_a11_count=11 if best_a11_vote=="vote" gen best_a12_count=12 if best_a12_vote=="vote" label variable best_a1_count "The people" label variable best_a2_count "Places to play" label variable best_a3_count "Being close to school or work" label variable best_a4_count "There are many things to do" label variable best_a5_count "The shops and restaurants" label variable best_a6_count "It's easy to get around" label variable best_a7_count "Healthcare" label variable best_a8_count "Being close to my family" label variable best_a9_count "The climate and environment" label variable best_a10_count "Work opportunities for my family" label variable best_a11_count "Access to green space, like parks" label variable best_a12_count "Other" label variable best_a1_vote "The people" label variable best_a2_vote "Places to play" label variable best_a3_vote "Being close to school or work" label variable best_a4_vote "There are many things to do" label variable best_a5_vote "The shops and restaurants" label variable best_a6_vote "It's easy to get around" label variable best_a7_vote "Healthcare" label variable best_a8_vote "Being close to my family" label variable best_a9_vote "The climate and environment" label variable best_a10_vote "Work opportunities for my family" label variable best_a11_vote "Access to green space, like parks" label variable best_a12_vote "Other" //Worst gen worst_a1_vote="no vote" if worst_a1=="" gen worst_a2_vote="no vote" if worst_a2=="" gen worst_a3_vote="no vote" if worst_a3=="" gen worst_a4_vote="no vote" if worst_a4=="" gen worst_a5_vote="no vote" if worst_a5=="" gen worst_a6_vote="no vote" if worst_a6=="" gen worst_a7_vote="no vote" if worst_a7=="" gen worst_a8_vote="no vote" if worst_a8=="" gen worst_a9_vote="no vote" if worst_a9=="" gen worst_a10_vote="no vote" if worst_a10=="" gen worst_a11_vote="no vote" if worst_a11=="" gen worst_a12_vote="no vote" if worst_a12=="" gen worst_a13_vote="no vote" if worst_a13=="" replace worst_a1_vote="vote" if worst_a1_vote=="" replace worst_a2_vote="vote" if worst_a2_vote=="" replace worst_a3_vote="vote" if worst_a3_vote=="" replace worst_a4_vote="vote" if worst_a4_vote=="" replace worst_a5_vote="vote" if worst_a5_vote=="" replace worst_a6_vote="vote" if worst_a6_vote=="" replace worst_a7_vote="vote" if worst_a7_vote=="" replace worst_a8_vote="vote" if worst_a8_vote=="" replace worst_a9_vote="vote" if worst_a9_vote=="" replace worst_a10_vote="vote" if worst_a10_vote=="" replace worst_a11_vote="vote" if worst_a11_vote=="" replace worst_a12_vote="vote" if worst_a12_vote=="" replace worst_a13_vote="vote" if worst_a13_vote=="" gen worst_a1_count=1 if worst_a1_vote=="vote" gen worst_a2_count=2 if worst_a2_vote=="vote" gen worst_a3_count=3 if worst_a3_vote=="vote" gen worst_a4_count=4 if worst_a4_vote=="vote" gen worst_a5_count=5 if worst_a5_vote=="vote" gen worst_a6_count=6 if worst_a6_vote=="vote" gen worst_a7_count=7 if worst_a7_vote=="vote" gen worst_a8_count=8 if worst_a8_vote=="vote" gen worst_a9_count=9 if worst_a9_vote=="vote" gen worst_a10_count=10 if worst_a10_vote=="vote" gen worst_a11_count=11 if worst_a11_vote=="vote" gen worst_a12_count=12 if worst_a12_vote=="vote" gen worst_a13_count=13 if worst_a13_vote=="vote" label variable worst_a1_vote "It doesn't always feel safe" label variable worst_a2_vote "The people; it's not friendly" label variable worst_a3_vote "The traffic/congestion" label variable worst_a4_vote "The noise" label variable worst_a5_vote "Not enough places to play" label variable worst_a6_vote "Not enough places to meet friends" label variable worst_a7_vote "Shortage of work opportunities for my family" label variable worst_a8_vote "It's hard to get around" label variable worst_a9_vote "Not easy enough to get healthcare" label variable worst_a10_vote "The pollution" label variable worst_a11_vote "It's too crowded" label variable worst_a12_vote "Not enough green space like parks" label variable worst_a13_vote "Other" label variable worst_a1_count "It doesn't always feel safe" label variable worst_a2_count "The people; it's not friendly" label variable worst_a3_count "The traffic/congestion" label variable worst_a4_count "The noise" label variable worst_a5_count "Not enough places to play" label variable worst_a6_count "Not enough places to meet friends" label variable worst_a7_count "Shortage of work opportunities for my family" label variable worst_a8_count "It's hard to get around" label variable worst_a9_count "Not easy enough to get healthcare" label variable worst_a10_count "The pollution" label variable worst_a11_count "It's too crowded" label variable worst_a12_count "Not enough green space like parks" label variable worst_a13_count "Other" //Pollution gen pollution_causes_a1_vote="no vote" if pollution_causes_a1=="" gen pollution_causes_a2_vote="no vote" if pollution_causes_a2=="" gen pollution_causes_a3_vote="no vote" if pollution_causes_a3=="" gen pollution_causes_a4_vote="no vote" if pollution_causes_a4=="" gen pollution_causes_a5_vote="no vote" if pollution_causes_a5=="" gen pollution_causes_a6_vote="no vote" if pollution_causes_a6=="" gen pollution_causes_a7_vote="no vote" if pollution_causes_a7=="" gen pollution_causes_a8_vote="no vote" if pollution_causes_a8=="" gen pollution_causes_a9_vote="no vote" if pollution_causes_a9=="" replace pollution_causes_a1_vote="vote" if pollution_causes_a1_vote=="" replace pollution_causes_a2_vote="vote" if pollution_causes_a2_vote=="" replace pollution_causes_a3_vote="vote" if pollution_causes_a3_vote=="" replace pollution_causes_a4_vote="vote" if pollution_causes_a4_vote=="" replace pollution_causes_a5_vote="vote" if pollution_causes_a5_vote=="" replace pollution_causes_a6_vote="vote" if pollution_causes_a6_vote=="" replace pollution_causes_a7_vote="vote" if pollution_causes_a7_vote=="" replace pollution_causes_a8_vote="vote" if pollution_causes_a8_vote=="" replace pollution_causes_a9_vote="vote" if pollution_causes_a9_vote=="" gen pollution_causes_a1_count=1 if pollution_causes_a1_vote=="vote" gen pollution_causes_a2_count=2 if pollution_causes_a2_vote=="vote" gen pollution_causes_a3_count=3 if pollution_causes_a3_vote=="vote" gen pollution_causes_a4_count=4 if pollution_causes_a4_vote=="vote" gen pollution_causes_a5_count=5 if pollution_causes_a5_vote=="vote" gen pollution_causes_a6_count=6 if pollution_causes_a6_vote=="vote" gen pollution_causes_a7_count=7 if pollution_causes_a7_vote=="vote" gen pollution_causes_a8_count=8 if pollution_causes_a8_vote=="vote" gen pollution_causes_a9_count=9 if pollution_causes_a9_vote=="vote" label variable pollution_causes_a1_vote "Factories" label variable pollution_causes_a2_vote "Burning of rubbish" label variable pollution_causes_a3_vote "Motor transport (cars, buses, lorries)" label variable pollution_causes_a4_vote "Construction/building work" label variable pollution_causes_a5_vote "Pollution blown into the city (from outside)" label variable pollution_causes_a6_vote "Household cooking (cooking fires/stoves)" label variable pollution_causes_a7_vote "Household heating (boilers, wood fires etc)" label variable pollution_causes_a8_vote "Agriculture/farming" label variable pollution_causes_a9_vote "Other" label variable pollution_causes_a1_count "Factories" label variable pollution_causes_a2_count "Burning of rubbish" label variable pollution_causes_a3_count "Motor transport (cars, buses, lorries)" label variable pollution_causes_a4_count "Construction/building work" label variable pollution_causes_a5_count "Pollution blown into the city (from outside)" label variable pollution_causes_a6_count "Household cooking (cooking fires/stoves)" label variable pollution_causes_a7_count "Household heating (boilers, wood fires etc)" label variable pollution_causes_a8_count "Agriculture/farming" label variable pollution_causes_a9_count "Other" //Nicer city gen nicer_city_count=1 if nicer_city=="Becoming a nicer place to live"| nicer_city=="Convertirse en un mejor lugar para vivir" | nicer_city=="Deviens un endroit plus agréable à vivre"| nicer_city=="Inakuwa place poa ya kuishi" | nicer_city=="रहने के लिए एक अच्छी जगह बनना"| nicer_city=="رہنے کے لئے اچھی جگہ بن چکا ہے۔" | nicer_city=="Deviens un endroit plus agréable à vivre"| nicer_city=="বসবাসের জন্য একটি সুন্দর জায়গা হয়ে উঠছে"| nicer_city=="أن تصبح مكانًا أجمل للعيش فيه"| nicer_city=="成为更宜居的地方" replace nicer_city_count=2 if nicer_city=="Staying the same" | nicer_city=="Permanecer igual" | nicer_city=="Rester le même"| nicer_city=="Imebaki the same" | nicer_city=="वही रहना"| nicer_city=="ابھی تک ویسا ہی ہے۔" | nicer_city=="Deviens un endroit plus agréable à vivre"| nicer_city=="একই থাকছে"| nicer_city=="البقاء على نفس"| nicer_city=="保持不变" replace nicer_city_count=3 if nicer_city=="Becoming a worse place to live" | nicer_city=="Convertirse en un peor lugar para vivir" | nicer_city=="Deviens un pire endroit où vivre"| nicer_city=="Inakuwa place mbaya ya kuishi" | nicer_city=="रहने के लिए एक बदतर जगह बनना"| nicer_city=="پہلے کے مقابلے میں اور ہی بدتر جگہ ہو چکا ہے۔" | nicer_city=="Deviens un endroit plus agréable à vivre"| nicer_city=="বসবাসের জন্য একটি খারাপ জায়গা হয়ে উঠছে"| nicer_city=="أن تصبح مكانًا أسوأ للعيش فيه"| nicer_city=="变得更糟糕的居住地" label define nicer_city_label 1 "Becoming a nicer place to live" 2 "Staying the same" 3 "Becoming a worse place to live" label values nicer_city_count nicer_city_label save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\4_Full cleaned dataset", replace clear //Creating quartiles of pm2.5 within WHO dataset import excel "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\External pm2.5 Datasets\WHO_2018_v14_aap_air_quality_database.xlsx", sheet("latest availble PM25 (measured)") firstrow clear rename PM25Annualmeanugm3 city_pm25 rename town city keep city region city_pm25 replace city="Milan" if city=="Milano" xtile city_pm25_quartile = city_pm25, nq(4) //Create quartiles by average annual pm2.5 at the city level cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Merge with external pm2.5 datasets" save "WHO PM 2.5 dataset by city.dta", replace //City level merge of WHO pm2.5 quintiles with survey data merge 1:m city region using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\4_Full cleaned dataset" drop if _merge==1 drop _merge tab city_pm25_quartile // to make "Percentage of Observed Cities by pm2.5 Quintile" graph tab city_pm25_quartile, summarize(city_pm25) // to make average city_pm25 by quintile //bysort city_pm25_quintile : outreg2 using "average city pm25 by quintile.doc", replace sum(log) keep(city_pm25) eqdrop(min max) save "Merged with 2.5 city.dta", replace //Creating country level pm2.5 quintiles use "WHO PM 2.5 dataset by city.dta", clear bysort region : egen region_pm25=mean(city_pm25) by region, sort: gen nvals = _n == 1 drop if nvals==0 drop nvals xtile region_pm25_quartile = region_pm25, nq(4) drop city city_pm25 city_pm25_quartile //merge of WHO pm2.5 quartiles with suvey data (to fill in where cities were not available in the WHO dataset) merge 1:m region using "Merged with 2.5 city.dta" drop if _merge==1 tab region if _merge==2 drop _merge tab region_pm25_quartile tab region_pm25_quartile, summarize(region_pm25) save "Merged with 2.5 city and country.dta", replace tab region if region_pm25_quartile==. /* Need to use the World bank numbers for these countries because they didn't appear in the WHO dataset Will need to add these country average pm2.5 into the WHO dataset and recalculate the average pm2.5 quartile */ //Extracting average country pm2.5 from World Bank Dataset import delimited "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\External pm2.5 Datasets\World_Bank_database.csv", varnames (1) clear keep datasource v62 drop in 1/2 rename datasource region rename v62 region_pm25 keep if region=="Sierra Leone" | region=="Tanzania" | region=="Zimbabwe" save "Countries missing pm2.5.dta", replace //Appending World Bank's country level average annual pm2.5 for countries excluded from WHO dataset TO the WHO dataset use "Countries missing pm2.5.dta", clear append using "WHO PM 2.5 dataset by city.dta" //Redo the quartiles with included World Bank country information// bysort region : egen region_pm25_temp=mean(city_pm25) replace region_pm25=region_pm25_temp if region_pm25==. drop region_pm25_temp xtile region_pm25_quartile = region_pm25, nq(4) order city_pm25 city_pm25_quartile region_pm25 region_pm25_quartile save "WHO+World Bank PM 2.5 dataset by city.dta", replace //City level merge of WHO+World Bank pm2.5 quartiles with survey data merge 1:m city region using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\4_Full cleaned dataset" drop if _merge==1 drop _merge save "Merged with 2.5 city.dta (version 2)", replace //Country level merge of WHO+World Bank pm2.5 quartiles with suvey data (to fill in where cities were not available in the WHO dataset) use "WHO+World Bank PM 2.5 dataset by city.dta", clear by region, sort: gen nvals = _n == 1 drop if nvals==0 drop city city_pm25 city_pm25_quartile nvals merge 1:m region using "Merged with 2.5 city.dta (version 2)" drop if _merge==1 drop _merge generate pm25=city_pm25 replace pm25=region_pm25 if pm25==. generate pm25_quartile=city_pm25_quartile replace pm25_quartile=region_pm25_quartile if pm25_quartile==. //Respondents by pm2.5 quartile and descriptive stats tab pm25_quartile // to make "Percentage of Observed Cities by pm2.5 Quartile" graph tab pm25_quartile, summarize(pm25) // to make average city_pm25 by quartile bysort city: tab pm25_quartile, summarize (pm25) //Dropping missing demographic location & age drop if age==. drop if region_pm25==. save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\5_Full sample with pm2.5.dta", replace //city by pm2.5 quartile and descriptive stats by city, sort: gen nvals = _n == 1 drop if nvals==0 tab pm25_quartile tab pm25_quartile, summarize(pm25) *************FINAL DATASET use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\5_Full sample with pm2.5.dta", clear clear cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets" use "5_Full sample with pm2.5.dta", clear ***********************Subpopulation Datasets use "5_Full sample with pm2.5.dta", clear drop if best_a1=="" & best_a2=="" & best_a3=="" & best_a4=="" & best_a5=="" & best_a6=="" & best_a7=="" & best_a8=="" & best_a9=="" & best_a10=="" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations\Best subpopulation.dta", replace use "5_Full sample with pm2.5.dta", clear drop if worst_a1=="" & worst_a2=="" & worst_a3=="" & worst_a4=="" & worst_a5=="" & worst_a6=="" & worst_a7=="" & worst_a8=="" & worst_a9=="" & worst_a10=="" & worst_a11=="" & worst_a12=="" & worst_a13=="" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations\Worst subpopulation.dta", replace use "5_Full sample with pm2.5.dta", clear drop if pollution_causes_a1=="" & pollution_causes_a2=="" & pollution_causes_a3=="" & pollution_causes_a4=="" & pollution_causes_a5=="" & pollution_causes_a6=="" & pollution_causes_a7=="" & pollution_causes_a8=="" & pollution_causes_a9=="" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations\Pollution subpopulation.dta", replace use "5_Full sample with pm2.5.dta", clear drop if nicer_city=="" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations\Nicer City subpopulation.dta", replace use "5_Full sample with pm2.5.dta", clear drop if air_quality_real==. save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations\Air Quality subpopulation.dta", replace ****************************Reshaped Datasets use "5_Full sample with pm2.5.dta", clear //Analyzing Best, Worst, Pollution keep ID lang age age_bucket town best_a1_count-best_a12_count worst_a1_count-worst_a13_count pollution_causes_a1_count-pollution_causes_a9_count sort ID quietly by ID : gen dup = cond(_N==1,0,_n) reshape long best worst pollution, i(ID) j(question, string) label define bestlabel 1 "The people" 2 "Places to play" 3 "Being close to school or work" 4 "There are many things to do" 5 "The shops and restaurants" 6 "It's easy to get around" 7 "Healthcare" 8 "Being close to my family" 9 "The climate and environment" 10 "Work opportunities for my family" 11 "Access to green space, like parks" 12 "Other" label values best bestlabel label define worstlabel 1 "It doesn't always feel safe" 2 "The people; it's not friendly" 3 "The traffic/congestion" 4 "The noise" 5 "Not enough places to play" 6 "Not enough places to meet friends" 7 "Shortage of work opportunities for my family" 8 "It's hard to get around" 9 "Not easy enough to get healthcare" 10 "The pollution" 11 "It's too crowded" 12 "Not enough green space like parks" 13 "Other" label values worst worstlabel label define pollutionlabel 1 "Factories" 2 "Burning of rubbish" 3 "Motor transport (cars, buses, lorries)" 4 "Construction/building work" 5 "Pollution blown into the city (from outside)" 6 "Household cooking (cooking fires/stoves)" 7 "Household heating (boilers, wood fires etc)" 8 "Agriculture/farming" 9 "Other" label values pollution pollutionlabel save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Reshaped dataset for best_worst_pollution.dta", replace tab best tab worst tab pollution //Generating Qual dataset use "5_Full sample with pm2.5.dta", clear keep ID eligibility age age_bucket preg town region best_a1_vote-best_a11_vote best_a12 worst_a1_vote-worst_a12_vote worst_a13 gen_city_comments air_quality pollution_causes_a1_vote-pollution_causes_a8_vote pollution_causes_a9 gen_air_comments nicer_city_count nicer_comments mayor_ans1 mayor_ans2 final_comments pm25 pm25_quartile submit_time drop if best_a12=="" & worst_a13=="" & gen_city_comments=="" & pollution_causes_a9=="" & gen_air_comments=="" & nicer_comments=="" & mayor_ans1=="" & mayor_ans2=="" & final_comments=="" sort best_a12 worst_a13 gen_city_comments pollution_causes_a9 gen_air_comments nicer_comments mayor_ans1 mayor_ans2 final_comments save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\qualitative dataset.dta", replace export excel region-pm25_quartile using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Qual Data with pm25.xls", replace /* //Adding in new qual!! use "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\qualitative dataset.dta", clear gen newvar=1 sort ID order ID by ID, sort : generate dup = _n == 1 sort newvar order ID dup newvar drop if dup==0 & newvar==1 drop if newvar==. drop dup newvar order region pm25 town ID eligibility preg best_a12 worst_a13 gen_city_comments air_quality pollution_causes_a9 gen_air_comments nicer_comments mayor_ans1 mayor_ans2 final_comments age age_bucket best_a1_vote-best_a11_vote worst_a1_vote-worst_a12_vote pollution_causes_a1_vote-pollution_causes_a8_vote nicer_city_count pm25_quintile export excel region-pm25_quintile using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Qual Data with pm25 NEW.xls", replace */ clear *******************ADDING IN James's data cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC\Important documents for CCC survey methods slide deck\CCC Analysis" import excel "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC\Important documents for CCC survey methods slide deck\CCC Analysis\CCC_city_indicators.xlsx", sheet("Cities") firstrow clear save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Country Level\CCC_city_indicators.dta", replace rename City city merge 1:m city using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\5_Full sample with pm2.5.dta" save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\CCC\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\5_Full sample with pm2.5.dta", replace ssc install outreg2 //To pull down all figures for final report cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets" use "5_Full sample with pm2.5.dta", clear drop region_pm25 region_pm25_quartile city_pm25 city_pm25_quartile live eligible q FAQs variable_0 consent1 consent2 consent3 consent4 consent5 consent mistake dup nonconsent lang utm_source utm_medium utm_campaign utm_campaign_location utm_content Start_Date Submit_Date submit_time start_time Network_ID submit_hms start_hms submit_hms_time start_hms_time completion_time_minutes completion_time_seconds completion_time_seconds_missing town drop best_a1-best_a11 best*count worst_a1-worst_a12 worst*count pollution_causes_a1-pollution_causes_a8 pollution*count nicer_city air_quality order ID geoscheme_region geoscheme_subregion OECD city region pm25 pm25_quartile eligibility target_population age age_bucket preg nicer_city_count air_quality_real best_a1_vote-best_a12_vote worst_a1_vote-worst_a13_vote pollution_causes_a1_vote-pollution_causes_a9_vote best_a12 worst_a13 pollution_causes_a9 nicer_comments gen_city_comments gen_air_comments final_comments save "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\7_anonymized data.dta", replace export excel using "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Anonymized data.xls", firstrow(variables) replace *****************Full sample descriptive univariates //Age //ages that are in date (year) format were converted to the age in years the respondent would be, at the time of analysis (September 13, 2021) if they were born mid-year (June, 1) of the reported year tab age tab age_bucket su age, detail //Target population tab eligibility //Median Completion Time su completion_time_minutes, detail //Geographic location by pm25_quartile encode city, gen(city_encoded) encode region, gen(region_encoded) tab city tab region //outreg2 town using "demographics table.doc", append cross asdoc bysort pm25_quartile : tab city_encoded, c(city_encoded) replace by pm25_quartile, sort: tab region if city=="missing" //PM2.5 by observation tab pm25_quartile tabstat pm25, stat(n mean min max sd p50) by (pm25_quartile) //PM2.5 by city tab city, su(pm25) by city, sort: gen nvals = _n == 1 drop if nvals==0 tab pm25_quartile tab pm25_quartile, summarize(pm25) //Income by pm2.5 by city, sort: gen nvals = _n == 1 drop if nvals==0 sort pm25_quartile by pm25_quartile : su CityGDPpercap ***************Subpopulation 'Best' cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations" use "Best subpopulation.dta", clear //Univariates su age //outreg2 using "demographics table.doc", append sum(log) keep(age) tab age_bucket tab eligibility tabm best_a1_vote-best_a11_vote, column //Each item made up what percent of the overall votes for 'best' things? tabm best_a1_vote-best_a11_vote, row //What percent of people voted for each item? tabm best_a1_vote-best_a12_vote, column row //heat maps sort pm25_quartile by pm25_quartile : tab city tabulate city best_a1_vote, row tabulate city best_a2_vote, row tabulate city best_a3_vote, row tabulate city best_a4_vote, row tabulate city best_a5_vote, row tabulate city best_a6_vote, row tabulate city best_a7_vote, row tabulate city best_a8_vote, row tabulate city best_a9_vote, row tabulate city best_a10_vote, row tabulate city best_a11_vote, row tabulate city best_a12_vote, row by pm25_quartile : tab region tabulate region best_a1_vote, row tabulate region best_a2_vote, row tabulate region best_a3_vote, row tabulate region best_a4_vote, row tabulate region best_a5_vote, row tabulate region best_a6_vote, row tabulate region best_a7_vote, row tabulate region best_a8_vote, row tabulate region best_a9_vote, row tabulate region best_a10_vote, row tabulate region best_a11_vote, row tabulate region best_a12_vote, row tabulate pm25_quartile best_a1_vote, row tabulate pm25_quartile best_a2_vote, row tabulate pm25_quartile best_a3_vote, row tabulate pm25_quartile best_a4_vote, row tabulate pm25_quartile best_a5_vote, row tabulate pm25_quartile best_a6_vote, row tabulate pm25_quartile best_a7_vote, row tabulate pm25_quartile best_a8_vote, row tabulate pm25_quartile best_a9_vote, row tabulate pm25_quartile best_a10_vote, row tabulate pm25_quartile best_a11_vote, row tabulate pm25_quartile best_a12_vote, row tabulate target_population best_a1_vote, row tabulate target_population best_a2_vote, row tabulate target_population best_a3_vote, row tabulate target_population best_a4_vote, row tabulate target_population best_a5_vote, row tabulate target_population best_a6_vote, row tabulate target_population best_a7_vote, row tabulate target_population best_a8_vote, row tabulate target_population best_a9_vote, row tabulate target_population best_a10_vote, row tabulate target_population best_a11_vote, row tabulate target_population best_a12_vote, row tabulate age_bucket best_a1_vote, row tabulate age_bucket best_a2_vote, row tabulate age_bucket best_a3_vote, row tabulate age_bucket best_a4_vote, row tabulate age_bucket best_a5_vote, row tabulate age_bucket best_a6_vote, row tabulate age_bucket best_a7_vote, row tabulate age_bucket best_a8_vote, row tabulate age_bucket best_a9_vote, row tabulate age_bucket best_a10_vote, row tabulate age_bucket best_a11_vote, row tabulate age_bucket best_a12_vote, row /*The most common 'best' things were 'Being close to my family' at 14.40% of the 4,492 votes cast (with individuals recieving more than one vote), 'Being close to school' at 13.42% of the vote, and that 'There are many things to do' at 12.62% of the vote. 29.60%, 27.58%, and 25.94% of the 2,186 respondents marked these items respectively as being in the top three 'best' things about their cities. */ //Multivariates ***************Subpopulation 'Worst' use "Worst subpopulation.dta", clear //Univariates su age //tab age_bucket tab eligibility tabm worst_a1_vote-worst_a13_vote, column //Each item made up what percent of the overall votes for 'worst' things? tabm worst_a1_vote-worst_a13_vote, row //What percent of people voted for each item? tabm worst_a1_vote-worst_a13_vote, column row /*The most common 'worst' things were 'The traffic/congestion' at 21.18% of the 5,128 votes cast (with individuals recieving more than one vote), 'The pollution' at 16.63% of the vote, and 'Shortage of work opportunities for my family' at 10.92% of the vote. 47.93%, 37.64%, and 24.71% of the 2,266 respondents marked these items respectively as being in the top three 'worst' things about their cities. */ //heat maps sort pm25_quartile by pm25_quartile : tab city tabulate city worst_a1_vote, row tabulate city worst_a2_vote, row tabulate city worst_a3_vote, row tabulate city worst_a4_vote, row tabulate city worst_a5_vote, row tabulate city worst_a6_vote, row tabulate city worst_a7_vote, row tabulate city worst_a8_vote, row tabulate city worst_a9_vote, row tabulate city worst_a10_vote, row tabulate city worst_a11_vote, row tabulate city worst_a12_vote, row tabulate city worst_a13_vote, row tabulate target_population worst_a1_vote, row tabulate target_population worst_a2_vote, row tabulate target_population worst_a3_vote, row tabulate target_population worst_a4_vote, row tabulate target_population worst_a5_vote, row tabulate target_population worst_a6_vote, row tabulate target_population worst_a7_vote, row tabulate target_population worst_a8_vote, row tabulate target_population worst_a9_vote, row tabulate target_population worst_a10_vote, row tabulate target_population worst_a11_vote, row tabulate target_population worst_a12_vote, row tabulate target_population worst_a13_vote, row tabulate pm25_quartile worst_a1_vote, row tabulate pm25_quartile worst_a2_vote, row tabulate pm25_quartile worst_a3_vote, row tabulate pm25_quartile worst_a4_vote, row tabulate pm25_quartile worst_a5_vote, row tabulate pm25_quartile worst_a6_vote, row tabulate pm25_quartile worst_a7_vote, row tabulate pm25_quartile worst_a8_vote, row tabulate pm25_quartile worst_a9_vote, row tabulate pm25_quartile worst_a10_vote, row tabulate pm25_quartile worst_a11_vote, row tabulate pm25_quartile worst_a12_vote, row tabulate pm25_quartile worst_a13_vote, row tabulate age_bucket worst_a1_vote, row tabulate age_bucket worst_a2_vote, row tabulate age_bucket worst_a3_vote, row tabulate age_bucket worst_a4_vote, row tabulate age_bucket worst_a5_vote, row tabulate age_bucket worst_a6_vote, row tabulate age_bucket worst_a7_vote, row tabulate age_bucket worst_a8_vote, row tabulate age_bucket worst_a9_vote, row tabulate age_bucket worst_a10_vote, row tabulate age_bucket worst_a11_vote, row tabulate age_bucket worst_a12_vote, row tabulate age_bucket worst_a13_vote, row ***************Subpopulation 'Pollution' cd "C:\Users\rache\OneDrive - London School of Hygiene and Tropical Medicine\Important documents for CCC survey methods slide deck\CCC Analysis\Built Datasets\Subpopulations" use "Pollution subpopulation.dta", clear su age //tab age_bucket tab eligibility tabm pollution_causes_a1_vote-pollution_causes_a9_vote, column //Each item made up what percent of the overall votes for 'worst' things? tabm pollution_causes_a1_vote-pollution_causes_a9_vote, row //What percent of people voted for each item? tabm pollution_causes_a1_vote-pollution_causes_a9_vote, column row sort pm25_quartile by pm25_quartile : tab city tabulate city pollution_causes_a1_vote, row tabulate city pollution_causes_a2_vote, row tabulate city pollution_causes_a3_vote, row tabulate city pollution_causes_a4_vote, row tabulate city pollution_causes_a5_vote, row tabulate city pollution_causes_a6_vote, row tabulate city pollution_causes_a7_vote, row tabulate city pollution_causes_a8_vote, row tabulate city pollution_causes_a9_vote, row tabulate target_population pollution_causes_a1_vote, row tabulate target_population pollution_causes_a2_vote, row tabulate target_population pollution_causes_a3_vote, row tabulate target_population pollution_causes_a4_vote, row tabulate target_population pollution_causes_a5_vote, row tabulate target_population pollution_causes_a6_vote, row tabulate target_population pollution_causes_a7_vote, row tabulate target_population pollution_causes_a8_vote, row tabulate target_population pollution_causes_a9_vote, row tabulate pm25_quartile pollution_causes_a1_vote, row tabulate pm25_quartile pollution_causes_a2_vote, row tabulate pm25_quartile pollution_causes_a3_vote, row tabulate pm25_quartile pollution_causes_a4_vote, row tabulate pm25_quartile pollution_causes_a5_vote, row tabulate pm25_quartile pollution_causes_a6_vote, row tabulate pm25_quartile pollution_causes_a7_vote, row tabulate pm25_quartile pollution_causes_a8_vote, row tabulate pm25_quartile pollution_causes_a9_vote, row tabulate age_bucket pollution_causes_a1_vote, row tabulate age_bucket pollution_causes_a2_vote, row tabulate age_bucket pollution_causes_a3_vote, row tabulate age_bucket pollution_causes_a4_vote, row tabulate age_bucket pollution_causes_a5_vote, row tabulate age_bucket pollution_causes_a6_vote, row tabulate age_bucket pollution_causes_a7_vote, row tabulate age_bucket pollution_causes_a8_vote, row tabulate age_bucket pollution_causes_a9_vote, row ***************Subpopulation 'air quality' use "Air Quality subpopulation.dta", clear su age //tab age_bucket tab eligibility tabstat air_quality_real, by(pm25_quartile) stat(n mean sd) tabstat air_quality_real, by(age_bucket) stat(n mean sd) tabstat air_quality_real, by(target_population) stat(n mean sd) tabstat air_quality_real, by(city) stat(n mean sd) bysort region: outreg2 using "full sample.xls", replace sum(log) keep(air_quality_real) eqkeep(N mean sd) bysort town: outreg2 using "full sample.xls", replace sum(log) keep(air_quality_real) eqkeep(N mean sd) bysort OECD: outreg2 using "full sample.xls", replace sum(log) keep(air_quality_real) eqkeep(N mean sd) ***************Subpopulation 'Nicer city' use "Nicer City subpopulation.dta", clear su age //tab age_bucket tab eligibility sort target_population by target_population : tab nicer_city_count sort age_bucket by age_bucket : tab nicer_city_count table age_bucket nicer_city_count tab nicer_city_count sort pm25_quartile by pm25_quartile : tab nicer_city_count by OECD : tab nicer_city_count by OECD : su pm25 clear