############################################################################## # # PROJECT: ERC ANALYSIS # AUTHOR: NICOLA FOSTER # LAST UPDATED: 31 JANUARY 2021 # # FIGURE-SOCIOECONOMIC POSITION # ############################################################################## # loads libraries if(!require("haven")){install.packages("haven") library("haven")} if(!require("readxl")){install.packages("readxl") library("readxl")} if(!require("tidyverse")){install.packages("tidyverse") library("tidyverse")} if(!require("plyr")){install.packages("plyr") library("plyr")} if(!require("dplyr")){install.packages("dplyr") library("dplyr")} if(!require("ggplot")){install.packages("ggplot") library("ggplot")} if(!require("readr")){install.packages("readr") library("readr")} if(!require("complete")){install.packages("complete") library("complete")} if(!require("ggpubr")){install.packages("ggpubr") library("ggpubr")} if(!require("scales")){install.packages("scales") library("scales")} # load STATA dataset main <- read_dta("~/Documents/...dta") attach(main) # Use tbcase variable as the TB case definition smain_pca <- data.frame(main$newID, main$Wtotps, main$tbcase, main$survey, main$symptoms_any, main$pca_continuous, main$pca_quartile) # estimates a pca set for each of the surveys separately smain_pca_s1 <- subset(smain_pca, main.survey==0) # View(smain_pca_s1) smain_pca_s2 <- subset(smain_pca, main.survey==1) # View(smain_pca_s2) # TB prevalence sum_main <- ddply(smain_pca, c("main.pca_quartile"), summarise, N = length(main.tbcase), mean = mean(main.tbcase), sd = sd(main.tbcase), se = sd / sqrt(N)) # Sub-clinical TB sum_sub <- ddply(smain_pca, c("main.pca_quartile"), summarise, N = length(main.tbsub), mean = mean(main.tbsub), sd = sd(main.tbsub), se = sd / sqrt(N)) ############################################################################### ############################################################################### # MICRO-BIOLOGICALLY CONFIRMED TB sum_s1 <- ddply(smain_pca_s1, c("main.pca_quartile"), summarise, N = length(main.tbcase), mean = mean(main.tbcase), sd = sd(main.tbcase), se = sd / sqrt(N)) sum_s2 <- ddply(smain_pca_s2, c("main.pca_quartile"), summarise, N = length(main.tbcase), mean = mean(main.tbcase), sd = sd(main.tbcase), se = sd / sqrt(N)) # SUBCLINICAL TB sum_s1_sx <- ddply(smain_pca_s1, c("main.pca_quartile"), summarise, N = length(main.tbsub), mean = mean(main.tbsub), sd = sd(main.tbsub), se = sd / sqrt(N)) sum_s2_sx <- ddply(smain_pca_s2, c("main.pca_quartile"), summarise, N = length(main.tbsub), mean = mean(main.tbsub), sd = sd(main.tbsub), se = sd / sqrt(N)) ################################################################################ ################################################################################ # FIG2A. TB PREVALENCE BY SEP prev_ses_s1 <- ggplot(sum_s1, aes(x=main.pca_quartile, y=mean)) + geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) + theme_bw() + geom_line() + geom_point()+ labs(title = "2007", x ="Household SEP (1=poor, 4=wealthy)", y = "TB prevalence in 2007")+ #scale_y_continuous(name="TB prevalence in 2007")+ coord_cartesian(ylim = c(0, 0.004)) prev_ses_s1 prev_ses_s2 <- ggplot(sum_s2, aes(x=main.pca_quartile, y=mean)) + geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) + theme_bw() + geom_line() + geom_point()+ labs(title = "2017", x ="Household SEP (1=poor, 4=wealthy)", y = "TB prevalence in 2017")+ #scale_y_continuous(name=" ")+ coord_cartesian(ylim = c(0, 0.004)) prev_ses_s2 prev_by_ses <- ggarrange(prev_ses_s1, prev_ses_s2, label.x = 0, label.y = 1, #ncol = 2, nrow = 1, widths = c(15, 15)) prev_by_ses ggsave(filename = "prev_by_ses.png", prev_by_ses, width = 10, height = 5, dpi = 1000) ################################################################################ ################################################################################ # FIG2B. SUBCLINICAL TB PREVALENCE BY SEP prev_ses_sub1 <- ggplot(sum_s1_sx, aes(x=main.pca_quartile, y=mean)) + geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) + theme_bw() + geom_line() + geom_point()+ labs(x ="socio-economic position")+ scale_y_continuous(name="Sub-clinical TB prevalence 2007")+ coord_cartesian(ylim = c(0, 0.008)) prev_ses_sub1 prev_ses_sub2 <- ggplot(sum_s2_sx, aes(x=main.pca_quartile, y=mean)) + geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) + theme_bw() + geom_line() + geom_point()+ labs(x ="socio-economic position")+ scale_y_continuous(name="Sub-clinical TB prevalence 2017")+ coord_cartesian(ylim = c(0, 0.008)) prev_ses_sub2 prev_by_ses2 <- ggarrange(prev_ses_sub1, prev_ses_sub2, labels = c("2007", "2017"), label.x = 0, label.y = 1, ncol = 2, nrow = 1) prev_by_ses2 ggsave(filename = "prev_by_ses.png", prev_by_ses, width = 10, height = 5, dpi = 1000) # combines the graphs prev_by_ses_6 <- ggarrange(prev_ses_s1, prev_ses_s2, prev_ses_sub1, prev_ses_sub2, labels = c("2007(A)", "2017(A)", "2007(B)", "2017(B)"), label.x = 0, label.y = 1, hjust = -0.5, vjust = 1.5, heights = 2, ncol = 2, nrow = 2) prev_by_ses_6 ggsave(filename = "prev_by_ses_6.png", prev_by_ses_6, width = 10, height = 5, dpi = 1000) ############################################################################### ############################################################################### # BARGRAPH OF DISTRIBUTION OF PROPORTION OF SAMPLE BY SEP # generates figures to show the distribution of tb symptoms by socio-economic status, plots absolute number of people by ses ses_prop_s1 <- ggplot(smain_pca_s1, aes(main.pca_quartile)) + geom_bar(aes(y=..count../sum(..count..))) + theme_bw() + labs(title = "2007", x ="Household SEP (1=poor, 4=wealthy)")+ scale_y_continuous(name="Proportion of sample in 2007 survey") + #,labels=percent_format())+ coord_cartesian(ylim = c(0, 0.4)) ses_prop_s2 <- ggplot(smain_pca_s2, aes(main.pca_quartile)) + geom_bar(aes(y=..count../sum(..count..))) + theme_bw() + labs(title = "2017", x ="Household SEP (1=poor, 4=wealthy)") + scale_y_continuous(name="Proportion of sample in 2017 survey") + #,labels=percent_format())+ coord_cartesian(ylim = c(0, 0.4)) props_by_year <- ggarrange(ses_prop_s1,ses_prop_s2, ncol = 2, nrow = 1) props_by_year ggsave(filename = "props_by_year.png", props_by_year, width = 10, height = 5, dpi = 1000)