########################################################################################################
# Paper : Vaccine Confidence and Hesitancy at the start of COVID-19 vaccine deployment in the UK: An embedded mixed-methods study
# Authors : Chrissy h Roberts[1]*, Hannah Brindle[1], Nina T Rogers[2], Rosalind M Eggo[3], Luisa Enria[4] and Shelley Lees[4]
#
# Affiliations
#             1  Department of Clinical Research, London School of Hygiene and Tropical Medicine, London, UK
#             2  MRC Epidemiology Unit, University of Cambridge School of Clinical Medicine, United Kingdom
#             3 Department of Infectious Disease Epidemiology, London School of Hygiene and Tropical Medicine, London, UK
#             4 Department of Global Health and Development, London School of Hygiene and Tropical Medicine, London, UK
#
# R Scripts for "Vaccine Confidence and Hesitancy at the start of COVID-19 vaccine deployment in the UK: An embedded mixed-methods study"
# (c) Chrissy h Roberts
# This work is licensed under a
# Creative Commons Attribution 4.0 International License.
# You should have received a copy of the license along with this
# work. If not, see <http://creativecommons.org/licenses/by/4.0/>.
########################################################################################################

########################################################################################################
# Set seed for reproducible analysis
########################################################################################################
set.seed(21345)

########################################################################################################
# Load Libraries
########################################################################################################

{
  library(arsenal)
  library(data.table)
  library(flextable)
  library(gridExtra)
  library(gt)
  library(gtsummary)
  library(htmlwidgets)
  library(kableExtra)
  library(maptools)
  library(nnet)
  library(plotly)
  library(ranger)
  library(RColorBrewer)
  library(reshape2)
  library(rgdal)
  library(sjmisc)
  library(sp)
  library(stm)
  library(summarytools)
  library(tidystm)
  library(tidytext)
  library(tidyverse)
  library(tm)
  require(mice)
}


########################################################################################################
# Tidy and Impute data
########################################################################################################

#LOAD THE DATA
df<-read.csv("LSHTM_COVID_19_SURVEY_VACCINES_ROBERTS_2020-12-18_DC.csv", na.strings = c("NA",""),stringsAsFactors = T)
df$SubmissionDate<-as.Date(df$SubmissionDate)
df<-filter(df,SubmissionDate>"2020-12-08")

#SELECT VARIABLES THAT WILL BE USED
df<-select(df,
           -SubmissionDate,
           -start,
           -end,
           -deviceid,
           -subscriberid,
           -simserial,
           -phonenumber,
           -generated_note_name_10,
           -agree,
           dem.gender,
           dem.age,
           dem.people_in_home,
           dem.marital,
           dem.school_age_kids,
           dem.education,
           dem.employment,
           dem.employment_changed,
           dem.disabled,
           eth.ethnicity,
           -eth.ethnicity_white,
           -eth.ethnicity_black,
           -eth.ethnicity_mixed,
           -eth.ethnicity_asian,
           -eth.ethnicity_other,
           covid.symptoms,
           -covid.symptoms_date,
           -covid.tested_me,
           -covid.tested_member,
           -covid.tested_no,
           -covid.positive,
           -symptoms_isolate.note_symptoms_isolate,
           -symptoms_isolate.reserved_name_for_field_list_labels_41,
           -symptoms_isolate.adult_symptoms,
           -symptoms_isolate.children_symptoms,
           -symptoms_isolate.adult_no_symptoms,
           -symptoms_isolate.children_no_symptoms,
           -isolation.time_isolate,
           -isolation.isolate_comply,
           -isolation.time_isolate_actual,
           -isolation.isolation_approach,
           -app.covid_app,
           -app.covid_app_whynot,
           -app.covid_app_concerns,
           -app.alert,
           gov_dec.gov_right_decision,
           -gov_dec.gov_right_control,
           -gov_dec.gov_wrong_control,
           -gov_dec.devolved_gov.note_devolved_govs,
           -gov_dec.devolved_gov.reserved_name_for_field_list_labels_66,
           -gov_dec.devolved_gov.scotland,
           -gov_dec.devolved_gov.wales,
           -gov_dec.devolved_gov.n_ireland,
           -measures,
           trustgov,
           trust.trust_level,
           -trust.trust_change,
           -priorities,
           health_overall,
           -depression.note_depression,
           depression.depressed_before,
           depression.depressed_recent,
           -experience_lockdown,
           -phys_activity.note_phys_activity,
           phys_activity.phys_activity_before,
           phys_activity.phys_activity_recent,
           -weight_describe.note_weight,
           weight_describe.weight_before,
           weight_describe.weight_recent,
           -alcohol_1.note_drinking,
           alcohol_1.alcohol_before,
           alcohol_1.alcohol_recent,
           -cage_before.note_CAGE1,
           -cage_before.cut_down,
           -cage_before.annoy,
           -cage_before.guilty,
           -cage_before.morning,
           -cage_recent.note_CAGE2,
           -cage_recent.cut_down2,
           -cage_recent.annoy2,
           -cage_recent.guilty2,
           -cage_recent.morning2,
           -cage_recent.note_aa,
           -smoking.note_smoking,
           smoking.smoking_before,
           smoking.smoking_recent,
           -sleep_hours.note_sleep,
           sleep_hours.sleep_before,
           sleep_hours.sleep_after,
           -specific_conditions.note_specific_conditions,
           -specific_conditions.reserved_name_for_field_list_labels_128,
           specific_conditions.diabetes_type1,
           specific_conditions.diabetes_type2,
           specific_conditions.ashtma,
           specific_conditions.lungdisease,
           specific_conditions.cancer,
           specific_conditions.stroke,
           specific_conditions.heartdisease,
           specific_conditions.hypertension,
           specific_conditions.obesity,
           specific_conditions.depression,
           specific_conditions.anxiety,
           -adls.note_adls,
           -adls.reserved_name_for_field_list_labels_142,
           adls.dressing,
           adls.walking,
           adls.bathing,
           adls.eating,
           adls.bed,
           adls.toilet,
           gbv.violence,
           -gbv.perpetrated,
           -gbv.gender_abuser,
           -gbv.violence_before,
           -gbv.violence_continued,
           -gbv.help,
           -gbv.help_from,
           -gbv.note_helpline,
           vaccination.vaccinated,
           -vaccination.vaccinated_yes,
           -vaccination.vaccinated_no,
           vaccination.feelings,
           vaccination.flu_vaccine,
           demographics3.income,
           demographics3.hardship,
           demographics3.political_opinion,
           postcode,
           -meta.instanceID,
           -KEY,
           -SubmitterID,
           -SubmitterName,
           -AttachmentsPresent,
           -AttachmentsExpected,
           -Status
)


########################################################################################################
# Recode Education to make fewer, more informative classes
########################################################################################################


df$dem.education[which(df$dem.education=="do_not_know")]<-NA

df$dem.education<-recode(df$dem.education,
                         "alevel" = "school",
                         "further" = "further",
                         "gcse"  = "school",
                         "higher"  = "higher",
                         "postgrad"  = "higher",
                         "primary" = "school",
)

df$dem.education<-factor(df$dem.education)

########################################################################################################
# Set as factors for vaccination behaviours
########################################################################################################

df$vaccination.feelings<-factor(df$vaccination.feelings)
df$vaccination.flu_vaccine<-factor(df$vaccination.flu_vaccine)

########################################################################################################
# Recode age groups to match previous publication [https://doi.org/10.1371/journal.pone.0239247]
########################################################################################################

df$dem.age2<-NA
df$dem.age2[which(df$dem.age < 34)]<-"18-34"
df$dem.age2[which(df$dem.age >=35 & df$dem.age < 55)]<-"35-54"
df$dem.age2[which(df$dem.age >=55 & df$dem.age < 70)]<-"55-69"
df$dem.age2[which(df$dem.age >=70)]<-"70+"
df$dem.age<-as.factor(df$dem.age2)

df$dem.age<-factor(df$dem.age,levels = c("18-34","35-54","55-69","70+"))
df$dem.age2<-NULL

########################################################################################################
# Recode Postcodes to regions
########################################################################################################

df$region [df$postcode=="Blackburn"|df$postcode=="Bradford"|df$postcode=="Bolton"|df$postcode=="Carlisle"|df$postcode=="Chester"|df$postcode=="Crewe"|df$postcode=="Blackpool"|
             df$postcode=="Huddersfield"|df$postcode=="Halifax"|df$postcode=="Liverpool"|df$postcode=="Lancaster"|df$postcode=="Manchester"|df$postcode=="Oldham"|df$postcode=="Preston"|df$postcode=="Stockport"|df$postcode=="Warrington"|df$postcode=="Wigan"]<-"North West"
df$region [df$postcode=="Aberdeen"|df$postcode=="Dundee"|df$postcode=="Dumfries and Galloway"|df$postcode=="Edinburgh"|df$postcode=="Falkirk and Stirling"|df$postcode=="Glasgow"|df$postcode=="Outer Hebrides"|df$postcode=="Inverness"|df$postcode=="Kilmarnock"|df$postcode=="Kirkwall"|df$postcode=="Kirkcaldy"|df$postcode=="Motherwell"|df$postcode=="Paisley"|df$postcode=="Perth"|df$postcode=="Galashiels"|df$postcode=="Lerwick"]<-"Scotland"
df$region [df$postcode=="Bath"|df$postcode=="Bournemouth"|df$postcode=="Bristol"|df$postcode=="Dorchester"|df$postcode=="Exeter"|df$postcode=="Gloucester"|df$postcode=="Plymouth"|df$postcode=="Swindon"|df$postcode=="Salisbury"|df$postcode=="Taunton"|df$postcode=="Torquay"|df$postcode=="Truro"]<-"South West"
df$region [df$postcode=="Brighton"|df$postcode=="Canterbury"|df$postcode=="Guildford"|df$postcode=="Rochester"|df$postcode=="Milton Keynes"|df$postcode=="Oxford"|df$postcode=="Portsmouth"|df$postcode=="Reading"|df$postcode=="Redhill"|df$postcode=="Slough"|df$postcode=="Southamptom"|df$postcode=="Tonbridge"]<-"South East"
df$region [df$postcode=="Cardiff"|df$postcode=="Llandrindod Wells"|df$postcode=="Llandudno"|df$postcode=="Newport"|df$postcode=="Swansea"|df$postcode=="Shrewsbury"]<-"Wales"
df$region [df$postcode=="Birmingham"|df$postcode=="Coventry"|df$postcode=="Dudley"|df$postcode=="Hereford"|df$postcode=="Northampton"|df$postcode=="Stoke-on-Trent"|df$postcode=="Telford"|df$postcode=="Worcester"|df$postcode=="Walsall"|df$postcode=="Wolverhampton"]<-"West Midlands"
df$region [df$postcode=="Derby"|df$postcode=="Doncaster"|df$postcode=="Leicester"|df$postcode=="Lincoln"|df$postcode=="Nottingham"|df$postcode=="Sheffield"]<-"East Midlands"
df$region [df$postcode=="Derby"|df$postcode=="Doncaster"|df$postcode=="Leicester"|df$postcode=="Lincoln"|df$postcode=="Nottingham"|df$postcode=="Sheffield"]<-"East Midlands"
df$region [df$postcode=="St Albans"|df$postcode=="Cambridge"|df$postcode=="Chelmsford"|df$postcode=="Colchester"|df$postcode=="Hemel Hempstead"|df$postcode=="Ipswich"|df$postcode=="Luton"|df$postcode=="Norwich"|df$postcode=="Peterborough"|df$postcode=="Stevenage"|df$postcode=="Southend-on-Sea"]<-"East of England"
df$region [df$postcode=="Bromley"|df$postcode=="Croydon"|df$postcode=="Dartford"|df$postcode=="East London"|df$postcode=="Central London EC"|df$postcode=="Enfield"|df$postcode=="Harrow"|df$postcode=="Ilford"|df$postcode=="Kingston upon Thames"|df$postcode=="North London"|df$postcode=="North West London"|df$postcode=="Romford"|df$postcode=="South East London"|df$postcode=="Sutton"|df$postcode=="South West London"|df$postcode=="Sutton"|df$postcode=="South West London"|df$postcode=="Twickenham"|df$postcode=="Southall"|df$postcode=="West London"|df$postcode=="Central London WC"|df$postcode=="Watford"]<-"London"
df$region [df$postcode=="Durham"|df$postcode=="Darlington"|df$postcode=="Harrogate"|df$postcode=="Hull"|df$postcode=="Leeds"|df$postcode=="Newcastle upon Tyne"|df$postcode=="Sunderland"|df$postcode=="Cleveland"|df$postcode=="Wakefield"|df$postcode=="York"]<-"North East"
df$region [df$postcode=="Northern Ireland"]<-"Northern Ireland"
df$region [df$postcode=="British Forces"]<-"British Forces"
df$region<-as.character(df$region)
df$region[which(is.na(df$region))]<-"Unknown"
df$region [df$postcode=="British Forces"]<-NA
df$region<-factor(df$region, levels = c("London","East Midlands","East of England", "North East", "North West", "Northern Ireland", "Scotland", "South East", "South West", "Wales", "West Midlands", "Unknown"))
df$postcode<-NULL

########################################################################################################
# As factor employment and employment changes
########################################################################################################

df$dem.employment<-factor(df$dem.employment,levels = c("fulltime","parttime","retired","student","homemaker","unemployed"))


df$dem.employment_changed<-factor(df$dem.employment_changed,levels=c("no_change","retired","furlough__returned","furlough_current","lost_job","new_job","something_else"))

########################################################################################################
# Recode Ethnicity
########################################################################################################

df$eth.ethnicity<-recode(df$eth.ethnicity,
                         "white" = "White",
                         "asian" = "Asian",
                         "black" = "Black",
                         "mixed" = "Mixed",
                         "other" = "Another Ethnic Group"
)

df$eth.ethnicity<-factor(df$eth.ethnicity,levels=c("White","Asian","Black","Mixed","Another Ethnic Group"))


########################################################################################################
# Recode Incomes
########################################################################################################

df$demographics3.income<-as.character(df$demographics3.income)

df$demographics3.income[which(df$demographics3.income=="-9")]<-NA


df$demographics3.income<-factor(df$demographics3.income)
df$demographics3.income<-recode(df$demographics3.income,
                                "1"="Less than £15,000",
                                "2"="£15,000 - £24,999",
                                "3"="£25,000 - £39,999",
                                "4"="£40,000 - £59,999",
                                "5"="£60,000 - £99,999",
                                "6"="More than £100,000"
)


########################################################################################################
# Recode change job data
########################################################################################################

df$dem.employment_changed<-recode(df$dem.employment_changed,
                                  "no_change" = "No change",
                                  "retired" = "Retired",
                                  "furlough__returned" = "Furlough (current or previous)",
                                  "furlough_current" = "Furlough (current or previous)",
                                  "lost_job" = "Lost job",
                                  "new_job" = "New job",
                                  "something_else" = "Other employment changes"
)

########################################################################################################
# Recode covid symptoms data
########################################################################################################

df$covid.symptoms<-recode(df$covid.symptoms,
                          "me" = "Yes",
                          "me_and_others" = "Yes",
                          "member" = "Yes",
                          "none" = "No"
)
df$covid.symptoms<-factor(df$covid.symptoms,levels=c("No","Yes"))

########################################################################################################
# Recode trust
########################################################################################################

df$trustgov[which(df$trustgov=="donotknow")]<-NA
df$trustgov<-factor(df$trustgov,levels=c("mostly","always","sometimes","almost_never","never"))

########################################################################################################
#Recode marital stats
########################################################################################################

df$dem.marital<-recode(df$dem.marital,
                       "married" = "Married/Partnership",
                       "single" = "Single",
                       "partnership" = "Married/Partnership",
                       "Other" = "Other arrangement"
)

########################################################################################################
# Recode ethnicity
########################################################################################################

df$eth.ethnicity<-recode(df$eth.ethnicity,
                         "White"="White",
                         "Asian"="Ethnic Minority",
                         "Black"="Ethnic Minority",
                         "Mixed"="Ethnic Minority",
                         "Another Ethnic Group"="Ethnic Minority"
)

df$trust.trust_level<-factor(df$trust.trust_level,levels=c("same","increased","decreased"))


########################################################################################################
# Create descriptive table in markdown and html
########################################################################################################

print(dfSummary(df,na.col = T),file="01_raw_data.html")
tab1 <- dfSummary(df, max.distinct.values = 5, style = "grid",graph.col = F)
summarytools::view(tab1, file = "tab1.md")
summarytools::view(tab1, file = "tab1.html")
tab1


########################################################################################################
# Prepare data for imputation and set seed for reproducible analysis
########################################################################################################

df.for.imp<-df
set.seed(21345)


########################################################################################################
# Read in ancillary function that makes parallel implementation of mice work properly on modern OS X
# (may not be needed in future releases of R but fixes a bug in mice package in early 2021)
########################################################################################################

source("parlmice_commands.R")

########################################################################################################
# PARALLEL IMPUTATION
# Ensure that you set n.core to a sensible number based on CPU availability on your system. If in doubt, use 4.
########################################################################################################

df.imp<-parlmice2(df.for.imp,maxit=50,meth='pmm',cluster.seed = 500,n.core = 15,n.imp.core = 1,cl.type = "FORK")


########################################################################################################
# Merge imputations (with library sjmisc) using modal values for factors | set seed for reproduction
########################################################################################################

set.seed(21345)
df.completion<-merge_imputations(df.imp$data, df.imp)

########################################################################################################
# Save a copy to avoid having to do this stuff again and again
########################################################################################################

saveRDS(object = df.imp,file = "RAA2_imputed.data.vaccine.all.RDS")
saveRDS(object = df.completion,file = "RAA2_imputed.data.vaccine.single.RDS")


########################################################################################################
# Read back the data just saved, creating a start point for the second phase of analysis
########################################################################################################

set.seed(21345)
df.completion<-readRDS("RAA2_imputed.data.vaccine.single.RDS")

########################################################################################################
# Remove some variables that won't be used in this analysis
########################################################################################################
df.completion<-select(df.completion,

                      -dem.people_in_home,
                      -dem.marital        ,
                      -dem.school_age_kids ,
                      -dem.employment_changed,
                      -trust.trust_level          ,
                      -depression.depressed_before  ,
                      -phys_activity.phys_activity_before,
                      -phys_activity.phys_activity_recent,
                      -weight_describe.weight_before     ,
                      -smoking.smoking_before,
                      -weight_describe.weight_recent      ,
                      -alcohol_1.alcohol_before          ,
                      -alcohol_1.alcohol_recent           ,
                      -sleep_hours.sleep_before          ,
                      -sleep_hours.sleep_after            ,
                      -adls.dressing                      ,
                      -adls.walking                      ,
                      -adls.bathing                       ,
                      -adls.eating                       ,
                      -adls.bed                           ,
                      -adls.toilet                       ,
                      -gbv.violence

                      )


########################################################################################################
# Recode various variables to make data more descriptive and set sensible reference groups
########################################################################################################

df.completion$vaccination.vaccinated<-recode(df.completion$vaccination.vaccinated,
       "no" = "Reject_Vaccine",
       "yes" = "Accept_Vaccine")

df.completion$vaccination.feelings<-recode(df.completion$vaccination.feelings,
                                             "0" = "No Vaccine concerns",
                                             "1" = "Vaccine concerns",
                                           "2" = "Vaccine concerns & Rejected vaccine offers")

df.completion$vaccination.flu_vaccine<-recode(df.completion$vaccination.flu_vaccine,
                                           "0" = "Does not get flu vaccine",
                                           "1" = "Sometimes gets flu vaccine",
                                           "2" = "Always gets flu vaccine")

df.completion$vaccination.vaccinated<-relevel(df.completion$vaccination.vaccinated,ref = "Accept_Vaccine")

df.completion$demographics3.income<-factor(df.completion$demographics3.income,levels=c(
  "Less than £15,000",
  "£15,000 - £24,999",
  "£25,000 - £39,999",
  "£40,000 - £59,999",
  "£60,000 - £99,999",
  "More than £100,000"
))

df.completion$demographics3.political_opinion<-factor(df.completion$demographics3.political_opinion,levels=c(
  "floating",
  "conservative",
  "liberal"
))

df.completion$vaccination.flu_vaccine<-factor(df.completion$vaccination.flu_vaccine,levels=c("Always gets flu vaccine","Sometimes gets flu vaccine","Does not get flu vaccine"))

df.completion$smoking.smoking_recent<-factor(df.completion$smoking.smoking_recent,levels=c("non_smoker","light_smoker","moderate_smoker","heavy_smoker"))

df.completion$depression.depressed_recent<-factor(df.completion$depression.depressed_recent,levels=c("not_at_all" , "several_days", "half_the_days","every_day" ))

df.completion$trustgov<-factor(df.completion$trustgov,levels=c("always","mostly","sometimes","almost_never","never"))

df.completion$eth.ethnicity<-recode(df.completion$eth.ethnicity,
                                    "Black/Asian/Mixed/Other Ethnicity"="Ethnic Minority",
                                    "White" = "White")
df.completion$gov_dec.gov_right_decision<-relevel(df.completion$gov_dec.gov_right_decision,ref = "yes")

df.completion$health_overall<-factor(df.completion$health_overall,levels=c("very_good","good","fair","bad","very_bad"))


########################################################################################################
# Make second set of descriptive tables (i.e. with imputed data)
########################################################################################################


tab1b <- dfSummary(df.completion, max.distinct.values = 5, style = "grid",graph.col = F,)
summarytools::view(tab1b, file = "tab1b.md")
summarytools::view(tab1b, file = "tab1b.html")

tab1b

########################################################################################################
# table 2 - differences after covid19
########################################################################################################

tab2 <- tableby(vaccination.vaccinated ~ ., data=df.completion,cat.stats = c("countrowpct"))
tab2summary<-as.data.frame(summary(tab2))
names(tab2summary)[1]<-"factor"
tab2summary<-tab2summary[,c(1,4,2,3)]

tab2b <- tableby(vaccination.vaccinated ~ ., data=df.completion,cat.stats = c("countpct"))
tab2bsummary<-as.data.frame(summary(tab2b))
names(tab2bsummary)[1]<-"factor"

tab2summary$`Total (N=4535)`<-tab2bsummary$`Total (N=4535)`

tab2summary<-flextable(tab2summary)
save_as_html(tab2summary, path = "tab2.html")


########################################################################################################
# Univariate Analysis
########################################################################################################

gender<-coef(summary(glm(data = df.completion,formula = vaccination.vaccinated~dem.gender,family="binomial")))
age<-coef(summary(glm(data = df.completion,formula = vaccination.vaccinated~dem.age,family="binomial")))

set.seed(21345)
univariate_results<-df.completion %>%
  tbl_uvregression(
    method = glm,
    y = vaccination.vaccinated,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    pvalue_fun = ~style_pvalue(.x, digits = 2)
  ) %>%

  add_global_p() %>%  # add global p-value
  add_nevent() %>%    # add number of events of the outcome
  add_q() %>%         # adjusts global p-values for multiple testing
  bold_p() %>%        # bold p-values under a given threshold (default 0.05)
  bold_p(t = 0.10, q = TRUE) %>% # now bold q-values under the threshold of 0.10
  bold_labels()

univariate_results<-univariate_results %>%as_flex_table()
save_as_html(univariate_results, path = "tab3.html")


########################################################################################################
# Multivariate Analysis
########################################################################################################

set.seed(21345)
df.completion.multivariate<-select(df.completion,
                                   dem.gender,
                                   dem.age,
                                   dem.education,
                                   dem.employment,
                                   dem.disabled,
                                   eth.ethnicity,
                                   covid.symptoms,
                                   gov_dec.gov_right_decision,
                                   trustgov,
                                   health_overall,
                                   depression.depressed_recent,
                                   smoking.smoking_recent,
                                   specific_conditions.hypertension,
                                   specific_conditions.obesity,
                                   vaccination.feelings,
                                   vaccination.flu_vaccine,
                                   demographics3.income,
                                   demographics3.hardship,
                                   demographics3.political_opinion,
                                   vaccination.vaccinated
                                     )
m1 <- glm(vaccination.vaccinated ~ ., data = df.completion.multivariate, family = binomial)
summary(m1)$coefficients
table4<-tbl_regression(m1, exponentiate = TRUE)
multivariate_results<-table4 %>%as_flex_table()
multivariate_results
save_as_html(multivariate_results, path = "tab4.html")

########################################################################################################
# Calculate 95% CIs
########################################################################################################

test<-summary(m1)
test<-as.data.frame(coefficients(test))

test$OR<-exp(as.numeric(test$Estimate))
CIs<-confint(m1)
CIs<-exp(CIs)
test<-cbind(test,CIs)
test$variable<-rownames(test)

########################################################################################################
# Checking the model
########################################################################################################
model.summary<-test
model.summary<-select(model.summary,variable,OR,"2.5 %","97.5 %","Pr(>|z|)")

########################################################################################################
# Add some reference points
########################################################################################################

model.summary$variable<-as.character(model.summary$variable)
model.summary[nrow(model.summary)+1,]<-c("dem.genderFemale (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("dem.age18-34 (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("dem.educationschool (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("dem.employmentfulltime (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("eth.ethnicityWhite (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("trustgovalways (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("depression.depressed_recentnot_at_all (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("smoking.smoking_recentnon_smoker	(ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("demographics3.incomeLess than £15,000 (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("demographics3.political_opinioncentre (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("vaccination.flu_vaccineAlways gets flu vaccine (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("vaccination.feelingsNo Vaccine concerns (ref)",1,1,1,1)
model.summary[nrow(model.summary)+1,]<-c("healthoverallvery_good (ref)",1,1,1,1)


########################################################################################################
# Remap variables so that they print nicely
########################################################################################################

model.summary$variable<-factor(model.summary$variable)
model.summary.plot<-model.summary
model.summary.plot<-arrange(model.summary,variable)
model.summary.plot$OR<-as.numeric(model.summary.plot$OR)
model.summary.plot$`2.5 %`<-as.numeric(model.summary.plot$`2.5 %`)
model.summary.plot$`97.5 %`<-as.numeric(model.summary.plot$`97.5 %`)


########################################################################################################
# Create the ggplot of Odds Ratios for rejecting vaccine
########################################################################################################


ggplot(model.summary.plot,aes(x = fct_rev(variable),y=OR))+
  geom_point(aes(),position = position_dodge(width=0.5))+theme_bw()+
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=10),
        axis.text.x = element_text(angle = 90, hjust = 1),
        plot.title = element_text(size=10),
        axis.title.y=element_blank(),
        legend.position="none"
  )+

  geom_hline(yintercept=1,lty=2,col="Red")+
  scale_y_log10(limits=c(0.1,40),labels = c(0.01,0.1,0.25,0.5,1,2,5,10,20,40),breaks = c(0.01,0.1,0.25,0.5,1,2,5,10,20,40))+
  geom_errorbar(aes(x=variable, ymax=`97.5 %`, ymin=`2.5 %`), na.rm=TRUE, position=position_dodge(),width=0.3,lty=1)+
  coord_flip()


ggsave("fig1.tiff")
ggsave("fig1.png")
ggsave("fig1.pdf")


########################################################################################################
# Prepare for STM analysis by binding in free text
########################################################################################################

df<-read.csv("COVID19_V2.csv", na.strings = c("NA",""),stringsAsFactors = T)
df$SubmissionDate<-as.Date(df$SubmissionDate)
df<-filter(df,SubmissionDate>"2020-12-08")

df<-select(df,
vaccination.vaccinated_yes,
vaccination.vaccinated_no
)

df.for_stm<-bind_cols(df.completion.multivariate,df)


########################################################################################################
# Make df for each group, only include people who gave an answer to the free text question
########################################################################################################
df.stm.reject<-df.for_stm[which(df.for_stm$vaccination.vaccinated=="Reject_Vaccine"),]
df.stm.reject<-select(df.stm.reject,-vaccination.vaccinated_yes)

df.stm.accept<-df.for_stm[which(df.for_stm$vaccination.vaccinated=="Accept_Vaccine"),]
df.stm.accept<-select(df.stm.accept,-vaccination.vaccinated_no)


########################################################################################################
########################################################################################################
# Run STM on group who would reject vaccine
########################################################################################################
########################################################################################################


########################################################################################################
# Create corpus
########################################################################################################

{
set.seed(21345)
processed <- textProcessor(df.stm.reject$vaccination.vaccinated_no, metadata = df.stm.reject,stem = FALSE,customstopwords = c("dont","etc","don't","things","also","can","will","don’t","way","made","just","one","seem","much","avoid","feel","need","else","one","vaccines","yet","want","vaccination","vaccine","vaccinations","vaccinated","covid"))
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <-out$meta
}

########################################################################################################
# find best number of topics
########################################################################################################
{
set.seed(21345)
storage <- searchK(out$documents, out$vocab, K = c(2:10,15,20,25,30,35,40), data = meta,cores = 15)
}

########################################################################################################
# add a chart of results based on this - [https://juliasilge.com/blog/evaluating-stm/](https://juliasilge.com/blog/evaluating-stm/)
########################################################################################################

pdf("vaccine_reject_searchK.pdf",useDingbats = F)
plot(storage)
dev.off()

########################################################################################################
# choose a number of topics that maintains high semantic coherence whilst maximising value of the held-out likelihood and minimising residuals
# run the stm with the optimal number of topics (add more covariates that could affect prior probabilty of topics being discussed by using the prevalence command. Add more using covariates=~ dem.age2 + dem.gender + ...)
########################################################################################################

{
set.seed(21345)
fit <- stm(documents = out$documents, vocab = out$vocab,K = 20,max.em.its = 1500, data = out$meta,init.type = "Spectral")
}

########################################################################################################
#ANALYSIS AND VISUALISATION (COMMANDS IN PARENTHESIS)
# Displaying words associated with topics (labelTopics, plot.STM(,type = "labels"), sageLabels, plot.STM(,type = "perspectives")) or documents highly associated with particular topics (findThoughts, plotQuote).
# Make a data table of the metadata topic proportions
########################################################################################################

topicprop<-make.dt(fit, meta)

########################################################################################################
# Plot the map estimates of document-topic proportions
########################################################################################################

plot.STM(fit, "hist")

########################################################################################################
# plot a summary of the topics, with 5 top words
########################################################################################################

plot.STM(fit, "summary", n=5)# distribution and top 5 words per topic

########################################################################################################
# Also show the frequencies of the key words between different topics
########################################################################################################

td_beta <- tidytext::tidy(fit)
options(repr.plot.width=7, repr.plot.height=8, repr.plot.res=100)

td_beta %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  mutate(topic = paste0("Topic ", topic),
         term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = as.factor(topic))) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  theme_bw() +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  scale_x_reordered() +
  labs(x = NULL, y = expression(beta),
       title = "Highest word probabilities for each topic for those who would not accept a COVID-19 vaccine",
       subtitle = "Different words are associated with different topics")
ggsave("vaccine_reject_word_content.pdf")


########################################################################################################
# you can also show detailed wordlist for specific topic
# would be good to go through each topic and identify excess stopwords and add to the custom list above
# at this stage you might want to remove words that are not semantically coherent. Go back and add some custom stopwords to the stm function
# may have to iterate on this process
# The theta object in the stm model output has the posterior probability of a topic given a document that this function uses. i.e. theta is the probability that a response belongs to a specific topic
# if wanted, plot out a selection (here 1:100) of documents and look at how theta patterns = theta is the probability of a document being in that topic
# if you don't have fairly strong theta for one or two topics, maybe reduce number of topics to enhance semantic coherence
########################################################################################################


########################################################################################################
# Plot theta values for each topic
########################################################################################################

td_theta <- tidytext::tidy(fit, matrix = "theta")

selectiontdthteta<-td_theta[td_theta$document%in%c(1:4000),]#select the first 30 documents. be careful to select a sensible interval, as attempting to load a very huge corpus might crash the kernel

thetaplot1<-ggplot(selectiontdthteta, aes(y=gamma, x=document, fill = as.factor(topic))) +
  geom_bar(stat="identity",alpha = 0.8, show.legend = FALSE) +
  facet_wrap(~ topic, ncol = 3) +
  labs(title = "Theta values per document",
       y = expression(theta), x = "Topic")


thetaplot1
ggsave("vaccine_reject_theta_scores.pdf")


########################################################################################################
# worth considering if any of the topics have few documents that have high theta values. For instance here, topic 5 and topic 7 seem less focussed in terms of theta
# To examine documents that are highly representative of topics the findThoughts function
# This function will print the documents highly associated with each topic
########################################################################################################

thoughts<-findThoughts(model = fit,texts = as.character(meta$vaccination.vaccinated_no), n=100, topics = 1:20,thresh=0.3)$docs

#write.csv(thoughts,"thoughts_reject.csv",quote=T)
write.csv(thoughts$`Topic 1`,"thoughts_reject_A.csv",quote = T)
write.csv(thoughts$`Topic 2`,"thoughts_reject_Bi.csv",quote = T)
write.csv(thoughts$`Topic 3`,"thoughts_reject_C.csv",quote = T)
write.csv(thoughts$`Topic 4`,"thoughts_reject_D.csv",quote = T)
write.csv(thoughts$`Topic 5`,"thoughts_reject_E.csv",quote = T)
write.csv(thoughts$`Topic 6`,"thoughts_reject_F.csv",quote = T)
write.csv(thoughts$`Topic 7`,"thoughts_reject_G.csv",quote = T)
write.csv(thoughts$`Topic 8`,"thoughts_reject_Bii.csv",quote = T)
write.csv(thoughts$`Topic 9`,"thoughts_reject_Hi.csv",quote = T)
write.csv(thoughts$`Topic 10`,"thoughts_reject_I.csv",quote = T)
write.csv(thoughts$`Topic 11`,"thoughts_reject_J.csv",quote = T)
write.csv(thoughts$`Topic 12`,"thoughts_reject_Biii.csv",quote = T)
write.csv(thoughts$`Topic 13`,"thoughts_reject_K.csv",quote = T)
write.csv(thoughts$`Topic 14`,"thoughts_reject_L.csv",quote = T)
write.csv(thoughts$`Topic 15`,"thoughts_reject_Biv.csv",quote = T)
write.csv(thoughts$`Topic 16`,"thoughts_reject_M.csv",quote = T)
write.csv(thoughts$`Topic 17`,"thoughts_reject_N.csv",quote = T)
write.csv(thoughts$`Topic 18`,"thoughts_reject_Hii.csv",quote = T)
write.csv(thoughts$`Topic 19`,"thoughts_reject_O.csv",quote = T)
write.csv(thoughts$`Topic 20`,"thoughts_reject_P.csv",quote = T)


########################################################################################################
# Plot Correlation chart
########################################################################################################

fit.corr<- topicCorr(fit,cutoff = 0.25)
pdf("thoughts_reject_correlations.pdf",useDingbats = F)
plot(fit.corr)
dev.off()
plot(fit.corr)


########################################################################################################
# Make table of topics and add titles (Manual process based on reading quotes)
########################################################################################################

reject.topics<-1:20
reject.topics<-as.data.frame(reject.topics)
names(reject.topics)<-"Assignment"
reject.topics$Topic<-NA
reject.topics$Title<-NA
reject.topics$Topic<-c("A","Bi","C","D","E","F","G","Bii","H","I","J","Biii","K","L","Biv","M","N","Hii","O","P")

reject.topics$n.0.3<-c(25,6,11,18,21,10,19,21,6,18,25,22,13,29,12,24,9,16,12,4)
reject.topics$Title[1]<-"Healthy, so can rely on own immune system"
reject.topics$Title[2]<-"No trust in government, pharma industry and vaccine"
reject.topics$Title[3]<-"Concerns about unknown safety and effectiveness"
reject.topics$Title[4]<-"Lack of need for COVID-19 vaccine given low overall mortality rate"
reject.topics$Title[5]<-"Undecided and/or concerned about side-effects and personal medical history"
reject.topics$Title[6]<-"Risks of relatively untested vaccine Vs benefits of vaccinating generally healthy people"
reject.topics$Title[7]<-"Contextualises COVID-19 vaccines in personal history of flu vaccines and adverse reactions"
reject.topics$Title[8]<-"Mistrust of pharma and government in context of rapid development of vaccines"
reject.topics$Title[9]<-"Concerns about side effects"
reject.topics$Title[10]<-"Previous bad reactions to vaccines and/or allergies to penicillin"
reject.topics$Title[11]<-"Concerned about quality of research during rapid vaccine development"
reject.topics$Title[12]<-"Vaccines have not been tested enough"
reject.topics$Title[13]<-"Pregnant, breastfeeding, ineligible or unwilling to be vaccinated"
reject.topics$Title[14]<-"Potentially willing to be vaccinated, but expressed hesitancy or concerns"
reject.topics$Title[15]<-"Not enough testing, specific concerns of adverse reactions and unforeseen complications"
reject.topics$Title[16]<-"Allergies and reactions to medicines"
reject.topics$Title[17]<-"References to previously failed trials, legal cases and medical scandals"
reject.topics$Title[18]<-"Concerns about long term side effects"
reject.topics$Title[19]<-"Ethical or medical oncerns about derivation, formulation and effects of vaccine"
reject.topics$Title[20]<-"[Fewer than five quotes with theta > 0.3]"

reject.topics<-arrange(reject.topics,Topic)
reject.topics<-flextable(reject.topics)
save_as_html(reject.topics, path = "tab5.html")


########################################################################################################
########################################################################################################
# RUN STM on subset that planned to accept vaccine
########################################################################################################
########################################################################################################


########################################################################################################
# Create corpus
########################################################################################################

{
set.seed(21345)
processed <- textProcessor(df.stm.accept$vaccination.vaccinated_yes, metadata = df.stm.accept,stem = FALSE,customstopwords = c("dont","etc","don't","things","also","can","will","don’t","way","made","just","one","seem","much","avoid","feel","need","else","one","vaccines","yet","want","vaccination","vaccine","vaccinations","vaccinated","covid"))
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <-out$meta
}

########################################################################################################
# Find best number of topics
########################################################################################################

{
set.seed(21345)
storage2 <- searchK(out$documents, out$vocab, K = c(2:10,15,20,25,30,35,40), data = meta,cores = 15)
}


########################################################################################################
# Add a chart of results based on this - [https://juliasilge.com/blog/evaluating-stm/](https://juliasilge.com/blog/evaluating-stm/)
########################################################################################################

plot(storage2)
pdf("trust_level_change_searchK.pdf",useDingbats = F)
plot(storage2)
dev.off()

########################################################################################################
# choose a number of topics that maintains high semantic coherence whilst maximising value of the held-out likelihood and minimising residuals
# run the stm with the optimal number of topics (add more covariates that could affect prior probabilty of topics being discussed by using the prevalence command. Add more using covariates=~ dem.age2 + dem.gender + ...)
########################################################################################################

{
set.seed(21345)
fit2 <- stm(documents = out$documents, vocab = out$vocab,K = 25,max.em.its = 1500, data = out$meta,init.type = "Spectral")
}


########################################################################################################
#ANALYSIS AND VISUALISATION (COMMANDS IN PARENTHESIS)
# Displaying words associated with topics (labelTopics, plot.STM(,type = "labels"), sageLabels, plot.STM(,type = "perspectives")) or documents highly associated with particular topics (findThoughts, plotQuote).
# Show words associated with topics
# After reading the thoughts document you can update this with a list of text like c("T1 : Stuff","T2 : Other Stuff")
########################################################################################################


########################################################################################################
# Make a data table of the metadata topic proportions
########################################################################################################

topicprop2<-make.dt(fit2, meta)

########################################################################################################
# Plot the map estimates of document-topic proportions
########################################################################################################

plot.STM(fit2, "hist")


########################################################################################################
# Plot a summary of the topics, with 5 top words
########################################################################################################

plot.STM(fit2, "summary", n=5)# distribution and top 5 words per topic

########################################################################################################
# Also show the frequencies of the key words between different topics
########################################################################################################
td_beta <- tidytext::tidy(fit2)
options(repr.plot.width=7, repr.plot.height=8, repr.plot.res=100)

td_beta %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  mutate(topic = paste0("Topic ", topic),
         term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = as.factor(topic))) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  theme_bw() +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  scale_x_reordered() +
  labs(x = NULL, y = expression(beta),
       title = "Highest word probabilities for each topic for those who would accept a COVID-19 vaccine",
       subtitle = "Different words are associated with different topics")
ggsave("vaccine_accept_word_content.pdf")

########################################################################################################
# you can also show detailed wordlist for specific topic
# would be good to go through each topic and identify excess stopwords and add to the custom list above
# at this stage you might want to remove words that are not semantically coherent. Go back and add some custom stopwords to the stm function
# may have to iterate on this process
# The theta object in the stm model output has the posterior probability of a topic given a document that this function uses. i.e. theta is the probability that a response belongs to a specific topic
# if wanted, plot out a selection (here 1:100) of documents and look at how theta patterns = theta is the probability of a document being in that topic
# if you don't have fairly strong theta for one or two topics, maybe reduce number of topics to enhance semantic coherence
########################################################################################################

########################################################################################################
# Plot the theta values for each topic
########################################################################################################

td_theta <- tidytext::tidy(fit2, matrix = "theta")

selectiontdthteta<-td_theta[td_theta$document%in%c(1:4000),]#select the first 30 documents. be careful to select a sensible interval, as attempting to load a very huge corpus might crash the kernel

thetaplot1<-ggplot(selectiontdthteta, aes(y=gamma, x=document, fill = as.factor(topic))) +
  geom_bar(stat="identity",alpha = 0.8, show.legend = FALSE) +
  facet_wrap(~ topic, ncol = 3) +
  labs(title = "Theta values per document",
       y = expression(theta), x = "Topic")


thetaplot1
ggsave("vaccine_accept_theta_scores.pdf")


########################################################################################################
# Worth considering if any of the topics have few documents that have high theta values. For instance here, topic 5 and topic 7 seem less focussed in terms of theta
# To examine documents that are highly representative of topics the findThoughts function
# This function will print the documents highly associated with each topic
########################################################################################################

thoughts<-findThoughts(model = fit2,texts = as.character(meta$vaccination.vaccinated_yes), n=100, topics = 1:25,thresh=0.3)$docs

write.csv(thoughts$`Topic 1`,"thoughts_accept_T01.csv",quote = T)
write.csv(thoughts$`Topic 2`,"thoughts_accept_T02i.csv",quote = T)
write.csv(thoughts$`Topic 3`,"thoughts_accept_T03.csv",quote = T)
write.csv(thoughts$`Topic 4`,"thoughts_accept_T04i.csv",quote = T)
write.csv(thoughts$`Topic 5`,"thoughts_accept_T05i.csv",quote = T)
write.csv(thoughts$`Topic 6`,"thoughts_accept_T06.csv",quote = T)
write.csv(thoughts$`Topic 7`,"thoughts_accept_T07.csv",quote = T)
write.csv(thoughts$`Topic 8`,"thoughts_accept_T05ii.csv",quote = T)
write.csv(thoughts$`Topic 9`,"thoughts_accept_T04ii.csv",quote = T)
write.csv(thoughts$`Topic 10`,"thoughts_accept_T08.csv",quote = T)
write.csv(thoughts$`Topic 11`,"thoughts_accept_T04iii.csv",quote = T)
write.csv(thoughts$`Topic 12`,"thoughts_accept_T04iv.csv",quote = T)
write.csv(thoughts$`Topic 13`,"thoughts_accept_T09.csv",quote = T)
write.csv(thoughts$`Topic 14`,"thoughts_accept_T05iii.csv",quote = T)
write.csv(thoughts$`Topic 15`,"thoughts_accept_T04v.csv",quote = T)
write.csv(thoughts$`Topic 16`,"thoughts_accept_T10.csv",quote = T)
write.csv(thoughts$`Topic 17`,"thoughts_accept_T11.csv",quote = T)
write.csv(thoughts$`Topic 18`,"thoughts_accept_T12.csv",quote = T)
write.csv(thoughts$`Topic 19`,"thoughts_accept_T05iv.csv",quote = T)
write.csv(thoughts$`Topic 20`,"thoughts_accept_T13.csv",quote = T)
write.csv(thoughts$`Topic 21`,"thoughts_accept_T14.csv",quote = T)
write.csv(thoughts$`Topic 22`,"thoughts_accept_T15.csv",quote = T)
write.csv(thoughts$`Topic 23`,"thoughts_accept_T02ii.csv",quote = T)
write.csv(thoughts$`Topic 24`,"thoughts_accept_T16.csv",quote = T)
write.csv(thoughts$`Topic 25`,"thoughts_accept_T05v.csv",quote = T)


########################################################################################################
# Plot the correlation chart
########################################################################################################

fit.corr2<- topicCorr(fit2,cutoff = 0.25)
plot(fit.corr2)
pdf("thoughts_accept_correlations.pdf",useDingbats = F)
plot(fit.corr2)
dev.off()


########################################################################################################
# Make table of topics and add titles (manual process based on reading quotes)
########################################################################################################

accept.topics<-1:25
accept.topics<-as.data.frame(accept.topics)
names(accept.topics)<-"Assignment"
accept.topics$Topic<-NA
accept.topics$Title<-NA
accept.topics$Topic<-c("1","2i","3","4i","5i","6","7","5ii","4ii","8","4iii","4iv","9","5iii","4v","10","11","12","5iv","13","14","15","2ii","16","5v")

accept.topics$n.0.3<-c(37,40,39,55,0,52,13,0,7,58,1,7,27,2,0,20,17,34,8,54,10,13,1,72,1)
accept.topics$Title[1]<-"Recognises value of vaccinations and compares COVID-19 to flu vaccines"
accept.topics$Title[2]<-"Wants to get back to normal life"
accept.topics$Title[3]<-"Aware of risks and concerns of others, but on balance sees benefit of COVID-19 vaccines"
accept.topics$Title[4]<-"Wants to protect loved ones / vulnerable people, and travel to see friends and family"
accept.topics$Title[5]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[6]<-"Trusts the science, scientists and the scientific process"
accept.topics$Title[7]<-"Has some concerns, but on balance believes acceptance the correct choice"
accept.topics$Title[8]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[9]<-"Protect self and others"
accept.topics$Title[10]<-"To help build herd immunity"
accept.topics$Title[11]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[12]<-"Wishes to mix in social, cultural and work contexts"
accept.topics$Title[13]<-"Recognises vaccination as a social responsibility and public good"
accept.topics$Title[14]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[15]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[16]<-"Healthcare professionals and/or visit care homes"
accept.topics$Title[17]<-"Responsibilities to the wider community"
accept.topics$Title[18]<-"Works in NHS, Schools, Pharma and with vulnerable people"
accept.topics$Title[19]<-"To prevent contracting and spreading infection"
accept.topics$Title[20]<-"Participant and/or family member in high risk group because of age or medical condition"
accept.topics$Title[21]<-"Vaccination to be safe and make others safe, some with negative sentiments on UK government"
accept.topics$Title[22]<-"Prevention better than cure, referencing success of historical vaccine programmes"
accept.topics$Title[23]<-"[Fewer than five quotes with theta > 0.3]"
accept.topics$Title[24]<-"It is the sensible and responsible thing to do"
accept.topics$Title[25]<-"[Fewer than five quotes with theta > 0.3]"

accept.topics<-arrange(accept.topics,Topic)
accept.topics<-flextable(accept.topics)
save_as_html(accept.topics, path = "tab6.html")
accept.topics
##############################################################################


##############################################################################
# END OF ANALYSIS
##############################################################################