
use "/Volumes/Untitled/2025_11_25_NAKO_raw_data_orig_COMPLETE.dta", clear

* ---- Outcome: self-rated health (SRH) ----
* Codes 8 and 9 of d_qol2 are set to missing before any derived variable is built
recode d_qol2 (8 9 = .)
label define OrdinalSRH 1 "Excellent" 2 "Very good" 3 "Good" 4 "Less good" 5 "Poor" 9 "I don't know" 8 "Not specified"
label values d_qol2 OrdinalSRH
label variable d_qol2 "Ordinal SRH"

* Binary version: categories 1-3 become 0, categories 4-5 become 1
recode d_qol2 (1/3 = 0) (4/5 = 1), generate(srh)
label define SRH 0 "Good or better SRH" 1 "Fair or poor SRH"
label values srh SRH
label variable srh "Binary SRH"

* Three-level version: 1-2 become 0, 3 becomes 1, 4-5 become 2
recode d_qol2 (1/2 = 0) (3 = 1) (4/5 = 2), generate(srh3)
label define MultiCatSRH 0 "Excellent or very good" 1 "Good" 2 "Fair or poor"
label values srh3 MultiCatSRH
label variable srh3 "Three-category SRH"

* ---- Age ----
* Rename the raw age variable and label it
rename basis_age age
label variable age "Age"

* Store the 10th, 50th and 90th percentiles of age in local macros
summarize age, detail
local age_10th = r(p10)
local age_median = r(p50)
local age_90th = r(p90)

* Print the stored percentiles to the log
display "10th percentile: `age_10th'"
display "Median (50th percentile): `age_median'"
display "90th percentile: `age_90th'"

* Single-value imputation: missing age is replaced by the constant 51
* NOTE(review): 51 is presumably the median printed above - confirm against the log
replace age = 51 if age == .

* Restricted cubic spline basis for age with knots fixed at 30, 51 and 66
* NOTE(review): the knots presumably correspond to the percentiles printed above - confirm
mkspline age_spline = age, cubic knots (30 51 66)

* ---- Sex ----
* Recode basis_sex (1 -> 0, 2 -> 1) into a new 0/1 variable
recode basis_sex 1=0 2=1, generate(sex)
label variable basis_sex "Sex"
* The derived variable is the one used in all tables and models, so it is
* labelled as well (previously only the source variable received the label)
label variable sex "Sex"
label define Sex 0 "Male" 1 "Female"
label values sex Sex

tabulate sex
* Single-value imputation: missing sex is set to 1 (Female)
* NOTE(review): presumably the most frequent category in the table above - confirm
replace sex = 1 if sex == .

* ---- Marital status ----
* Code -9 is treated as missing; all other codes are carried over unchanged
recode a_ses_famst (-9 = .), generate(marital)
label define MaritalStatusFull 1 "Single" 2 "Married" 3 "Separated" 4 "Divorced" 5 "Widowed"
label values marital MaritalStatusFull
label variable marital "Marital status"

* Inspect the distribution, then set missing values to category 2 (Married)
tabulate marital
replace marital = 2 if marital == .

* ---- Education ----
* Code -9 is set to missing in the source variable itself (in place)
recode a_ses_isced11_level -9=.
* Reverse the ordering so that 0 is the highest level; -88 and -99 become categories 7 and 8
* NOTE(review): source value 5 has no rule, so it would be copied unchanged and share
* code 5 with recoded level 2 (Lower secondary) - confirm that value 5 does not occur
recode a_ses_isced11_level 8=0 7=1 6=2 4=3 3=4 2=5 1=6 -88=7 -99=8, generate(education)
label variable education "Level of education"
label define Education2 0 "Doctorate" 1 "Master's or equivalent" 2 "Bachelor's or equivalent" 3 "Post-secondary non-tertiary level" 4 "Upper secondary" 5 "Lower secondary" 6 "Primary" 7 "Still in general education" 8 "Still in vocational training"
label values education Education2

* Inspect the distribution, then set missing values to category 4 (Upper secondary)
tabulate education
replace education = 4 if education == .

* ---- Migration background ----
* Code -9 is treated as missing; all other codes are carried over unchanged
recode a_ses_mig_status -9=., generate(migrant)
label variable a_ses_mig_status "Migration background"
* The derived variable is the one used in all tables and models, so it is
* labelled as well (previously only the source variable received the label)
label variable migrant "Migration background"
label define Migration 0 "No migration background" 1 "Migration background"
label values migrant Migration

* Inspect the distribution, then set missing values to 0 (No migration background)
tabulate migrant
replace migrant = 0 if migrant == .

* ---- Objective traffic noise (2017) ----
* Label the multi-category source variable
label define NoiseLevel 40 "Less than 55 Lden" 55 "Between 55 and 60 Lden" 60 "Between 60 and 65 Lden" 65 "Between 65 and 70 Lden" 70 "Between 70 and 75 Lden" 75 "75 Lden or higher"
label values g_lden_traf_eio_g_17 NoiseLevel
label variable g_lden_traf_eio_g_17 "Ambient traffic noise 2017 multi-category"

* Binary exposure: codes 0 and 40 become 0, codes 55 to 75 become 1
recode g_lden_traf_eio_g_17 (0 40 = 0) (55 60 65 70 75 = 1), generate(bi_noise)
label define ObjNoise 0 "Low noise" 1 "High noise"
label values bi_noise ObjNoise
label variable bi_noise "Ambient traffic noise 2017 - binary"

* Multi-category exposure with code 0 folded into the lowest category (40)
recode g_lden_traf_eio_g_17 (0 40 = 40) (55 = 55) (60 = 60) (65 = 65) (70 = 70) (75 = 75), generate(alt_noise)
label values alt_noise NoiseLevel

* ---- Years living in home ----
* Codes 7777, 8888 and 9999 are set to missing
recode d_umw2_j (7777 8888 9999 = .)
label variable d_umw2_j "Years living in home"

* ---- Nighttime traffic noise annoyance ----
* Codes 8 and 9 are set to missing before the grouped versions are derived
recode d_umw5 (8 9 = .)
label variable d_umw5 "Nighttime traffic noise annoyance - ordinal"

* Three groups: 1 / 2-3 / 4-5
recode d_umw5 (1 = 0) (2/3 = 1) (4/5 = 2), generate(annoy)
label define NoiseAnnoy 0 "Not at all annoyed" 1 "Slightly/somewhat annoyed" 2 "Very/extremely annoyed"
label values annoy NoiseAnnoy
label variable annoy "Nighttime traffic noise annoyance - 3 groups"

* Two groups: 1 / 2-5
recode d_umw5 (1 = 0) (2/5 = 1), generate(bi_annoy)
label define BiNoiseAnnoy 0 "Not at all annoyed" 1 "Any annoyance"
label values bi_annoy BiNoiseAnnoy
label variable bi_annoy "Nighttime traffic noise annoyance - 2 groups"

* Alternative three groups: 1-2 / 3-4 / 5
recode d_umw5 (1/2 = 0) (3/4 = 1) (5 = 2), generate(alt_annoy)
label define AltAnnoy 0 "Not at all or slightly annoyed" 1 "Moderately or strongly annoyed" 2 "Extremely annoyed"
label values alt_annoy AltAnnoy

* ---- Area-level covariates: urbanization and deprivation index ----
rename (g_dgu_eu_m_u1 g_gisd_rki_m_17) (urban gisd)

* Urbanization: attach labels, inspect, then set missing values to 1 (Urban)
label define Urban 1 "Urban" 2 "Suburban" 3 "Rural"
label values urban Urban
label variable urban "Urbanization"

tabulate urban
replace urban = 1 if urban == .

* Deprivation index: label, inspect, then set missing values to the constant .62295
label variable gisd "German Index of Social Deprivation"

summarize gisd, detail
replace gisd = .62295 if gisd == .

* ---- Study site ----
generate site = basis_uort
label variable site "Study site"

* Collapse sub-sites in one pass:
*   811, 812, 813 -> 81 (Neubrandenburg)
*   52, 53        -> 51 (Berlin)
recode site (811 812 813 = 81) (52 53 = 51)

label define Studybasis_uort 11 "Augsburg" 12 "Regensburg" 21 "Mannheim" 22 "Freiburg" 23 "Saarbruecken" 31 "Essen" 32 "Muenster" 33 "Duesseldorf" 41 "Halle" 42 "Leipzig" 51 "Berlin" 61 "Hannover" 62 "Hamburg" 63 "Bremen" 71 "Kiel" 81 "Neubrandenburg"
label values site Studybasis_uort

* Inspect the distribution, then set missing values to 51 (Berlin)
tabulate site
replace site = 51 if site == .

* ---- Year-matched co-exposures (PM2.5, NO2, NDVI) ----
* Two-digit year taken from characters 3-4 of basis_udat, then converted to a number
generate year = substr(basis_udat, 3, 2)
generate year_num = real(year)

* Empty containers, filled below from the year-specific source variables
generate pm25 = .
generate no2 = .
generate ndvi = .

* Convert the year-specific air-pollution variables to numeric (non-numeric text becomes missing)
destring g_no2_exp_g_14-g_no2_exp_g_19, replace force
destring g_pm25_exp_g_14-g_pm25_exp_g_19, replace force

* Pick the exposure value belonging to each participant's year (14 to 19)
forvalues yy = 14/19 {
    replace no2 = g_no2_exp_g_`yy' if year_num == `yy'
    replace pm25 = g_pm25_exp_g_`yy' if year_num == `yy'
}

* Inspect, then replace remaining missing values by fixed constants
summarize pm25, detail
replace pm25 = 12.44 if pm25 == .
summarize no2, detail
replace no2 = 24.69 if no2 == .

* Same year matching for NDVI
forvalues yy = 14/19 {
    replace ndvi = g_ndvis_mod_g_`yy' if year_num == `yy'
}

summarize ndvi, detail
replace ndvi = .55 if ndvi == .

*Dropped participants descriptives
* Condition identifying anyone missing the outcome or either exposure
local excluded "bi_noise == . | annoy == . | srh == ."

tabulate srh if bi_noise == . | annoy == .
summarize age if `excluded', detail
foreach covar in sex marital education migrant {
    tabulate `covar' if `excluded'
}
tabulate annoy if bi_noise == . | srh == .
tabulate bi_noise if annoy == . | srh == .
foreach covar in pm25 no2 ndvi {
    summarize `covar' if `excluded', detail
}


*Drop participants with missing outcome or exposure
foreach key in srh bi_noise annoy {
    drop if `key' == .
}

*Descriptive analysis
* Outcome distribution, overall and within each binary SRH group
tabulate srh
tabulate d_qol2
forvalues grp = 0/1 {
    tabulate d_qol2 if srh == `grp'
}

* Age, overall and by SRH
summarize age, detail
bysort srh: summarize age, detail

* Categorical covariates and exposures, overall and by SRH
foreach covar in sex marital education migrant annoy bi_noise {
    tabulate `covar'
    bysort srh: tabulate `covar'
}

* Association between annoyance and objective noise
tabulate annoy bi_noise, col chi2 V

tabulate site
bysort srh: tabulate site

* Continuous co-exposures: distribution, quartiles/IQR, and distribution by SRH
* IQRs read from this output: PM2.5 = 2.09, NO2 = 12.17, NDVI = 0.14
foreach expo in pm25 no2 ndvi {
    summarize `expo', detail
    tabstat `expo', statistics(p25 p75 iqr)
    bysort srh: summarize `expo', detail
}

* Objective noise (multi-category and binary) against annoyance
tabulate g_lden_traf_eio_g_17
tabulate g_lden_traf_eio_g_17 annoy, row
tabulate bi_noise
tabulate bi_noise annoy, row

* Rescale co-exposures to per-IQR units using the IQRs noted above
generate pm25_iqr = pm25 / 2.09
generate no2_iqr = no2 / 12.17
generate ndvi_iqr = ndvi / 0.14

* Sign-flipped NDVI on the per-IQR scale
generate ndvi_iqr_reversed = -1 * ndvi_iqr


*Correlations - chi squared and cramer's v for all categorical variables
* The analysis data are preserved because the results file is loaded for listing
* at the end; restore brings the analysis data back afterwards.
preserve
set more off
* Temporary names are used for the postfile handle and for every scalar.
* In expressions Stata resolves a bare name as a variable (or variable
* abbreviation) before it looks for a scalar, so fixed scalar names such as
* chi2, n or v could silently pick up a dataset variable instead.
tempname results s_chi2 s_n s_p s_mindim s_v
postfile `results' str20(var1) str20(var2) chi2 p cramer_v using chi2_cramersV_results.dta, replace

local catvars srh sex marital education migrant annoy bi_noise g_lden_traf_eio_g_17 urban site

* Every ordered pair of distinct variables is tabulated (each pair appears twice,
* once in each order, as in the results file layout used so far)
foreach var1 of local catvars {
    foreach var2 of local catvars {
        if "`var1'" != "`var2'" {
            capture tab `var1' `var2', chi2
            * Pairs whose tabulation fails are skipped without a row in the results
            if _rc == 0 {
                scalar `s_chi2' = r(chi2)
                scalar `s_n' = r(N)
                scalar `s_p' = r(p)

                * Cramer's V = sqrt( chi2 / (N * min(rows - 1, cols - 1)) )
                scalar `s_mindim' = min(r(r), r(c)) - 1
                scalar `s_v' = sqrt(`s_chi2' / (`s_n' * `s_mindim'))

                post `results' ("`var1'") ("`var2'") (`s_chi2') (`s_p') (`s_v')
            }
        }
    }
}

postclose `results'
use chi2_cramersV_results.dta, clear
list
restore

*Correlations - continuous age and continuous or binary variables - Spearman's correlation
* Each continuous variable is correlated with all binary variables and with the
* continuous variables that follow it in the list, so every pair is run once.
local contvars age gisd pm25 no2 ndvi
local binvars srh bi_noise sex migrant
local later `contvars'

foreach cv of local contvars {
    * Remove the current variable so only the ones after it remain
    local later : list later - cv
    foreach partner in `binvars' `later' {
        spearman `cv' `partner'
    }
}

*ANOVA - continuous and multi-category variables
* One-way ANOVA of every continuous variable on every multi-category variable,
* each followed by its effect-size table
foreach outcome in age gisd pm25 no2 ndvi {
    foreach factor in marital education urban site annoy {
        anova `outcome' `factor'
        estat esize

    }
}



*Relationship between age and SRH
* Estimate logistic regression with cubic spline on age
* (age_spline1 and age_spline2 were built with knots 30, 51 and 66)
logit srh age_spline*

* Store coefficients: columns are age_spline1, age_spline2, _cons
matrix b = e(b)

preserve
    * Build a prediction grid with one row per year of age
    clear
    set obs 56
    gen age = 18 + _n  // age 19 to 74

    * Generate spline basis variables matching the fitted model.
    * The knots must be identical to those used for estimation (30 51 66);
    * any other knot set yields a different basis (and a different number of
    * terms), so the stored coefficients would be applied to the wrong variables.
    mkspline age_spline = age, cubic knots(30 51 66)

    * Calculate linear predictor: constant plus spline terms
    gen xb = b[1,1]*age_spline1 + ///
             b[1,2]*age_spline2 + ///
             b[1,3]              // intercept _cons

    * Calculate predicted probability using inverse logit
    gen prob = invlogit(xb)

    * Plot predicted probability against age
    twoway (line prob age, lcolor(red)), ///
        ytitle("Probability of SRH = 1 (Fair or Poor Health)") ///
        xtitle("Age") ///
        title("Predicted Probability of Poor/Fair SRH by Age") ///
        legend(off)

restore

**----------- assessment of multi-collinearity -------------**
* Linear regression on the full main model, used only to obtain variance inflation factors
local vif_rhs i.bi_noise i.annoy age_spline* i.sex i.marital i.education i.migrant ///
        i.site pm25_iqr no2_iqr ndvi_iqr_reversed
regress srh `vif_rhs'
estat vif

*-----------------------------*
*Main Analysis
* Building blocks of the adjustment sets, reused by every model in this section
local minimal age_spline* i.sex
local socio i.marital i.education i.migrant
local coexp pm25_iqr no2_iqr ndvi_iqr_reversed

*Logistic regression - minimum models - noise only
foreach expo in "i.bi_noise" "i.annoy" "i.bi_noise i.annoy" {
    logistic srh `expo' `minimal'
}

*Logistic regression - main models - noise only
foreach expo in "i.bi_noise" "i.annoy" "i.bi_noise i.annoy" {
    logistic srh `expo' `minimal' `socio' i.site
}

*Logistic regression - main models with co-exposures
foreach expo in "i.bi_noise" "i.annoy" "i.bi_noise i.annoy" {
    logistic srh `expo' `minimal' `socio' i.site `coexp'
}

*Logistic regression - main models stratified by site
* Site is the stratifier here, so it is left out of the covariate list
foreach expo in "i.bi_noise" "i.annoy" {
    bysort site: logistic srh `expo' `minimal' `socio' `coexp'
}

*------------------------------*
*Main analysis stratified by sex: Logistic regression - main models
* Adjustment set without sex (sex is the stratifier or enters via the interaction)
local adj_nosex age_spline* i.marital i.education i.migrant i.site pm25_iqr no2_iqr ndvi_iqr_reversed

foreach expo in "i.bi_noise" "i.annoy" "i.bi_noise i.annoy" {
    bysort sex: logistic srh `expo' `adj_nosex'
}

*Multiplicative interactions between exposures and sex
logistic srh i.sex##i.bi_noise `adj_nosex'
logistic srh i.annoy##i.sex `adj_nosex'
logistic srh i.sex##i.bi_noise i.sex##i.annoy `adj_nosex'
	

*========================================================*
*Sensitivity analyses
**At least five years in house
* Stata treats missing values as larger than any number, so a bare
* "d_umw2_j > 4" would also keep participants whose duration is unknown
* (codes 7777/8888/9999 were recoded to missing). They are excluded explicitly.
logistic srh i.bi_noise age_spline* i.sex i.marital i.education i.migrant i.site pm25_iqr no2_iqr ndvi_iqr_reversed if d_umw2_j > 4 & !missing(d_umw2_j)
logistic srh i.annoy age_spline* i.sex i.marital i.education i.migrant i.site pm25_iqr no2_iqr ndvi_iqr_reversed if d_umw2_j > 4 & !missing(d_umw2_j)

* Full adjustment set shared by the sensitivity models below
local adj age_spline* i.sex i.marital i.education i.migrant i.site
local coexp pm25_iqr no2_iqr ndvi_iqr_reversed

**Multi-category objective noise variable
logistic srh i.alt_noise `adj' `coexp'

**Alternative annoyance variable
logistic srh i.alt_annoy `adj' `coexp'

**Alternative SRH variable
* Multinomial models on the three-category outcome, category 0 as the base
foreach expo in bi_noise annoy {
    mlogit srh3 i.`expo' `adj' `coexp', rrr base(0)
}

**Requested by reviewers:
*Drop participants without noise measurement instead of re-categorizing them to 40
recode g_lden_traf_eio_g_17 (0 = .) (40 = 0) (55 60 65 70 75 = 1), generate(sensi_noise)
label values sensi_noise ObjNoise

tabulate sensi_noise

logistic srh i.sensi_noise `adj' `coexp'
logistic srh i.sensi_noise i.annoy `adj' `coexp'

*Adding year
foreach expo in "i.bi_noise" "i.annoy" "i.bi_noise i.annoy" {
    logistic srh `expo' `adj' i.year_num `coexp'
}

*RERI analysis
* Interaction on the additive scale between binary objective noise and binary annoyance,
* with the same adjustment set as the main co-exposure models
local reri_adj age_spline* i.sex i.marital i.education i.migrant i.site pm25_iqr no2_iqr ndvi_iqr_reversed
reri logistic srh bi_noise bi_annoy `reri_adj'
