3 Data Construction
Data for this manuscript come from the 2016 and 2018 DC Area Surveys. More information about the 2016 DC Area Survey can be found here and the 2018 DC Area Survey can be found here.
# Load the 2016 wave. The next statement reads an object called `dcas`, so the
# .Rdata file is expected to restore one; it is copied to a wave-specific name.
load('../data/dcas/DCAS_2016_weighted.Rdata')
dcas16 <- dcas
# Load the 2018 wave. It is read back under the same name `dcas` (presumably
# overwriting the 2016 object), so the order of these statements matters.
load('../data/dcas/DCAreaSurvey2018.Rdata')
dcas18 <- dcas
# Drop the shared name so later code cannot pick up the wrong wave by accident.
rm(dcas)
3.1 Respondent Selection
Respondents Living in Multiracial Neighborhoods (DCAS 2016). The DCAS 2016 data represent two populations: those living in multiracial neighborhoods and those living in disproportionately Latino neighborhoods. This manuscript uses only data from the multiracial neighborhoods. I create an indicator variable, quad, that flags respondents living in multiracial neighborhoods (‘quad’ comes from my previous description of stable multiracial neighborhoods as ‘quadrivial neighborhoods’).
# TRUE for respondents sampled from a "Global Neighborhood" (multiracial) tract
dcas16$quad <- with(dcas16, neighborhood == "Global Neighborhood")
table(dcas16[,c('neighborhood','quad')]) ##Check variable## quad
## neighborhood FALSE TRUE
## Global Neighborhood 0 674
## Latino Enclave 548 0
# Remove factor levels of the sampled-tract identifier that no row uses
dcas16[["sample_tract"]] <- droplevels(dcas16[["sample_tract"]])
# Retired respondent-selection code, kept for reference (not run):
# dcas16$quad <- dcas16$neighborhood=='Global Neighborhood'
# table(dcas16[,c('neighborhood','quad')])
# N_norace <- sum(is.na(dcas$dem.race) & dcas$quad==TRUE)
# dcas <- dcas[!is.na(dcas$dem.race) & dcas$quad==TRUE,]
# dcas$sample_tract <- droplevels(dcas$sample_tract)
# N_quad <- length(dcas$studycase)
# N <- sum(!is.na(dcas$dem.race))
# texcmds[['N']] <- N
3.2 Variable Construction
3.2.1 Dependent Variables
Neighborhood Satisfaction. Create neighborhood satisfaction variable (satisfied) to represent responses of “Extremely Satisfied” or “Very Satisfied”. Check that the variable was created correctly.
# Binary satisfaction indicator: 1 = "Extremely satisfied" or "Very satisfied",
# 0 = any other answer. `%in%` never returns NA, so unanswered items are set to
# NA explicitly instead of being counted as "not satisfied"; this matches how
# `better` treats missing answers and what mean(satisfied, na.rm = TRUE)
# further down expects.
dcas16$satisfied <- ifelse(
  is.na(dcas16$nhd.satisfaction),
  NA_real_,
  as.numeric(dcas16$nhd.satisfaction %in% c(
    'Extremely satisfied', 'Very satisfied'))
)
# Ordered four-level version: strip the trailing " satisfied" from each label
# and order from least to most satisfied (missing answers stay NA).
dcas16$nhdsat <- gsub(" satisfied", "", dcas16$nhd.satisfaction) %>%
  ordered(levels = c("Not at all", "Somewhat", "Very", "Extremely"))
table(dcas16[, c('nhd.satisfaction','satisfied')]) ##Check variable## satisfied
## nhd.satisfaction 0 1
## Extremely satisfied 0 231
## Very satisfied 0 591
## Somewhat satisfied 362 0
## Not at all satisfied 26 0
table(dcas16[, c('nhd.satisfaction','nhdsat')]) ##Check variable## nhdsat
## nhd.satisfaction Not at all Somewhat Very Extremely
## Extremely satisfied 0 0 0 231
## Very satisfied 0 0 591 0
## Somewhat satisfied 0 362 0 0
## Not at all satisfied 26 0 0 0
# Binary satisfaction indicator for 2018 (1 = "Extremely"/"Very", 0 = other).
# `%in%` never returns NA, so missing answers are kept as NA explicitly rather
# than being coded 0.
dcas18$satisfied <- ifelse(
  is.na(dcas18$nhdsat),
  NA_real_,
  as.numeric(dcas18$nhdsat %in% c('Extremely', 'Very'))
)
table(dcas18[, c('nhdsat', 'satisfied')]) ##Checkvariable## satisfied
## nhdsat 0 1
## Not at all 22 0
## Somewhat 214 0
## Very 0 468
## Extremely 0 351
Neighborhood improvement. Create variable (better) measuring whether respondent perceives that the neighborhood has improved. Include responses “Much better” and “Somewhat better”. Check that variable was created correctly.
# Response labels that count as perceived improvement
betterlabs <- c("Much better", "Somewhat better")
# Lookup from the five response labels (names) to three categories (values);
# the order of first appearance (Worse, Same, Better) sets the level order
changelabs <- setNames(
  c("Worse", "Worse", "Same", "Better", "Better"),
  c("Much worse", "Somewhat worse", "About the same",
    "Somewhat better", "Much better")
)
dcas16 <- mutate(
  dcas16,
  # Logical improvement flag; missing answers stay NA instead of FALSE
  better = ifelse(is.na(nhd.change), NA, nhd.change %in% betterlabs),
  # Three-category factor version of the same item
  nhdchg = recode_factor(nhd.change, !!!changelabs)
)
table(dcas16[, c("nhd.change", "better")], useNA="always") ##Check variable## better
## nhd.change FALSE TRUE <NA>
## Much better 0 155 0
## Somewhat better 0 285 0
## About the same 624 0 0
## Somewhat worse 124 0 0
## Much worse 15 0 0
## <NA> 0 0 19
table(dcas16[, c("nhd.change", "nhdchg")], useNA="always") ##Check variable## nhdchg
## nhd.change Worse Same Better <NA>
## Much better 0 0 155 0
## Somewhat better 0 0 285 0
## About the same 0 624 0 0
## Somewhat worse 124 0 0 0
## Much worse 15 0 0 0
## <NA> 0 0 0 19
dcas18 <- mutate(
  dcas18,
  # Keep the original five-category item before nhdchg is overwritten below
  nhdchgbak = nhdchg,
  # Logical improvement flag; missing answers stay NA instead of FALSE
  better = ifelse(is.na(nhdchgbak), NA, nhdchgbak %in% betterlabs),
  # Collapse to the three categories defined in changelabs
  nhdchg = recode_factor(nhdchgbak, !!!changelabs)
)
table(dcas18[, c("nhdchgbak", "better")], useNA="always") ##Check variable## better
## nhdchgbak FALSE TRUE <NA>
## Much worse 16 0 0
## Somewhat worse 84 0 0
## About the same 482 0 0
## Somewhat better 0 260 0
## Much better 0 138 0
## <NA> 0 0 81
table(dcas18[, c("nhdchgbak", "nhdchg")], useNA="always") ##Check variable## nhdchg
## nhdchgbak Worse Same Better <NA>
## Much worse 16 0 0 0
## Somewhat worse 84 0 0 0
## About the same 0 482 0 0
## Somewhat better 0 0 260 0
## Much better 0 0 138 0
## <NA> 0 0 0 81
3.2.2 Independent Variable
Race. Create a race variable measuring four-category, mutually exclusive racial categories.
# The four race/ethnicity groups used in the analysis
fourraces <- c("white", "asian", "black", "latino")
dcas16 <- mutate(
  dcas16,
  # Rename "api" to "asian" and make "white" the reference level
  raceeth = relevel(recode_factor(dem.race, api = "asian"), ref = "white"),
  # Missing-race flag
  raceeth_mi = is.na(raceeth),
  # TRUE when the respondent belongs to one of the four analysis groups
  anarace = raceeth %in% fourraces
)
table(dcas16[, c('dem.race', 'raceeth')], useNA='always') ##Check variable## raceeth
## dem.race white asian black latino <NA>
## api 0 277 0 0 0
## black 0 0 253 0 0
## latino 0 0 0 215 0
## white 417 0 0 0 0
## <NA> 0 0 0 0 60
table(dcas16[, c('dem.race', 'raceeth_mi')], useNA="always")## raceeth_mi
## dem.race FALSE TRUE <NA>
## api 277 0 0
## black 253 0 0
## latino 215 0 0
## white 417 0 0
## <NA> 0 60 0
table(dcas16[, c('dem.race', 'anarace')], useNA="always")## anarace
## dem.race FALSE TRUE <NA>
## api 0 277 0
## black 0 253 0
## latino 0 215 0
## white 0 417 0
## <NA> 60 0 0
# Lookup from the 2018 race labels (names) to the 2016-style codes (values)
racelabs <- setNames(
  c("white", "asian", "black", "latino", "other", "other"),
  c("White", "Asian/Pac. Islander", "Black", "Latinx",
    "Native American", "Other")
)
dcas18 <- mutate(
  dcas18,
  # Keep the original labels for the check tables
  raceethbak = raceeth,
  # Recode to the short codes and make "white" the reference level
  raceeth = relevel(recode_factor(raceethbak, !!!racelabs), ref = "white"),
  # Missing-race flag
  raceeth_mi = is.na(raceeth),
  # TRUE when the respondent belongs to one of the four analysis groups
  anarace = raceeth %in% fourraces
)
table(dcas18[, c('raceethbak', 'raceeth')], useNA="always") ##Check variable## raceeth
## raceethbak white asian black latino other <NA>
## Asian/Pac. Islander 0 93 0 0 0 0
## Black 0 0 308 0 0 0
## Latinx 0 0 0 75 0 0
## Native American 0 0 0 0 8 0
## Other 0 0 0 0 21 0
## White 513 0 0 0 0 0
## <NA> 0 0 0 0 0 43
table(dcas18[, c('raceethbak', 'raceeth_mi')], useNA="always")## raceeth_mi
## raceethbak FALSE TRUE <NA>
## Asian/Pac. Islander 93 0 0
## Black 308 0 0
## Latinx 75 0 0
## Native American 8 0 0
## Other 21 0 0
## White 513 0 0
## <NA> 0 43 0
table(dcas18[, c('raceethbak', 'anarace')], useNA="always")## anarace
## raceethbak FALSE TRUE <NA>
## Asian/Pac. Islander 0 93 0
## Black 0 308 0
## Latinx 0 75 0
## Native American 8 0 0
## Other 21 0 0
## White 0 513 0
## <NA> 43 0 0
3.2.3 Control Variables
3.2.3.1 Demographic variables
Create variables for demographic controls.
Age. The age variable (age) was calculated by subtracting birth year from 2016. Represents age on December 31, 2016. Center age at 50.
dcas16$age <- dcas16$dem.age - 50 ## Center at age 50
# qplot() has no `labs` argument (ggplot2 reported "Ignoring unknown
# parameters: labs" and drew no title); the plot title is passed through `main`.
qplot(dcas16$age, main = "2016")
## Warning: Removed 58 rows containing non-finite values (stat_bin).

# Center at age 50. This overwrites the raw age column in place, so the
# statement must be run only once per loaded dataset.
dcas18$age <- dcas18$age - 50
# qplot() has no `labs` argument (ggplot2 reported "Ignoring unknown
# parameters: labs" and drew no title); the plot title is passed through `main`.
qplot(dcas18$age, main = "2018")
## Warning: Removed 49 rows containing non-finite values (stat_bin).

Foreign born. Variable (forborn) measuring whether respondent reported being born outside of the United States (reference=no).
# The 2016 file already carries a logical foreign-born flag; copy it under the
# harmonized name shared with the 2018 wave
dcas16[["forborn"]] <- dcas16[["dem.forborn"]]
table(dcas16[, c("dem.forborn", "forborn")], useNA='always') ##Check variable## forborn
## dem.forborn FALSE TRUE <NA>
## FALSE 657 0 0
## TRUE 0 530 0
## <NA> 0 0 35
# Foreign born = born in "Another country". The comparison already yields NA
# where usborn is missing, so no separate missing-value branch is needed.
dcas18 <- mutate(dcas18, forborn = usborn == "Another country")
table(dcas18[, c("usborn", "forborn")], useNA="always") ##Check variable## forborn
## usborn FALSE TRUE <NA>
## United States 793 0 0
## Puerto Rico 2 0 0
## Another country 0 205 0
## <NA> 0 0 61
Male. Variable measures whether the respondent reported being male. The option “other” was offered to respondents. Two respondents in 2016 and five respondents in 2018 reported other and were counted as missing on this variable.
# TRUE for "Male"; the comparison is NA wherever dem.gender.mf is missing
dcas16$man <- with(dcas16, dem.gender.mf == "Male")
table(dcas16[, c('dem.gender', 'man')], useNA='always')## man
## dem.gender FALSE TRUE <NA>
## Male 0 553 0
## Female 635 0 0
## Other 0 0 2
## <NA> 0 0 32
# Male indicator for 2018. Only "Male" and "Female" are coded TRUE/FALSE; every
# other answer ("In another way") and missing responses become NA, as the
# variable description states and as the 2016 wave is coded. The previous
# if_else(gender == 'Male', TRUE, FALSE) coded "In another way" as FALSE.
dcas18 <- dcas18 %>%
  mutate(
    man = if_else(gender %in% c("Male", "Female"), gender == "Male", NA)
  )
table(dcas18[, c("gender","man")], useNA="always")## man
## gender FALSE TRUE <NA>
## Male 0 456 0
## Female 575 0 0
## In another way 5 0 0
## <NA> 0 0 25
Kids. Variable (kids) measures whether the respondent has any children under the age of 18 living at home. Recode unreasonable values to be missing.
# q2 holds the number of children as labels, plus the non-answers "No Answer"
# and 99. Convert once, mapping the known "No Answer" label to NA up front so
# that a coercion warning now signals a genuinely unexpected value.
kids_n <- as.character(dcas16$q2)
kids_n[kids_n %in% "No Answer"] <- NA
kids_n <- as.numeric(kids_n)
# Counts above 90 are unreasonable (99 is a non-answer code): treat as missing
kids_n[which(kids_n > 90)] <- NA
# TRUE when at least one child lives in the household
dcas16$kids <- kids_n > 0
rm(kids_n)
table(dcas16[, c('q2', 'kids')], useNA='always')## kids
## q2 FALSE TRUE <NA>
## 0 813 0 0
## 1 0 172 0
## 10 0 1 0
## 2 0 137 0
## 3 0 32 0
## 4 0 13 0
## 5 0 4 0
## 7 0 2 0
## 8 0 1 0
## No Answer 0 0 45
## 99 0 0 2
## <NA> 0 0 0
# Keep the raw count, then flag households with at least one child. The
# comparison is NA where the count is missing, so no explicit NA branch is
# needed.
dcas18 <- mutate(dcas18, kidsbak = kids, kids = kidsbak > 0)
table(dcas18[, c('kidsbak', 'kids')], useNA="always")## kids
## kidsbak FALSE TRUE <NA>
## 0 783 0 0
## 1 0 119 0
## 2 0 97 0
## 3 0 19 0
## 4 0 8 0
## 5 0 1 0
## 6 0 1 0
## 7 0 1 0
## 10 0 1 0
## <NA> 0 0 31
Married. Variable (married) measures whether respondent is married or in a married-like relationship.
# Married (or married-style arrangement) indicator. grepl() returns FALSE for
# NA input, which silently coded respondents with no marital-status answer as
# "not married"; keep them NA so they are handled like the 2018 wave.
dcas16$married <- ifelse(
  is.na(dcas16$dem.marital.stat),
  NA,
  grepl("Now married", dcas16$dem.marital.stat)
)
table(dcas16[,c('dem.marital.stat','married')], useNA='always') ##Check recoding## married
## dem.marital.stat FALSE TRUE <NA>
## Now married or in a married-style arrangement 0 714 0
## Widowed 97 0 0
## Divorced 130 0 0
## Separated 25 0 0
## Never married 235 0 0
## <NA> 21 0 0
# Keep the original marital-status labels, then reduce to a logical flag
# (NA where the answer is missing)
dcas18 <- mutate(
  dcas18,
  marriedbak = married,
  married = marriedbak == "Married"
)
table(dcas18[, c("marriedbak", "married")], useNA="always") ##Check variable## married
## marriedbak FALSE TRUE <NA>
## Married 0 540 0
## Widowed 82 0 0
## Divorced 149 0 0
## Separated 21 0 0
## Never married 239 0 0
## <NA> 0 0 30
3.2.3.2 Socioeconomic Variables
Educational attainment. Create variable (educ) measuring educational attainment. Check variable was created correctly.
# Unordered education factor with high-school graduates as the reference level
dcas16$educ <- relevel(
  factor(dcas16$dem.educ.attain, ordered = FALSE),
  ref = "H.S."
)
table(dcas16[, c('dem.educ.attain', 'educ')], useNA='always')## educ
## dem.educ.attain H.S. <H.S. Some college, no B.A. B.A. M.A.+ <NA>
## <H.S. 0 79 0 0 0 0
## H.S. 129 0 0 0 0 0
## Some college, no B.A. 0 0 281 0 0 0
## B.A. 0 0 0 366 0 0
## M.A.+ 0 0 0 0 342 0
## <NA> 0 0 0 0 0 25
# Lookup from the seven 2018 education labels (names) to the five 2016
# categories (values)
educlabs <- setNames(
  c("<H.S.", "<H.S.", "H.S.",
    "Some college, no B.A.", "Some college, no B.A.",
    "B.A.", "M.A.+"),
  c("Less than HS", "Did not finish HS", "HS diploma or GED",
    "Some college, no degree", "Associate's degree",
    "Bachelor's degree", "Advanced degree")
)
dcas18 <- mutate(
  dcas18,
  # Keep the original labels for the check table
  educbak = educ,
  # Collapse to the 2016 categories with "H.S." as the reference level
  educ = relevel(recode_factor(educbak, !!!educlabs), ref = "H.S.")
)
table(dcas18[, c("educbak", "educ")], useNA="always")## educ
## educbak H.S. <H.S. Some college, no B.A. B.A. M.A.+ <NA>
## Less than HS 0 7 0 0 0 0
## Did not finish HS 0 21 0 0 0 0
## HS diploma or GED 103 0 0 0 0 0
## Some college, no degree 0 0 147 0 0 0
## Associate's degree 0 0 55 0 0 0
## Bachelor's degree 0 0 0 285 0 0
## Advanced degree 0 0 0 0 418 0
## <NA> 0 0 0 0 0 25
Income. Create variable (inc) measuring income using four levels.
# dcas16$inc <- dcas16$dem.income.cat4
# table(dcas16$inc, useNA='always')
# inc_dummies <- paste0('inc',1:4)
# dcas[,inc_dummies] <- lapply(1:4,
# function(i) {unclass(dcas$dem.income.cat4)==i})
# for(i in 1:4) { ## Check recoding
# print(table(dcas[,c('dem.income.cat4',paste0('inc',i))]))
# }
# inc_names <- sanitize(levels(dcas$dem.income.cat4))
# inc_names[1] <- paste0("\\emph{Income}&&\\\\", inc_names[1])
# table(dcas18$income, useNA="always")
Missingness on Socioeconomic Variables. Record how many respondents were missing educational attainment and income data.
# Store the number of respondents with missing income and education in each
# wave for export to LaTeX
texcmds$miinc16 <- sum(is.na(dcas16$dem.income.cat4))
texcmds$miedu16 <- sum(is.na(dcas16$dem.educ.attain))
texcmds$miinc18 <- sum(is.na(dcas18$income))
texcmds$miedu18 <- sum(is.na(dcas18$educ))
| Variable | Missing 2016 | Missing 2018 |
|---|---|---|
| Education | 25 | 25 |
| Income | 109 | 79 |
Housing tenure. Create indicator for whether respondent owns their home.
# The 2016 file already has a logical ownership flag; copy it under the
# harmonized name
dcas16 <- mutate(dcas16, own = hh.own)
table(dcas16[, c("hh.own", "own")], useNA="always")## own
## hh.own FALSE TRUE <NA>
## FALSE 354 0 0
## TRUE 0 859 0
## <NA> 0 0 9
# Owner = tenure is "Own" ("Rent" and "Other" are FALSE). The comparison is
# already NA where tenure is missing.
dcas18 <- mutate(dcas18, own = tenure == "Own")
table(dcas18[, c("tenure", "own")], useNA="always")## own
## tenure FALSE TRUE <NA>
## Rent 326 0 0
## Own 0 701 0
## Other 30 0 0
## <NA> 0 0 4
3.2.3.3 Neighborhood Experience Variables
Years in the neighborhood. Variable (nhdyrs) measures how long the respondent reported living in neighborhood.
# Years lived in the neighborhood; q4 is converted via character, so any
# non-numeric answer becomes NA (with a coercion warning)
dcas16[["nhdyrs"]] <- as.numeric(as.character(dcas16$q4))
## Warning: NAs introduced by coercion
qplot(dcas16$nhdyrs, binwidth=5)## Warning: Removed 18 rows containing non-finite values (stat_bin).

# Copy the 2018 tenure-in-neighborhood item under the harmonized name
dcas18[["nhdyrs"]] <- dcas18$yrsnhd
qplot(dcas18$nhdyrs)## Warning: Removed 8 rows containing non-finite values (stat_bin).

Perceived size of neighborhood. Create three-category variable that measures perceived neighborhood size. Check that variable was created correctly.
# Lookup from the five perceived-size answers (names) to three categories
# (values)
sizelabs <- c(
  "1 to 4 blocks" = "1-9 blocks",
  "5 to 9 blocks" = "1-9 blocks",
  "10 to 25 blocks" = "10-50 blocks",
  "25 to 50 blocks" = "10-50 blocks",
  "More than 50 blocks" = ">50 blocks"
)
dcas16 <- dcas16 %>%
  mutate(nhdsize = recode_factor(nhd.size, !!!sizelabs))
table(dcas16[, c('nhd.size', 'nhdsize')], useNA="always") ## Check variable## nhdsize
## nhd.size 1-9 blocks 10-50 blocks >50 blocks <NA>
## 1 to 4 blocks 395 0 0 0
## 5 to 9 blocks 343 0 0 0
## 10 to 25 blocks 0 298 0 0
## 25 to 50 blocks 0 105 0 0
## More than 50 blocks 0 0 58 0
## <NA> 0 0 0 23
# The 2018 labels write ranges as "1-4 blocks" rather than "1 to 4 blocks";
# rewrite the lookup names to match. NOTE(review): the 2018 top category is
# already labelled ">50 blocks", which has no entry in the lookup; judging by
# the check table it passes through recode_factor() unchanged - confirm.
names(sizelabs) <- sub(" to ", "-", names(sizelabs), fixed = TRUE)
dcas18 <- mutate(
  dcas18,
  # Keep the original five-category item for the check table
  nhdsizebak = nhdsize,
  nhdsize = recode_factor(nhdsizebak, !!!sizelabs)
)
table(dcas18[, c('nhdsizebak','nhdsize')], useNA="always") #Check variable## nhdsize
## nhdsizebak 1-9 blocks 10-50 blocks >50 blocks <NA>
## 1-4 blocks 267 0 0 0
## 5-9 blocks 307 0 0 0
## 10-25 blocks 0 254 0 0
## 25-50 blocks 0 109 0 0
## >50 blocks 0 0 46 0
## <NA> 0 0 0 78
3.3 Add Neighborhood Context onto Survey Data
Create variables containing neighborhood racial composition types by Census tract. These will be used to analyze data by different racial compositions in 2015.
# Classify every row of `dcarea` by racial composition (nhdtype, wtype) and
# compute a four-group entropy score (H) plus its standardized version (Hc).
# The ifelse() chains below are order-dependent: later assignments override
# earlier ones.
dcarea <- dcarea %>%
mutate(
# Presence flags: TRUE when the group's share column is at least 10
# (presumably percent, given the division by 100 further down)
w = race.pnhw >= 10,
b = race.pnhb >= 10,
l = race.phsp >= 10,
a = race.papi >= 10,
q = quad15 == TRUE,
# Build a composition code by appending one letter per group present,
# always in the order w, b, l, a
nhdtype = '',
nhdtype = ifelse(w, 'w', nhdtype),
nhdtype = ifelse(b, paste0(nhdtype,'b'), nhdtype),
nhdtype = ifelse(l, paste0(nhdtype,'l'), nhdtype),
nhdtype = ifelse(a, paste0(nhdtype,'a'), nhdtype),
# A group with a share above 50 replaces the combination code with its
# single letter
nhdtype = ifelse(race.pnhw > 50, 'w', nhdtype),
nhdtype = ifelse(race.pnhb > 50, 'b', nhdtype),
nhdtype = ifelse(race.phsp > 50, 'l', nhdtype),
nhdtype = ifelse(race.papi > 50, 'a', nhdtype),
# Rows flagged quad15 are labelled 'quad' regardless of the codes above
nhdtype = ifelse(quad15, 'quad', nhdtype),
# Collapse to a white-centered typology: 'quad'; 'white' (code is exactly
# 'w'); 'white-1' / 'white-2' (a 'w' followed by exactly one / two more
# characters at the end of the code); everything else 'non-white'
wtype = ifelse(nhdtype=='quad', 'quad', ''),
wtype = ifelse(nhdtype=='w', 'white', wtype),
wtype = ifelse(wtype=='' & grepl('w\\w{1}$', nhdtype), 'white-1', wtype),
wtype = ifelse(wtype=='' & grepl('w\\w{2}$', nhdtype), 'white-2', wtype),
wtype = ifelse(wtype=='', 'non-white', wtype),
# Temporary ln<column> copies of every column whose name starts with 'race':
# log(value / 100), with 0 substituted where the value is exactly 0 so that
# the matching term in H below is 0 rather than NaN
across(starts_with('race'), ~ifelse(.==0, 0, log(./100)),
.names='ln{.col}'),
# H = -sum(p * ln(p)) over the four groups; exp(ln p) recovers p
H = (-1 * (exp(lnrace.pnhw) * lnrace.pnhw +
exp(lnrace.pnhb) * lnrace.pnhb +
exp(lnrace.phsp) * lnrace.phsp +
exp(lnrace.papi) * lnrace.papi )),
# Hc: H expressed as a z-score across all rows of dcarea
Hc= (H - mean(H, na.rm=TRUE)) / sd(H, na.rm = TRUE)
) %>%
# Drop the temporary log columns
select(-starts_with('lnrace'))
Append the LTDB 2000 values of Census characteristics and the 2015 values of ACS characterisitcs to each of the DCAS dataframes. The code below constructs a variable, lntotchg, that equals the natural log of the change in the non-Hispanic white population from 2000 to 2015. This creates a linearly scaled variable that measures change in the white population that accounts for the size of neighborhoods.
The 2018 DCAS requires an additional step of appending a list of block-groups for each respondent and then merging based on tracts of sampled respondents.
## Read the LTDB 2000 full-count file (tract id kept as character) and
## lower-case its column names
ltdb <- rename_with(
  read_csv("../data/ltdb_std_2000_fullcount.csv",
           col_types = cols(TRTID10 = "c")),
  tolower
)
## Tract-level neighborhood table: derive trtid10 from GISJOIN by keeping the
## three digit groups captured by the pattern (the leading "G" and the two "0"
## separators are dropped), attach the 2000 non-Hispanic white count, and
## compute the log ratio of the 2015 to the 2000 count
nhoods <- dcarea %>%
  mutate(
    trtid10 = sub("G(\\d{2})0(\\d{3})0(\\d{6})", "\\1\\2\\3", GISJOIN)
  ) %>%
  left_join(select(ltdb, trtid10, nhwht00), by = "trtid10") %>%
  mutate(lntotchg = log(nhw15 / nhwht00))
## 2016 survey: the sampled tract is the join key
dcas16 <- left_join(
  mutate(dcas16, trtid10 = sample_tract),
  nhoods,
  by = "trtid10"
)
## 2018 survey: first attach each respondent's tract from the lookup file
## (both columns read as character), then join the neighborhood table
dcas18 <- full_join(
  dcas18,
  read_csv("../data/dcas/census_tract_R1049.csv", col_types = "cc"),
  by = "rid"
) %>%
  rename(trtid10 = tract) %>%
  left_join(nhoods, by = "trtid10")
3.4 Data Imputation and Weighting
Select variables to keep that will be used in analysis. Define vectors of identification and numeric variables that will be used for imputation.
# Analysis variables passed to the imputation model
vars <- c(
  # "satisfied", "better",
  "nhdsat", "nhdchg", "raceeth", "age", "forborn", "man", "kids", "married",
  "educ", "own", "nhdyrs", "nhdsize"
  # , "inc"
)
# 2016: character respondent id and a `strata` column named like the 2018 one
dcas16 <- mutate(
  dcas16,
  rid = as.character(studycase),
  strata = sample_strata
)
idvars16 <- c("rid", "weight", "strata", "sample_tract")
# Every analysis variable except the two continuous ones is treated as nominal
nominals <- setdiff(vars, c("age", "nhdyrs"))
# 2018: respondent id as character, same identifier set minus the sampled tract
dcas18$rid <- as.character(dcas18$rid)
idvars18 <- c("rid", "weight", "strata")
Re-level the tract identification number used for fixed effects to the reference tract is that which has the median level of satisfaction.
# Mean satisfaction per sampled tract, sorted from least to most satisfied
# (arrange() places missing means last)
satmu <- dcas16 %>%
  select(sample_tract, satisfied) %>%
  group_by(sample_tract) %>%
  summarize(meansat = mean(satisfied, na.rm=TRUE)) %>%
  arrange(meansat)
# Pick the middle row of the sorted table as the reference tract. ceiling()
# gives the true median row for an odd number of tracts and the lower middle
# row for an even number. round() rounds halves to the even integer, so for
# odd counts it could land on the row just below the median (e.g. row 2 of 5).
medsatid <- as.character(satmu$sample_tract[ceiling(nrow(satmu) / 2)])
dcas16$sample_tract <- relevel(dcas16$sample_tract, ref=medsatid)
# levels(dcas16$sample_tract) ## Not run: Check referencing
Prepare data to be imputed based on different datasets.
# Fix the random seed so the multiple imputation is reproducible
set.seed(214518)
# Respondent-level columns carried through the imputation as id variables
idvars <- c("satisfied", "better", "anarace")
# Neighborhood-level columns carried through the same way
nhoodvars <- c(
  "nhwht00", "lntotchg", "nhdtype", "wtype", "H", "Hc", "race.pnhw"
)
nivars <- c(idvars, nhoodvars)
Create five imputation datasets of survey-weighted data and assign to object dcas<YR>svy. For the DCAS 2016 data, include only respondents who live in multiracial neighborhoods in the multiple imputation. Create vectors of variable names for different types of variables to be used as parameters for multiple imputation.
# 2016: keep only respondents in multiracial ("Global Neighborhood") tracts and
# rebuild the tract factor so it carries only the tracts that remain
dcas16q <- dcas16 %>%
  subset(neighborhood == "Global Neighborhood") %>%
  mutate(sample_tract = factor(sample_tract))
# Five imputations; columns listed in `idvars` are passed to amelia() as id
# variables. dcas16q is already restricted to multiracial tracts, so no
# further row filter is applied here.
dcas16mi <- amelia(
  dcas16q[, unique(c(vars, nominals, nivars, idvars16))],
  m = 5, p2s = FALSE, noms = nominals, emburn = c(500, 500),
  idvars = c(nivars, idvars16)
)
# Survey design over the list of imputed 2016 datasets
dcas16svy <- svydesign(
  id = ~rid, strata = ~strata, weights = ~weight,
  data = imputationList(dcas16mi$imputations)
)
# 2018: same imputation settings on the full sample
dcas18mi <- amelia(
  dcas18[, unique(c(vars, nominals, nivars, idvars18))],
  m = 5, p2s = FALSE, noms = nominals, emburn = c(500, 500),
  idvars = c(nivars, idvars18)
)
dcas18svy <- svydesign(
  id = ~rid, strata = ~strata, weights = ~weight,
  data = imputationList(dcas18mi$imputations)
)
3.5 Center Variables
Center all variables on their DC-area-wide values (estimated using the DCAS 2018 data) in order to create comparable regression values across the two data sets. Subtract the mean of the weighted DCAS 2018 variables from the corresponding variables in both the DCAS 2016 (multiracial neighborhoods) and DCAS 2018 (DC-area) data. This sets the intercept across the two data sets at a corresponding value that represents a white resident with DC-area-wide mean values on all other measures.
# Variables centered on a single weighted 2018 mean (note: this replaces the
# earlier, longer `vars` vector)
vars <- c("age", "forborn", "man", "kids", "married", "own", "nhdyrs")
# One combined (across imputations) survey-weighted mean per variable. When
# svymean() returns more than one coefficient, the one whose name ends in
# "TRUE" is kept.
mu18 <- setNames(
  lapply(vars, function(v) {
    rhs <- reformulate(v)
    est <- coef(MIcombine(with(dcas18svy, svymean(rhs, na.rm = TRUE))))
    if (length(est) == 1) est[[1]] else est[[grep("TRUE$", names(est))]]
  }),
  vars
)
# Factor variables: store one mean per level as educ1..educ5 and
# nhdsize1..nhdsize3, in the order svymean() returns the coefficients
educ <- coef(MIcombine(with(dcas18svy, svymean(~educ, na.rm = TRUE))))
mu18[sprintf("educ%d", 1:5)] <- educ
nhdsize <- coef(MIcombine(with(dcas18svy, svymean(~nhdsize, na.rm = TRUE))))
mu18[sprintf("nhdsize%d", 1:3)] <- nhdsize
# Add mean-centered copies of the control variables to a survey design.
#
# df: a survey design (or imputation list of designs) that has the columns
#     age, forborn, man, kids, married, educ, own, nhdyrs and nhdsize.
# Returns `df` with the added columns agec, forbornc, manc, kidsc, marriedc,
# educ1c, educ3c, educ4c, educ5c, ownc, nhdyrsc, nhdsize2c and nhdsize3c, each
# equal to the variable (or factor-level dummy) minus the matching value in
# the global list `mu18`.
svy.center <- function(df) {
  df %>% update(
    agec = age - mu18[["age"]],
    forbornc = as.integer(forborn) - mu18[["forborn"]],
    manc = as.integer(man) - mu18[["man"]],
    kidsc = as.integer(kids) - mu18[["kids"]],
    marriedc = as.integer(married) - mu18[["married"]],
    # Column 1 of the model matrix is the intercept; column k (k >= 2) is the
    # dummy for the k-th factor level, and mu18[["educ<k>"]] is the mean
    # stored for the k-th level, so column k pairs with educ<k>.
    educall = model.matrix(~educ),
    # The first dummy (column 2) was previously centered on mu18[["educ1"]],
    # the share of the reference level; its own mean is mu18[["educ2"]]. The
    # output keeps the name educ1c so downstream formulas still work.
    # NOTE(review): assumes svymean(~educ) and model.matrix(~educ) list the
    # levels in the same order - confirm against levels(educ).
    educ1c = educall[, 2] - mu18[["educ2"]],
    educ3c = educall[, 3] - mu18[["educ3"]],
    educ4c = educall[, 4] - mu18[["educ4"]],
    educ5c = educall[, 5] - mu18[["educ5"]],
    # Overwrite the temporary matrix column with a constant
    educall = 1,
    ownc = as.integer(own) - mu18[["own"]],
    nhdyrsc = nhdyrs - mu18[["nhdyrs"]],
    # Same pairing for neighborhood size: column k <-> mu18[["nhdsize<k>"]]
    nhdsizeall = model.matrix(~nhdsize),
    nhdsize2c = nhdsizeall[, 2] - mu18[["nhdsize2"]],
    nhdsize3c = nhdsizeall[, 3] - mu18[["nhdsize3"]],
    nhdsizeall = 1
  )
}
# Center both waves on the 2018 means
dcas16svy <- dcas16svy %>% svy.center()
dcas18svy <- dcas18svy %>% svy.center()
3.6 Save Data
Save R object containing the multiply-imputed survey-weighted data (dcassvy), variable names (vars), imputation list upon which survey data were created (dcasmi), a string representing the directory containing data (dataDIR), and the list of values to export to LaTeX (texcmds).
# save(vars, texcmds, dcarea,
# dcas16svy, dcas16, dcas16mi,
# dcas18svy, dcas18, dcas18mi,
# file = '../data/dcassvy.Rdata')