3 Data Construction

Data for this manuscript come from the 2016 and 2018 DC Area Surveys. More information about the 2016 DC Area Survey can be found here and the 2018 DC Area Survey can be found here.

load('../data/dcas/DCAS_2016_weighted.Rdata')
dcas16 <- dcas

load('../data/dcas/DCAreaSurvey2018.Rdata')
dcas18 <- dcas
rm(dcas)

3.1 Respondent Selection

Respondents Living in Multiracial Neighborhoods (DCAS 2016). The DCAS 2016 data represent two populations: those living in multiracial neighborhoods and those living in disproportionately Latino neighborhoods. This manuscript only used data from the multiracial neighborhoods. I create a variable quad to represent these variables (‘quad’ comes from my previous description of stable multiracial neighborhoods as ‘quadrivial neighborhoods’).

dcas16$quad <- dcas16$neighborhood=='Global Neighborhood'
table(dcas16[,c('neighborhood','quad')]) ##Check variable

##                      quad
## neighborhood          FALSE TRUE
##   Global Neighborhood     0  674
##   Latino Enclave        548    0

dcas16$sample_tract <- droplevels(dcas16$sample_tract)

# dcas16$quad <- dcas16$neighborhood=='Global Neighborhood'
# table(dcas16[,c('neighborhood','quad')])
# N_norace <- sum(is.na(dcas$dem.race) & dcas$quad==TRUE)
# dcas <- dcas[!is.na(dcas$dem.race) & dcas$quad==TRUE,]
# dcas$sample_tract <- droplevels(dcas$sample_tract)
# N_quad <- length(dcas$studycase)
# N <- sum(!is.na(dcas$dem.race))
# texcmds[['N']] <- N

3.2 Variable Construction

3.2.1 Dependent Variables

Neighborhood Satisfaction. Create neighborhood satisfaction variable (satisfied) to represent responses of “Extremely Satisfied” or “Very Satisfied”. Check that the variable was created correctly.

dcas16$satisfied <- as.numeric(dcas16$nhd.satisfaction %in% c(
                       'Extremely satisfied','Very satisfied')*1)
dcas16$nhdsat <- gsub(" satisfied", "", dcas16$nhd.satisfaction) %>%
    ordered(levels = c("Not at all", "Somewhat", "Very", "Extremely"))
table(dcas16[, c('nhd.satisfaction','satisfied')]) ##Check variable

##                       satisfied
## nhd.satisfaction         0   1
##   Extremely satisfied    0 231
##   Very satisfied         0 591
##   Somewhat satisfied   362   0
##   Not at all satisfied  26   0

table(dcas16[, c('nhd.satisfaction','nhdsat')]) ##Check variable

##                       nhdsat
## nhd.satisfaction       Not at all Somewhat Very Extremely
##   Extremely satisfied           0        0    0       231
##   Very satisfied                0        0  591         0
##   Somewhat satisfied            0      362    0         0
##   Not at all satisfied         26        0    0         0

dcas18$satisfied <- dcas18$nhdsat %in% c('Extremely', 'Very')*1
table(dcas18[, c('nhdsat', 'satisfied')]) ##Checkvariable

##             satisfied
## nhdsat         0   1
##   Not at all  22   0
##   Somewhat   214   0
##   Very         0 468
##   Extremely    0 351

Neighborhood improvement. Create variable (better) measuring whether respondent perceives that the neighborhood has improved. Include responses “Much better” and “Somewhat better”. Check that variable was created correctly.

betterlabs <- c("Much better", "Somewhat better")
changelabs <- c(
    "Much worse" = "Worse",
    "Somewhat worse" = "Worse",
    "About the same" = "Same",
    "Somewhat better" = "Better",
    "Much better" = "Better"
)
dcas16 <- dcas16 %>%
    mutate(
        better = ifelse(
            is.na(nhd.change), NA, 
            dcas16$nhd.change %in% betterlabs),
        nhdchg = recode_factor(nhd.change, !!!changelabs)
    )
table(dcas16[, c("nhd.change", "better")], useNA="always") ##Check variable

##                  better
## nhd.change        FALSE TRUE <NA>
##   Much better         0  155    0
##   Somewhat better     0  285    0
##   About the same    624    0    0
##   Somewhat worse    124    0    0
##   Much worse         15    0    0
##   <NA>                0    0   19

table(dcas16[, c("nhd.change", "nhdchg")], useNA="always") ##Check variable

##                  nhdchg
## nhd.change        Worse Same Better <NA>
##   Much better         0    0    155    0
##   Somewhat better     0    0    285    0
##   About the same      0  624      0    0
##   Somewhat worse    124    0      0    0
##   Much worse         15    0      0    0
##   <NA>                0    0      0   19

dcas18 <- dcas18 %>%
    mutate(
        nhdchgbak = nhdchg,
        better = ifelse(
            is.na(nhdchgbak), NA, 
            nhdchgbak %in% betterlabs),
        nhdchg = recode_factor(nhdchgbak, !!!changelabs)
    )
table(dcas18[, c("nhdchgbak", "better")], useNA="always") ##Check variable

##                  better
## nhdchgbak         FALSE TRUE <NA>
##   Much worse         16    0    0
##   Somewhat worse     84    0    0
##   About the same    482    0    0
##   Somewhat better     0  260    0
##   Much better         0  138    0
##   <NA>                0    0   81

table(dcas18[, c("nhdchgbak", "nhdchg")], useNA="always") ##Check variable

##                  nhdchg
## nhdchgbak         Worse Same Better <NA>
##   Much worse         16    0      0    0
##   Somewhat worse     84    0      0    0
##   About the same      0  482      0    0
##   Somewhat better     0    0    260    0
##   Much better         0    0    138    0
##   <NA>                0    0      0   81

3.2.2 Independent Variable

Race. Create a race variable measuring four-category, mutually exclusive racial categories.

fourraces <- c('white', 'asian', 'black', 'latino')

dcas16 <- dcas16 %>%
    mutate(
        raceeth = recode_factor(dem.race, api = "asian"),
        raceeth = relevel(raceeth, ref="white"),
        raceeth_mi = is.na(raceeth),
        anarace = raceeth %in% fourraces
    )
table(dcas16[, c('dem.race', 'raceeth')], useNA='always') ##Check variable

##         raceeth
## dem.race white asian black latino <NA>
##   api        0   277     0      0    0
##   black      0     0   253      0    0
##   latino     0     0     0    215    0
##   white    417     0     0      0    0
##   <NA>       0     0     0      0   60

table(dcas16[, c('dem.race', 'raceeth_mi')], useNA="always")

##         raceeth_mi
## dem.race FALSE TRUE <NA>
##   api      277    0    0
##   black    253    0    0
##   latino   215    0    0
##   white    417    0    0
##   <NA>       0   60    0

table(dcas16[, c('dem.race', 'anarace')], useNA="always")

##         anarace
## dem.race FALSE TRUE <NA>
##   api        0  277    0
##   black      0  253    0
##   latino     0  215    0
##   white      0  417    0
##   <NA>      60    0    0

racelabs <- c(
    "White" = "white",
    "Asian/Pac. Islander" = "asian",
    "Black" = "black",
    "Latinx" = "latino",
    "Native American" = "other",
    "Other" = "other"
)
dcas18 <- dcas18 %>%
    mutate(
        raceethbak = raceeth,
        raceeth = recode_factor(raceeth, !!!racelabs),
        raceeth = relevel(raceeth, ref="white"),
        raceeth_mi = is.na(raceeth),
        anarace = raceeth %in% fourraces
    )
table(dcas18[, c('raceethbak', 'raceeth')], useNA="always") ##Check variable

##                      raceeth
## raceethbak            white asian black latino other <NA>
##   Asian/Pac. Islander     0    93     0      0     0    0
##   Black                   0     0   308      0     0    0
##   Latinx                  0     0     0     75     0    0
##   Native American         0     0     0      0     8    0
##   Other                   0     0     0      0    21    0
##   White                 513     0     0      0     0    0
##   <NA>                    0     0     0      0     0   43

table(dcas18[, c('raceethbak', 'raceeth_mi')], useNA="always")

##                      raceeth_mi
## raceethbak            FALSE TRUE <NA>
##   Asian/Pac. Islander    93    0    0
##   Black                 308    0    0
##   Latinx                 75    0    0
##   Native American         8    0    0
##   Other                  21    0    0
##   White                 513    0    0
##   <NA>                    0   43    0

table(dcas18[, c('raceethbak', 'anarace')], useNA="always")

##                      anarace
## raceethbak            FALSE TRUE <NA>
##   Asian/Pac. Islander     0   93    0
##   Black                   0  308    0
##   Latinx                  0   75    0
##   Native American         8    0    0
##   Other                  21    0    0
##   White                   0  513    0
##   <NA>                   43    0    0

3.2.3 Control Variables

3.2.3.1 Demographic variables

Create variables for demographic controls.

Age. The age variable (age) was calculated by subtracting birth year from 2016. Represents age on December 31, 2016. Center age at 50.

dcas16$age <- dcas16$dem.age - 50 ## Center at age 50
qplot(dcas16$age, labs=c(title="2016"))

## Warning: Ignoring unknown parameters: labs

## Warning: Removed 58 rows containing non-finite values (stat_bin).

dcas18$age <- dcas18$age - 50
qplot(dcas18$age, labs=c(title="2018"))

## Warning: Ignoring unknown parameters: labs

## Warning: Removed 49 rows containing non-finite values (stat_bin).

Foreign born. Variable (forborn) measuring whether respondent reported being born outside of the United States (reference=no).

dcas16$forborn <- dcas16$dem.forborn
table(dcas16[, c("dem.forborn", "forborn")], useNA='always') ##Check variable

##            forborn
## dem.forborn FALSE TRUE <NA>
##       FALSE   657    0    0
##       TRUE      0  530    0
##       <NA>      0    0   35

dcas18 <- dcas18 %>%
    mutate(forborn = ifelse(is.na(usborn), NA, usborn=="Another country"))
table(dcas18[, c("usborn", "forborn")], useNA="always") ##Check variable

##                  forborn
## usborn            FALSE TRUE <NA>
##   United States     793    0    0
##   Puerto Rico         2    0    0
##   Another country     0  205    0
##   <NA>                0    0   61

Male. Variable measures whether the respondent reported being male. The option “other” was offered to respondents. Two respondents in 2016 and five respondents in 2018 reported other and were counted as missing on this variable.

dcas16$man <- dcas16$dem.gender.mf=='Male'
table(dcas16[, c('dem.gender', 'man')], useNA='always')

##           man
## dem.gender FALSE TRUE <NA>
##     Male       0  553    0
##     Female   635    0    0
##     Other      0    0    2
##     <NA>       0    0   32

dcas18 <- dcas18 %>%
    mutate(
        man = if_else(gender=='Male', TRUE, FALSE)
        )
table(dcas18[, c("gender","man")], useNA="always")

##                 man
## gender           FALSE TRUE <NA>
##   Male               0  456    0
##   Female           575    0    0
##   In another way     5    0    0
##   <NA>               0    0   25

Kids. Variable (kids) measures whether the respondent has any children under the age of 18 living at home. Recode unreasonable values to be missing.

dcas16$kids <- as.numeric(as.character(dcas16$q2)) > 0

## Warning: NAs introduced by coercion

dcas16$kids[as.numeric(as.character(dcas16$q2)) > 90] <- NA

## Warning in dcas16$kids[as.numeric(as.character(dcas16$q2)) > 90] <- NA: NAs
## introduced by coercion

table(dcas16[, c('q2', 'kids')], useNA='always')

##            kids
## q2          FALSE TRUE <NA>
##   0           813    0    0
##   1             0  172    0
##   10            0    1    0
##   2             0  137    0
##   3             0   32    0
##   4             0   13    0
##   5             0    4    0
##   7             0    2    0
##   8             0    1    0
##   No Answer     0    0   45
##   99            0    0    2
##   <NA>          0    0    0

dcas18$kidsbak <- dcas18$kids
dcas18$kids <- ifelse(!is.na(dcas18$kidsbak), dcas18$kidsbak>0, NA)
table(dcas18[, c('kidsbak', 'kids')], useNA="always")

##        kids
## kidsbak FALSE TRUE <NA>
##    0      783    0    0
##    1        0  119    0
##    2        0   97    0
##    3        0   19    0
##    4        0    8    0
##    5        0    1    0
##    6        0    1    0
##    7        0    1    0
##    10       0    1    0
##    <NA>     0    0   31

Married. Variable (married) measures whether respondent is married or in a married-like relationship.

dcas16$married <- grepl("Now married", dcas16$dem.marital.stat)
table(dcas16[,c('dem.marital.stat','married')], useNA='always') ##Check recoding

##                                                married
## dem.marital.stat                                FALSE TRUE <NA>
##   Now married or in a married-style arrangement     0  714    0
##   Widowed                                          97    0    0
##   Divorced                                        130    0    0
##   Separated                                        25    0    0
##   Never married                                   235    0    0
##   <NA>                                             21    0    0

dcas18$marriedbak <- dcas18$married
dcas18$married <- dcas18$marriedbak == "Married"
table(dcas18[, c("marriedbak", "married")], useNA="always") ##Check variable

##                married
## marriedbak      FALSE TRUE <NA>
##   Married           0  540    0
##   Widowed          82    0    0
##   Divorced        149    0    0
##   Separated        21    0    0
##   Never married   239    0    0
##   <NA>              0    0   30

3.2.3.2 Socioeconomic Variables

Educational attainment. Create variable (educ) measuring educational attainment. Check variable was created correctly.

dcas16$educ <- factor(dcas16$dem.educ.attain, order=FALSE) %>% 
                relevel(ref='H.S.')
table(dcas16[, c('dem.educ.attain', 'educ')], useNA='always')

##                        educ
## dem.educ.attain         H.S. <H.S. Some college, no B.A. B.A. M.A.+ <NA>
##   <H.S.                    0    79                     0    0     0    0
##   H.S.                   129     0                     0    0     0    0
##   Some college, no B.A.    0     0                   281    0     0    0
##   B.A.                     0     0                     0  366     0    0
##   M.A.+                    0     0                     0    0   342    0
##   <NA>                     0     0                     0    0     0   25

educlabs <- c(
    "Less than HS" = "<H.S.",
    "Did not finish HS" = "<H.S.",
    "HS diploma or GED" = "H.S.",
    "Some college, no degree" = "Some college, no B.A.",
    "Associate\'s degree" = "Some college, no B.A.",
    "Bachelor\'s degree" = "B.A.",
    "Advanced degree" = "M.A.+"
)

dcas18 <- dcas18 %>%
    mutate(
        educbak = educ,
        educ = recode_factor(educbak, !!!educlabs),
        educ = relevel(educ, ref="H.S.")
    )
table(dcas18[, c("educbak", "educ")], useNA="always")

##                          educ
## educbak                   H.S. <H.S. Some college, no B.A. B.A. M.A.+ <NA>
##   Less than HS               0     7                     0    0     0    0
##   Did not finish HS          0    21                     0    0     0    0
##   HS diploma or GED        103     0                     0    0     0    0
##   Some college, no degree    0     0                   147    0     0    0
##   Associate's degree         0     0                    55    0     0    0
##   Bachelor's degree          0     0                     0  285     0    0
##   Advanced degree            0     0                     0    0   418    0
##   <NA>                       0     0                     0    0     0   25

Income. Create variable (inc) measuring income using four levels.

# dcas16$inc <- dcas16$dem.income.cat4
# table(dcas16$inc, useNA='always')
# inc_dummies <- paste0('inc',1:4)
# dcas[,inc_dummies] <- lapply(1:4,
#         function(i) {unclass(dcas$dem.income.cat4)==i})
# for(i in 1:4) { ## Check recoding
#     print(table(dcas[,c('dem.income.cat4',paste0('inc',i))]))
# }
# inc_names <- sanitize(levels(dcas$dem.income.cat4))
# inc_names[1] <- paste0("\\emph{Income}&&\\\\", inc_names[1])

# table(dcas18$income, useNA="always")

Missingness on Socioeconomic Variables. Record how many respondents were missing educational attaiment and income data.

texcmds[['miinc16']] <- sum(is.na(dcas16$dem.income.cat4))
texcmds[['miedu16']] <- sum(is.na(dcas16$dem.educ.attain))

texcmds[['miinc18']] <- sum(is.na(dcas18$income))
texcmds[['miedu18']] <- sum(is.na(dcas18$educ))

Variable	Missing 2016	Missing 2018
Education	25 25
Income	109 79

Housing tenure. Create indicator for whether respondent owns their home.

dcas16 <- dcas16 %>%
    mutate(own = hh.own)
table(dcas16[, c("hh.own", "own")], useNA="always")

##        own
## hh.own  FALSE TRUE <NA>
##   FALSE   354    0    0
##   TRUE      0  859    0
##   <NA>      0    0    9

dcas18 <- dcas18 %>%
    mutate(own = ifelse(is.na(tenure), NA, tenure=="Own"))
table(dcas18[, c("tenure", "own")], useNA="always")

##        own
## tenure  FALSE TRUE <NA>
##   Rent    326    0    0
##   Own       0  701    0
##   Other    30    0    0
##   <NA>      0    0    4

3.2.3.3 Neighborhood Experience Variables

Years in the neighborhood. Variable (nhdyrs) measures how long the respondent reported living in neighborhood.

dcas16$nhdyrs <- as.numeric(as.character(dcas16$q4))

## Warning: NAs introduced by coercion

qplot(dcas16$nhdyrs, binwidth=5)

## Warning: Removed 18 rows containing non-finite values (stat_bin).

dcas18$nhdyrs <- dcas18$yrsnhd
qplot(dcas18$nhdyrs)

## Warning: Removed 8 rows containing non-finite values (stat_bin).

Perceived size of neighborhood. Create three-category variable that measures perceived neighborhood size. Check that variable was created correctly.

sizelabs <- c(
    `1 to 4 blocks` = "1-9 blocks",
    `5 to 9 blocks` = "1-9 blocks",
    `10 to 25 blocks` = "10-50 blocks",
    `25 to 50 blocks` = "10-50 blocks",
    `More than 50 blocks` = ">50 blocks"
)
dcas16 = mutate(dcas16, nhdsize = recode_factor(nhd.size, !!!sizelabs))
table(dcas16[, c('nhd.size', 'nhdsize')], useNA="always") ## Check variable

##                      nhdsize
## nhd.size              1-9 blocks 10-50 blocks >50 blocks <NA>
##   1 to 4 blocks              395            0          0    0
##   5 to 9 blocks              343            0          0    0
##   10 to 25 blocks              0          298          0    0
##   25 to 50 blocks              0          105          0    0
##   More than 50 blocks          0            0         58    0
##   <NA>                         0            0          0   23

names(sizelabs) <- gsub(" to ", "-", names(sizelabs))
dcas18 <- dcas18 %>%
    mutate(
        nhdsizebak = nhdsize, 
        nhdsize = recode_factor(nhdsize, !!!sizelabs)
    )
table(dcas18[, c('nhdsizebak','nhdsize')], useNA="always") #Check variable

##               nhdsize
## nhdsizebak     1-9 blocks 10-50 blocks >50 blocks <NA>
##   1-4 blocks          267            0          0    0
##   5-9 blocks          307            0          0    0
##   10-25 blocks          0          254          0    0
##   25-50 blocks          0          109          0    0
##   >50 blocks            0            0         46    0
##   <NA>                  0            0          0   78

3.3 Add Neighborhood Context onto Survey Data

Create variables containing neighborhood racial composition types by Census tract. These will be used to analyze data by different racial compositions in 2015.

dcarea <- dcarea %>%
    mutate(
        w = race.pnhw >= 10,
        b = race.pnhb >= 10,
        l = race.phsp >= 10,
        a = race.papi >= 10,
        q = quad15 == TRUE,

        nhdtype = '',
        nhdtype = ifelse(w, 'w', nhdtype),
        nhdtype = ifelse(b, paste0(nhdtype,'b'), nhdtype),
        nhdtype = ifelse(l, paste0(nhdtype,'l'), nhdtype),
        nhdtype = ifelse(a, paste0(nhdtype,'a'), nhdtype),
        nhdtype = ifelse(race.pnhw > 50, 'w', nhdtype),
        nhdtype = ifelse(race.pnhb > 50, 'b', nhdtype),
        nhdtype = ifelse(race.phsp > 50, 'l', nhdtype),
        nhdtype = ifelse(race.papi > 50, 'a', nhdtype),
        nhdtype = ifelse(quad15, 'quad', nhdtype),
        
        wtype = ifelse(nhdtype=='quad', 'quad', ''),
        wtype = ifelse(nhdtype=='w', 'white', wtype),
        wtype = ifelse(wtype=='' & grepl('w\\w{1}$', nhdtype), 'white-1', wtype),
        wtype = ifelse(wtype=='' & grepl('w\\w{2}$', nhdtype), 'white-2', wtype),
        wtype = ifelse(wtype=='', 'non-white', wtype),
        
        across(starts_with('race'), ~ifelse(.==0, 0, log(./100)), 
               .names='ln{.col}'),
        H = (-1 * (exp(lnrace.pnhw) * lnrace.pnhw +
                   exp(lnrace.pnhb) * lnrace.pnhb +
                   exp(lnrace.phsp) * lnrace.phsp +
                   exp(lnrace.papi) * lnrace.papi  )),
        Hc= (H - mean(H, na.rm=TRUE)) / sd(H, na.rm = TRUE)
                
                
    ) %>%
    select(-starts_with('lnrace'))

Append the LTDB 2000 values of Census characteristics and the 2015 values of ACS characterisitcs to each of the DCAS dataframes. The code below constructs a variable, lntotchg, that equals the natural log of the change in the non-Hispanic white population from 2000 to 2015. This creates a linearly scaled variable that measures change in the white population that accounts for the size of neighborhoods. The 2018 DCAS requires an additional step of appending a list of block-groups for each respondent and then merging based on tracts of sampled respondents.

## Load LTDB 2000 and merge into DC Area neighborhood tract-level dataset
ltdb <- read_csv('../data/ltdb_std_2000_fullcount.csv',
                 col_types = cols(TRTID10 = 'c')) %>%
    rename_with(tolower) 

nhoods <- dcarea %>%
    mutate(trtid10 = sub('G(\\d{2})0(\\d{3})0(\\d{6})', '\\1\\2\\3', GISJOIN)) %>%
    left_join(select(ltdb, trtid10, nhwht00), by='trtid10') %>%
    mutate(lntotchg = log(nhw15/nhwht00))

## Merge combined 2000 Census/2015 ACS data into 2016 DC Area Survey data
dcas16 <- dcas16 %>%
    mutate(trtid10 = sample_tract) %>%
    left_join(nhoods, by='trtid10')
    
dcas18 <- dcas18 %>%
    full_join(read_csv("../data/dcas/census_tract_R1049.csv", col_types = 'cc'), 
              by='rid') %>%
    rename(trtid10 = tract) %>%
    left_join(nhoods, by='trtid10')

3.4 Data Imputation and Weighting

Select variables to keep that will be used in analysis. Define vectors of identification and numeric variables that will be used for imputation.

vars <- c(
          # 'satisfied', 'better',
          'nhdsat', 'nhdchg'
          , 'raceeth'
          , 'age'
          , 'forborn'
          , 'man'
          , 'kids'
          , 'married'
          , 'educ'
          , 'own'
          , 'nhdyrs'
          , 'nhdsize'
          # , 'inc'
)
dcas16 <- dcas16 %>%
    mutate(
        rid = as.character(studycase),
        strata = sample_strata
    )
dcas16$rid <- as.character(dcas16$studycase)
idvars16 <- c('rid','weight', 'strata', 'sample_tract')
nominals <- vars[!(vars %in% c('age', 'nhdyrs'))]

dcas18$rid <- as.character(dcas18$rid)
idvars18 <- c("rid", "weight", "strata")

Re-level the tract identification number used for fixed effects to the reference tract is that which has the median level of satisfaction.

satmu <- dcas16 %>%
    select(sample_tract, satisfied) %>%
    group_by(sample_tract) %>%
    summarize(meansat = mean(satisfied, na.rm=TRUE)) %>%
    arrange(meansat) 
medsatid <- satmu$sample_tract[round(nrow(satmu)/2)] %>% as.character()
dcas16$sample_tract <- relevel(dcas16$sample_tract, ref=medsatid)
# levels(dcas16$sample_tract) ## Not run: Check referencing

Prepare data to be imputed based on different datasets.

set.seed(214518)
idvars <- c('satisfied', 'better', 'anarace')
nhoodvars <- c('nhwht00', 'lntotchg', 'nhdtype', 'wtype', 'H', 'Hc', 
               'race.pnhw')
nivars <- c(idvars, nhoodvars)

Create five imputation datasets of survey-weighted data and assign to object dcas<YR>svy. For the DCAS 2016 data, include only respondents who live in multiracial neighborhoods in the multiple imputation. Create vectors of variable names for different types of variables to be used as parameters for multiple imputation.

dcas16q <- subset(dcas16, neighborhood=="Global Neighborhood") %>%
    mutate(sample_tract=factor(sample_tract))
dcas16mi <- amelia(dcas16q[dcas16q$neighborhood=="Global Neighborhood", 
                          unique(c(vars, nominals, nivars, idvars16))], 
                   m=5, noms=nominals, emburn=c(500, 500), p2s=FALSE, 
                   idvars=c(nivars, idvars16))
dcas16svy <- svydesign(id=~rid, strata=~strata, weights=~weight,
                       data=imputationList(dcas16mi$imputations))

dcas18mi <- amelia(dcas18[, unique(c(vars, nominals, nivars, idvars18))], 
                   m=5, noms=nominals, emburn=c(500,500), p2s=FALSE, 
                   idvars=c(nivars, idvars18))
dcas18svy <- svydesign(id=~rid, strata=~strata, weights=~weight,
                       data=imputationList(dcas18mi$imputations))

3.5 Center Variables

Center all variables on their DC-area-wide values (estimated using the DCAS 2018 data) in order to create comparable regression values across the two data sets, Subtract the mean of the weighted DCAS 2018 variables from the corresponding variables in both the DCAS 2016 (multiracial neighborhoods) and DCAS 2018 (DC-area) data. This sets the intercept across the two data sets at a corresponding value that represents a white resident with DC-area-wide mean values on all other measures.

vars <- c("age", "forborn", "man", "kids", "married", "own", "nhdyrs")
mu18 <- lapply(vars, function(var){
    fm <- as.formula(paste("~", var))
    mu <- coef(MIcombine(with(dcas18svy, svymean(fm, na.rm=TRUE))))
    if(length(mu)==1) return(mu[[1]])
    return(mu[[grep("TRUE$", names(mu))]])
})
names(mu18) <- vars
educ <- coef(MIcombine(with(dcas18svy, svymean(~educ, na.rm=TRUE))))
mu18[paste0("educ", 1:5)] <- educ
nhdsize <- coef(MIcombine(with(dcas18svy, svymean(~nhdsize, na.rm=TRUE))))
mu18[paste0("nhdsize", 1:3)] <- nhdsize


svy.center <- function(df) {
    df %>% update(
        agec       = age - mu18[["age"]],
        forbornc   = as.integer(forborn) - mu18[["forborn"]],
        manc       = as.integer(man) - mu18[["man"]],
        kidsc      = as.integer(kids) - mu18[["kids"]],
        marriedc   = as.integer(married) - mu18[["married"]],
        educall    = model.matrix(~educ),
        educ1c     = educall[, 2] - mu18[["educ1"]],
        educ3c     = educall[, 3] - mu18[["educ3"]],
        educ4c     = educall[, 4] - mu18[["educ4"]],
        educ5c     = educall[, 5] - mu18[["educ5"]],
        educall    = 1,
        ownc       = as.integer(own) - mu18[["own"]],
        nhdyrsc    = nhdyrs - mu18[["nhdyrs"]],
        nhdsizeall = model.matrix(~nhdsize),
        nhdsize2c  = nhdsizeall[, 2] - mu18[["nhdsize2"]],
        nhdsize3c  = nhdsizeall[, 3] - mu18[["nhdsize3"]],
        nhdsizeall = 1
    ) 
}
dcas16svy <- svy.center(dcas16svy)
dcas18svy <- svy.center(dcas18svy)

3.6 Save Data

Save R object containing the multiply-imputed survey-weighted data (dcassvy), variable names (vars), imputation list upon which survey data were created (dcasmi), a string representing the diretory containing data (dataDIR), and the list of values to export to LaTeX (texcmds).

# save(vars, texcmds, dcarea,
#      dcas16svy, dcas16, dcas16mi,
#      dcas18svy, dcas18, dcas18mi,
#      file = '../data/dcassvy.Rdata')