6 Satisfaction Comparison Between Multiethnic Neighborhoods and DC-Area Population

6.1 Descriptive analysis of satisfaction by race

Summarize percent of residents satisfied living in their neighborhoods for a) respondents of the DCAS 2016 that live in multiracial neighborhoods, and b) respondents of the DCAS 2018 that live throughout the DC area.

racedfs <- list(
    dcas16svy,
    subset(dcas16svy, raceeth=="asian"),
    subset(dcas16svy, raceeth=="black"),
    subset(dcas16svy, raceeth=="latino"),
    subset(dcas16svy, raceeth=="white")
)
desc16 <- lapply(racedfs, function(df){
    res <- with(df, svymean(~satisfied)) %>% MIcombine()
    n <- nrow(df$designs$imp1)
    list(mean16=coef(res)*100, se16=sqrt(vcov(res)[1,1])*100, n16 = n)
}) %>%
    bind_rows() %>%
    mutate(race = racelevs)

racedfs <- list(
    dcas18svy,
    subset(dcas18svy, raceeth=="asian"),
    subset(dcas18svy, raceeth=="black"),
    subset(dcas18svy, raceeth=="latino"),
    subset(dcas18svy, raceeth=="white")
)
desc18 <- lapply(racedfs, function(df){
    res <- with(df, svymean(~satisfied)) %>% MIcombine()
    n <- nrow(df$designs$imp1)
    list(mean18=coef(res)*100, se18=sqrt(vcov(res)[1,1])*100, n18 = n)
}) %>%
    bind_rows() %>%
    mutate(race = racelevs) 

Calculate the differences in the percent of residents satisfied in multiracial neighborhoods and the percent of all DC-area satisfied with their neighborhoods, both overall and by race. Calculate the standard error for the difference and the p-value for differences by race. The formatted results are saved in the file tables/between_descriptives.tex.

desc_cap <- paste(
    "Unconditional mean level of satisfaction among residents of", 
    "multiracial neighborhoods compared to residents in entire DC area,",
    "by race"
)
desc_tbl <- desc16 %>%
    inner_join(desc18, by="race") %>%
    select(race, mean16, se16, n16, mean18, se18, n18) %>%
    mutate(
        diff = mean16 - mean18,
        sediff = sqrt(se16^2 + se18^2),
        df = (se16^2 + se18^2) / ( ((se16^2)/(n16-1)) + ((se18^2)/(n18-1)) ),
        p = pt(abs(diff/sediff), df=df, lower.tail = FALSE)*2,
        star = ifelse(p<0.001, "***", 
                      ifelse(p<0.01, "** ", 
                             ifelse(p<0.05,"*  ", "   ")))
    )
kable(desc_tbl, "html", caption = desc_cap, 
      digits = c(1,1,1,0,1,1,0,1,1,0,4,1)) %>% 
    scroll_box(width = "100%") 
Table 6.1: Unconditional mean level of satisfaction among residents of multiracial neighborhoods compared to residents in entire DC area, by race
race mean16 se16 n16 mean18 se18 n18 diff sediff df p star
all 71.3 2.5 632 78.3 2.2 989 -7.0 3.3 746 0.0356
asian 71.7 5.0 183 81.1 5.4 93 -9.4 7.3 119 0.2013
black 68.8 5.8 104 68.9 4.6 308 -0.1 7.4 139 0.9883
latino 75.0 5.7 82 75.4 7.4 75 -0.4 9.4 76 0.9620
white 70.0 3.9 263 85.1 2.3 513 -15.1 4.6 300 0.0011 **

6.2 Multivariable influence of race on neighborhood satisfaction

Developed the model for each DCAS in a series of three steps to understand what is occurring in the data. The three are:

  1. Race-only
  2. Race and individual demographic characteristics (age, foreign-born, gender, children present, marital status, and education)
  3. Race, individual demographic characteristics, and neighborhood experience (home ownership, years in the neighborhood, and perceived neighborhood size)
m1bn <- "raceeth"
m2bn <- paste(m1bn, "+ agec + forbornc + manc + kidsc + marriedc"
            ,"+ educ1c + educ3c + educ4c + educ5c"
            )
m3bn <- paste(m2bn, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")

6.2.1 Combining DCAS 2016 and DCAS 2018 data for analysis

I obtained combined sampling weights for the 2016 and 2018 waves of the DCAS. Having these data allowed me to directly assess the influence of being sampled in a multiracial neighborhood on neighborhood satisfaction. This addresses a reviewer comment that combining the data offers the best way to compare residents of multiracial neighborhoods with residents elsewhere in the DC area.

I first combined the data and then multiply imputed the combined data to create a multiple imputation object, dcasCsvy, containing five multiply imputed samples.

wgts <- foreign::read.spss(
  '../data/dcas/DCAS_combined_weights.sav'
  ) %>%
    as_tibble() %>%
    mutate(across(everything(), ~trimws(.x))) %>%
    mutate(
        wave = if_else(study=="O1087", "dcas16", "dcas18"),
        rid = if_else(wave=="dcas16", trimws(studycase), trimws(RID)),
        totwgt = as.numeric(TOTALWEIGHT),
        totwgt18 = as.numeric(TOTAL2018WEIGHT)
    )

idvarsC <- c('rid', 'strata')
dcasC <- bind_rows(
    select(dcas16, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
    select(dcas18, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
) %>%
    left_join(select(wgts, rid, totwgt)) %>%
    mutate(
        nhdsmp = if_else(grepl("^Global", strata), "quad",
                 if_else(grepl("^Latino", strata), "latino", "all")),
        nhdsmp = ordered(nhdsmp, levels=c("all", "quad", "latino"))
    ) %>%
    subset(
        raceeth != 'other'
    ) %>%
    mutate(
        raceeth = factor(raceeth)
    )
contrasts(dcasC$nhdsmp) <- contr.treatment(3)
idvarsC <- c(idvarsC, 'totwgt', 'nhdsmp')

set.seed(214518)
dcasCmi <- amelia(dcasC[, unique(c(vars, nominals, nivars, idvarsC))],
                  m=5, noms=nominals, emburn=c(500,500), p2s=FALSE,
                  idvars=c(nivars, idvarsC))
dcasCsvy <- svydesign(
  id=~rid, weights=~totwgt, data=imputationList(dcasCmi$imputations)
) %>%
  svy.center()

6.2.2 Estimate model of satisfaction using combined DCAS 2016 and DCAS 2018 data

I use the same variables as the within-neighborhood analysis but include an interaction between respondent race and the sampled neighborhood type (all neighborhoods, global neighborhood, or disproportionately Latinx neighborhood). The coefficients for the interactions indicate the difference in the log-odds of satisfaction for the racial group in the DC area versus those in global or disproportionately Latino neighborhoods.

m1bnC <- "nhdsmp * raceeth"
m2bnC <- paste(m1bnC, "+ agec + forbornc + manc + kidsc + marriedc"
              ,"+ educ1c + educ3c + educ4c + educ5c"
)
m3bnC <- paste(m2bnC, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")

## Warnings suppressed: non-integer #successes in a binomial glm 
m1C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m1bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

m2C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m2bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

m3C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m3bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

The parameter estimates and standard errors can be found in Table 6.2, below, and are saved to the file tables/combined.tex.

regression_labelsC <- c(racelabs[1:3],
                        "Multiracial neighborhoods",
                        paste("~x ", racelabs[1:3]),
                        "Predom. Latinx neighborhoods",
                        paste("~x ", racelabs[1:3]),
                        regression_labels[-1:-3])
coef_names <- c("(Intercept)",
                paste0("raceeth", racelevs[2:4]),
                paste0("nhdsmpquad", c("", paste0(":raceeth", racelevs[2:4]))),
                paste0("nhdsmplatino", c("", paste0(":raceeth", racelevs[2:4]))),
                "agec", "forbornc", "manc", "kidsc", "marriedc",
                paste0("educ", c(1, 3:5), "c"), "ownc", "nhdyrsc",
                paste0("nhdsize", c(2, 3), "c"))
cmb_tbl_cap <- paste(
    "Logistic regression coefficients and standard errors predicted",
    "from models estimating neighborhood satisfaction among residents",
    "combining DCAS 2016 and DCAS 2018 data"
)
cmb_tbl <- report_models(
    MIcombine_aic(m1C), MIcombine_aic(m2C), MIcombine_aic(m3C),
    reglabels = regression_labelsC,
    use.headers = TRUE,
    coefs=coef_names
) %>%
    set_top_padding(0) %>%
    set_bottom_padding(0) %>%
    set_bottom_border(1, everything(), TRUE) %>%
    set_caption(cmb_tbl_cap) %>%
    set_label("tab:combined")
cmb_tbl
Table 6.2: Logistic regression coefficients and standard errors predicted from models estimating neighborhood satisfaction among residents combining DCAS 2016 and DCAS 2018 data
(1)(2)(3)
(Intercept)1.998 ***1.964 ***1.950 ***
(0.196)   (0.230)   (0.232)   
Race                        
Asian-0.024    -0.138    -0.095    
(0.389)   (0.469)   (0.475)   
Black-1.033 ***-0.897 ** -0.826 ** 
(0.283)   (0.284)   (0.288)   
Latino-0.975    -0.901    -0.857    
(0.511)   (0.536)   (0.544)   
Multiracial neighborhoods-1.186 ***-1.093 ** -1.063 ** 
(0.306)   (0.345)   (0.338)   
~x Asian0.092    0.109    0.041    
(0.641)   (0.618)   (0.616)   
~x Black1.320 *  1.240 *  1.113 *  
(0.531)   (0.596)   (0.546)   
~x Latino1.066    0.990    0.948    
(0.717)   (0.704)   (0.714)   
Predom. Latinx neighborhoods-1.516 ** -1.505 ** -1.426 ***
(0.471)   (0.460)   (0.413)   
~x Asian-0.216    -0.127    -0.288    
(0.702)   (0.708)   (0.673)   
~x Black1.394 *  1.369 *  1.268 *  
(0.613)   (0.604)   (0.562)   
~x Latino1.200    1.144    1.063    
(0.726)   (0.689)   (0.666)   
Demographics                        
Age        0.004    0.001    
        (0.006)   (0.007)   
Foreign Born        0.162    0.189    
        (0.287)   (0.296)   
Male        -0.243    -0.262    
        (0.197)   (0.198)   
Children Present        -0.176    -0.193    
        (0.224)   (0.223)   
Married        0.446 *  0.364    
        (0.216)   (0.217)   
Socioeconomic                        
<H.S.        0.031    0.101    
        (0.379)   (0.381)   
Some college, no B.A.        -0.104    -0.087    
        (0.322)   (0.324)   
B.A.        0.125    0.140    
        (0.321)   (0.322)   
M.A.+        0.130    0.109    
        (0.313)   (0.321)   
Neighborhood perceptions                        
Home owner                0.456 *  
                (0.217)   
Years in neighborhood                -0.001    
                (0.009)   
10-50 blocks                0.300    
                (0.210)   
>50 blocks                -0.580    
                (0.427)   
N2151        2151        2151        
AIC1984.405    1978.436    1959.919    
*** p < 0.001; ** p < 0.01; * p < 0.05.

6.2.3 Calculate predicted probabilities and marginal effects from between-data comparison

Calculate the predicted probabilities and standard errors at the intercept of control variables based on the third model in Table 6.2. To provide a conservative estimate, the standard errors represent the maximum standard error across the estimates from the five imputed datasets. The predicted values are plotted in Figure 6.1 and saved in the file images/between-prediction.pdf.

## Get list of control variables from formula (without sample & race)
ctrlvarnames <- strsplit(m3bnC, " \\+ ")[[1]][-1] 

## Record display names for neighborhood types
sampnames <- c(
    all = "DC area", quad = "Multiracial neighborhoods", latino = "latino"
)

## Create data with predictions of model of combined sample
bnC_pred <- tibble(
    raceeth = relevel(factor(rep(racelevs[-1], 3)), ref = "white"),
    nhdsmp = factor(rep(c("all", "quad", "latino"), each = 4)),

    ## Create matrix containing zeros on all control variables
    !!!matrix(rep(0, 12 * length(ctrlvarnames)), nrow = 12) %>%
        as_tibble(.name_repair = ~ctrlvarnames),
) %>%
    mutate(
        ## Predict across all five imputations
        !!!lapply(m3C, prediction, data = ., type = "link") %>%
        
            ## Keep only the predictions and standard errors
            lapply(select, fitted, se.fitted) %>% 
            
            ## Combine the five separate data frames of predictions 
            bind_cols(                            
                .name_repair = ~paste0(
                    rep(c("fitted", "se"), 5), rep(1:5, each = 2)
                )
            ) %>% 
            
            ## Summarize across rows of predictions (mean) and SEs (max)
            mutate(
                yhat = rowMeans(across(.cols = seq(1, 9, 2))), 
                se = apply(.[, seq(2, 10, 2)], 1, max)       
            ) %>%
            select(yhat, se),
        
        ## Calculate predicted value in response scale and standard errors
        p = plogis(yhat),
        uci = plogis(yhat + 1.96 * se),
        lci = plogis(yhat - 1.96 * se),
        race = ordered(raceeth, levels=racelevs[-1]),
        samp = recode_factor(
            nhdsmp, !!!c(
                all = "DC area",
                quad = "Multiracial neighborhoods"
            )
        )
    ) %>%
    select(raceeth, race, samp, yhat, se, p, uci, lci) 

## Plot values
bnC_pred_plt <- ggplot(
    filter(bnC_pred, samp !="latino"), 
    aes(
        x = race, y = p, ymin = lci, ymax = uci,
        group = samp, color = samp, shape = samp
    )
) +
    geom_point(position = position_dodge(dodge_wd)) +
    geom_linerange(position = position_dodge(dodge_wd)) +
    scale_color_manual(values = c("#888888", "#222222")) +
    scale_x_discrete(
        labels=gsub("^(\\w)", "\\U\\1", racelevs, perl=TRUE)
    ) +
    scale_y_continuous(
        limits=c(.45, 1), breaks=seq(.5, 1.0, .1)
    ) +
    labs(
        x = NULL,
        y = "Probability",
        shape = NULL, color = NULL
    ) +
    theme_minimal() +
    theme(
        legend.position = "bottom",
        panel.grid.major.x = element_blank()
    )

cap = paste(
    "Predicted probability of neighborhood satisfaction among residents of",
    "multiracial neighborhoods and the DC area based on model using combined",
    "data from the DCAS 2016 and DCAS 2018"
)
ggsave("images/between-prediction.pdf", plot = bnC_pred_plt,
       width=6.75, units='in')
Predicted probability of neighborhood satisfaction among residents of multiracial neighborhoods and the DC area based on model using combined data from the DCAS 2016 and DCAS 2018

Figure 6.1: Predicted probability of neighborhood satisfaction among residents of multiracial neighborhoods and the DC area based on model using combined data from the DCAS 2016 and DCAS 2018

Table 6.3 reports the predicted percentages of residents satisfied with their neighborhoods in all DC-area neighborhoods and in multiracial neighborhoods, as well as the difference between the two probabilities.

bn_pred_tbl <- bnC_pred %>%
    select(race, samp, p) %>%
    filter(samp != "latino") %>%
    pivot_wider(id_cols = race, names_from = samp, values_from = p) %>%
    mutate(
        across(2:3, ~. * 100),
        race = sub("^(\\w)", "\\U\\1", race, perl = TRUE),
        Difference = `DC area` - `Multiracial neighborhoods`
    ) %>%
    as_huxtable(digits = 1) %>%
    set_caption(paste(
        "Predicted percentage of residents satisfied with their neighborhood",
        "by race and DCAS sample estimated from model with combined samples"
    )) %>%
    set_label("tab:bnC")
bn_pred_tbl
Table 6.3: Predicted percentage of residents satisfied with their neighborhood by race and DCAS sample estimated from model with combined samples
raceDC areaMultiracial neighborhoodsDifference
Asian86.569.716.8  
Black75.576.4-0.912
Latino74.972.72.23 
White87.570.816.7  

From the third model in Table 6.2, I estimated the marginal effects of living in a multiracial neighborhood compared to living in the DC area generally by race. I plotted the mean of the average marginal effects across the five imputations and then the maximum absolute values of the lower and upper confidence levels in Figure 6.2.

## Calculate and combine marginal effects across imputations
# Warning that contrasts are dropped from factor nhdsmp suppressed
mfxC <- lapply(
    m3C, margins, variables = "nhdsmp", at = list(raceeth = racelevs[-1]), 
    design = dcasCsvy$designs[[1]] #Weights are the same across imputations
) 
mfxC_all <- mfxC %>%
    lapply(summary) %>%
    bind_rows() %>%
    as_tibble() %>%
    filter(!grepl("latino$", factor)) %>%
    mutate(
        imp = rep(1:5, each = 4),
        raceeth = sub("^(\\w)", "\\U\\1", raceeth, perl = TRUE)
    ) 

## Plot marginal effects
mfxC_sum <- mfxC_all %>%
    group_by(raceeth) %>%
    mutate(
        lower = sign(lower) * max(abs(lower)),
        upper = sign(upper) * max(abs(upper))
    ) %>%
    summarize(
        across(c(AME, lower, upper), mean)
    ) %>%
    ungroup()

mfxC_plt <- ggplot(mfxC_sum, 
                   aes(x = raceeth, y = AME, ymin = lower, ymax = upper)) +
    geom_hline(yintercept = 0, color = "#cccccc", size = 1) +
    geom_point() + 
    geom_linerange() +
    scale_y_continuous(
        breaks = round(seq(-0.3, 0.3, 0.1), 1), limits = c(-0.35, 0.35)
    ) +
    labs(
        x = NULL,
        y = "Average marginal effect"
    ) +
    theme_minimal() +
    theme(
        panel.grid.major.x = element_blank()
    )
cap = paste("Average marginal effects on satisfaction of living in multiracial",
            "neighborhoods compared to all DC-area residents of the same race")
mfxC_plt
Average marginal effects on satisfaction of living in multiracial neighborhoods compared to all DC-area residents of the same race

Figure 6.2: Average marginal effects on satisfaction of living in multiracial neighborhoods compared to all DC-area residents of the same race

ggsave("images/between.pdf", plot = mfxC_plt,
       width=6.75, units='in')