6 Satisfaction Comparison Between Multiethnic Neighborhoods and DC-Area Population

6.1 Descriptive analysis of satisfaction by race

Summarize percent of residents satisfied living in their neighborhoods for a) respondents of the DCAS 2016 that live in multiracial neighborhoods, and b) respondents of the DCAS 2018 that live throughout the DC area.

racedfs <- list(
    dcas16svy,
    subset(dcas16svy, raceeth=="asian"),
    subset(dcas16svy, raceeth=="black"),
    subset(dcas16svy, raceeth=="latino"),
    subset(dcas16svy, raceeth=="white")
)
desc16 <- lapply(racedfs, function(df){
    res <- with(df, svymean(~satisfied)) %>% MIcombine()
    n <- nrow(df$designs$imp1)
    list(mean16=coef(res)*100, se16=sqrt(vcov(res)[1,1])*100, n16 = n)
}) %>%
    bind_rows() %>%
    mutate(race = racelevs)

racedfs <- list(
    dcas18svy,
    subset(dcas18svy, raceeth=="asian"),
    subset(dcas18svy, raceeth=="black"),
    subset(dcas18svy, raceeth=="latino"),
    subset(dcas18svy, raceeth=="white")
)
desc18 <- lapply(racedfs, function(df){
    res <- with(df, svymean(~satisfied)) %>% MIcombine()
    n <- nrow(df$designs$imp1)
    list(mean18=coef(res)*100, se18=sqrt(vcov(res)[1,1])*100, n18 = n)
}) %>%
    bind_rows() %>%
    mutate(race = racelevs)

Calculate the differences in the percent of residents satisfied in multiracial neighborhoods and the percent of all DC-area satisfied with their neighborhoods, both overall and by race. Calculate the standard error for the difference and the p-value for differences by race. The formatted results are saved in the file tables/between_descriptives.tex.

desc_cap <- paste(
    "Unconditional mean level of satisfaction among residents of", 
    "multiracial neighborhoods compared to residents in entire DC area,",
    "by race"
)
desc_tbl <- desc16 %>%
    inner_join(desc18, by="race") %>%
    select(race, mean16, se16, n16, mean18, se18, n18) %>%
    mutate(
        diff = mean16 - mean18,
        sediff = sqrt(se16^2 + se18^2),
        df = (se16^2 + se18^2) / ( ((se16^2)/(n16-1)) + ((se18^2)/(n18-1)) ),
        p = pt(abs(diff/sediff), df=df, lower.tail = FALSE)*2,
        star = ifelse(p<0.001, "***", 
                      ifelse(p<0.01, "** ", 
                             ifelse(p<0.05,"*  ", "   ")))
    )
kable(desc_tbl, "html", caption = desc_cap, 
      digits = c(1,1,1,0,1,1,0,1,1,0,4,1)) %>% 
    scroll_box(width = "100%")

Table 6.1: Unconditional mean level of satisfaction among residents of multiracial neighborhoods compared to residents in entire DC area, by race
race	mean16	se16	n16	mean18	se18	n18	diff	sediff	df	p	star
all	71.3	2.5	632	78.3	2.2	989	-7.0	3.3	746	0.0356
asian	71.7	5.0	183	81.1	5.4	93	-9.4	7.3	119	0.2013
black	68.8	5.8	104	68.9	4.6	308	-0.1	7.4	139	0.9883
latino	75.0	5.7	82	75.4	7.4	75	-0.4	9.4	76	0.9620
white	70.0	3.9	263	85.1	2.3	513	-15.1	4.6	300	0.0011	**

6.2 Multivariable influence of race on neighborhood satisfaction

Developed the model for each DCAS in a series of three steps to understand what is occurring in the data. The three are:

Race-only
Race and individual demographic characteristics (age, foreign-born, gender, children present, marital status, and education)
Race, individual demographic characteristics, and neighborhood experience (home ownership, years in the neighborhood, and perceived neighborhood size)

m1bn <- "raceeth"
m2bn <- paste(m1bn, "+ agec + forbornc + manc + kidsc + marriedc"
            ,"+ educ1c + educ3c + educ4c + educ5c"
            )
m3bn <- paste(m2bn, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")

6.2.1 Combining DCAS 2016 and DCAS 2018 data for analysis

I obtained combined sampling weights for the 2016 and 2018 waves of the DCAS. Having these data allowed me to directly assess the influence of being sampled in a multiracial neighborhood on neighborhood satisfaction. This addresses a reviewer comment that combining the data offers the best way to compare residents of multiracial neighborhoods with residents elsewhere in the DC area.

I first combined the data and then multiply imputed the combined data to create a multiple imputation object, dcasCsvy, containing five multiply imputed samples.

wgts <- foreign::read.spss(
  '../data/dcas/DCAS_combined_weights.sav'
  ) %>%
    as_tibble() %>%
    mutate(across(everything(), ~trimws(.x))) %>%
    mutate(
        wave = if_else(study=="O1087", "dcas16", "dcas18"),
        rid = if_else(wave=="dcas16", trimws(studycase), trimws(RID)),
        totwgt = as.numeric(TOTALWEIGHT),
        totwgt18 = as.numeric(TOTAL2018WEIGHT)
    )

idvarsC <- c('rid', 'strata')
dcasC <- bind_rows(
    select(dcas16, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
    select(dcas18, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
) %>%
    left_join(select(wgts, rid, totwgt)) %>%
    mutate(
        nhdsmp = if_else(grepl("^Global", strata), "quad",
                 if_else(grepl("^Latino", strata), "latino", "all")),
        nhdsmp = ordered(nhdsmp, levels=c("all", "quad", "latino"))
    ) %>%
    subset(
        raceeth != 'other'
    ) %>%
    mutate(
        raceeth = factor(raceeth)
    )
contrasts(dcasC$nhdsmp) <- contr.treatment(3)
idvarsC <- c(idvarsC, 'totwgt', 'nhdsmp')

set.seed(214518)
dcasCmi <- amelia(dcasC[, unique(c(vars, nominals, nivars, idvarsC))],
                  m=5, noms=nominals, emburn=c(500,500), p2s=FALSE,
                  idvars=c(nivars, idvarsC))
dcasCsvy <- svydesign(
  id=~rid, weights=~totwgt, data=imputationList(dcasCmi$imputations)
) %>%
  svy.center()

6.2.2 Estimate model of satisfaction using combined DCAS 2016 and DCAS 2018 data

I use the same variables as the within-neighborhood analysis but include an interaction between respondent race and the sampled neighborhood type (all neighborhoods, global neighborhood, or disproportionately Latinx neighborhood). The coefficients for the interactions indicate the difference in the log-odds of satisfaction for the racial group in the DC area versus those in global or disproportionately Latino neighborhoods.

m1bnC <- "nhdsmp * raceeth"
m2bnC <- paste(m1bnC, "+ agec + forbornc + manc + kidsc + marriedc"
              ,"+ educ1c + educ3c + educ4c + educ5c"
)
m3bnC <- paste(m2bnC, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")

## Warnings suppressed: non-integer #successes in a binomial glm 
m1C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m1bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

m2C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m2bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

m3C <- with(dcasCsvy, svyglm(
    as.formula(paste('satisfied ~', m3bnC)), family=binomial,
    contrasts = list(nhdsmp = "contr.treatment"))
)

The parameter estimates and standard errors can be found in Table 6.2, below, and are saved to the file tables/combined.tex.

regression_labelsC <- c(racelabs[1:3],
                        "Multiracial neighborhoods",
                        paste("~x ", racelabs[1:3]),
                        "Predom. Latinx neighborhoods",
                        paste("~x ", racelabs[1:3]),
                        regression_labels[-1:-3])
coef_names <- c("(Intercept)",
                paste0("raceeth", racelevs[2:4]),
                paste0("nhdsmpquad", c("", paste0(":raceeth", racelevs[2:4]))),
                paste0("nhdsmplatino", c("", paste0(":raceeth", racelevs[2:4]))),
                "agec", "forbornc", "manc", "kidsc", "marriedc",
                paste0("educ", c(1, 3:5), "c"), "ownc", "nhdyrsc",
                paste0("nhdsize", c(2, 3), "c"))
cmb_tbl_cap <- paste(
    "Logistic regression coefficients and standard errors predicted",
    "from models estimating neighborhood satisfaction among residents",
    "combining DCAS 2016 and DCAS 2018 data"
)
cmb_tbl <- report_models(
    MIcombine_aic(m1C), MIcombine_aic(m2C), MIcombine_aic(m3C),
    reglabels = regression_labelsC,
    use.headers = TRUE,
    coefs=coef_names
) %>%
    set_top_padding(0) %>%
    set_bottom_padding(0) %>%
    set_bottom_border(1, everything(), TRUE) %>%
    set_caption(cmb_tbl_cap) %>%
    set_label("tab:combined")
cmb_tbl

Table 6.2: Logistic regression coefficients and standard errors predicted from models estimating neighborhood satisfaction among residents combining DCAS 2016 and DCAS 2018 data
	(1)	(2)	(3)
(Intercept)	1.998 ***	1.964 ***	1.950 ***
	(0.196)	(0.230)	(0.232)
Race
Asian	-0.024	-0.138	-0.095
	(0.389)	(0.469)	(0.475)
Black	-1.033 ***	-0.897 **	-0.826 **
	(0.283)	(0.284)	(0.288)
Latino	-0.975	-0.901	-0.857
	(0.511)	(0.536)	(0.544)
Multiracial neighborhoods	-1.186 ***	-1.093 **	-1.063 **
	(0.306)	(0.345)	(0.338)
~x Asian	0.092	0.109	0.041
	(0.641)	(0.618)	(0.616)
~x Black	1.320 *	1.240 *	1.113 *
	(0.531)	(0.596)	(0.546)
~x Latino	1.066	0.990	0.948
	(0.717)	(0.704)	(0.714)
Predom. Latinx neighborhoods	-1.516 **	-1.505 **	-1.426 ***
	(0.471)	(0.460)	(0.413)
~x Asian	-0.216	-0.127	-0.288
	(0.702)	(0.708)	(0.673)
~x Black	1.394 *	1.369 *	1.268 *
	(0.613)	(0.604)	(0.562)
~x Latino	1.200	1.144	1.063
	(0.726)	(0.689)	(0.666)
Demographics
Age		0.004	0.001
		(0.006)	(0.007)
Foreign Born		0.162	0.189
		(0.287)	(0.296)
Male		-0.243	-0.262
		(0.197)	(0.198)
Children Present		-0.176	-0.193
		(0.224)	(0.223)
Married		0.446 *	0.364
		(0.216)	(0.217)
Socioeconomic
<H.S.		0.031	0.101
		(0.379)	(0.381)
Some college, no B.A.		-0.104	-0.087
		(0.322)	(0.324)
B.A.		0.125	0.140
		(0.321)	(0.322)
M.A.+		0.130	0.109
		(0.313)	(0.321)
Neighborhood perceptions
Home owner			0.456 *
			(0.217)
Years in neighborhood			-0.001
			(0.009)
10-50 blocks			0.300
			(0.210)
>50 blocks			-0.580
			(0.427)
N	2151	2151	2151
AIC	1984.405	1978.436	1959.919
* p < 0.001; p < 0.01; * p < 0.05.

6.2.3 Calculate predicted probabilities and marginal effects from between-data comparison

Calculate the predicted probabilities and standard errors at the intercept of control variables based on the third model in Table 6.2. To provide a conservative estimate, the standard errors represent the maximum standard error across the estimates from the five imputed datasets. The predicted values are plotted in Figure 6.1 and saved in the file images/between-prediction.pdf.

## Get list of control variables from formula (without sample & race)
ctrlvarnames <- strsplit(m3bnC, " \\+ ")[[1]][-1] 

## Record display names for neighborhood types
sampnames <- c(
    all = "DC area", quad = "Multiracial neighborhoods", latino = "latino"
)

## Create data with predictions of model of combined sample
bnC_pred <- tibble(
    raceeth = relevel(factor(rep(racelevs[-1], 3)), ref = "white"),
    nhdsmp = factor(rep(c("all", "quad", "latino"), each = 4)),

    ## Create matrix containing zeros on all control variables
    !!!matrix(rep(0, 12 * length(ctrlvarnames)), nrow = 12) %>%
        as_tibble(.name_repair = ~ctrlvarnames),
) %>%
    mutate(
        ## Predict across all five imputations
        !!!lapply(m3C, prediction, data = ., type = "link") %>%
        
            ## Keep only the predictions and standard errors
            lapply(select, fitted, se.fitted) %>% 
            
            ## Combine the five separate data frames of predictions 
            bind_cols(                            
                .name_repair = ~paste0(
                    rep(c("fitted", "se"), 5), rep(1:5, each = 2)
                )
            ) %>% 
            
            ## Summarize across rows of predictions (mean) and SEs (max)
            mutate(
                yhat = rowMeans(across(.cols = seq(1, 9, 2))), 
                se = apply(.[, seq(2, 10, 2)], 1, max)       
            ) %>%
            select(yhat, se),
        
        ## Calculate predicted value in response scale and standard errors
        p = plogis(yhat),
        uci = plogis(yhat + 1.96 * se),
        lci = plogis(yhat - 1.96 * se),
        race = ordered(raceeth, levels=racelevs[-1]),
        samp = recode_factor(
            nhdsmp, !!!c(
                all = "DC area",
                quad = "Multiracial neighborhoods"
            )
        )
    ) %>%
    select(raceeth, race, samp, yhat, se, p, uci, lci) 

## Plot values
bnC_pred_plt <- ggplot(
    filter(bnC_pred, samp !="latino"), 
    aes(
        x = race, y = p, ymin = lci, ymax = uci,
        group = samp, color = samp, shape = samp
    )
) +
    geom_point(position = position_dodge(dodge_wd)) +
    geom_linerange(position = position_dodge(dodge_wd)) +
    scale_color_manual(values = c("#888888", "#222222")) +
    scale_x_discrete(
        labels=gsub("^(\\w)", "\\U\\1", racelevs, perl=TRUE)
    ) +
    scale_y_continuous(
        limits=c(.45, 1), breaks=seq(.5, 1.0, .1)
    ) +
    labs(
        x = NULL,
        y = "Probability",
        shape = NULL, color = NULL
    ) +
    theme_minimal() +
    theme(
        legend.position = "bottom",
        panel.grid.major.x = element_blank()
    )

cap = paste(
    "Predicted probability of neighborhood satisfaction among residents of",
    "multiracial neighborhoods and the DC area based on model using combined",
    "data from the DCAS 2016 and DCAS 2018"
)
ggsave("images/between-prediction.pdf", plot = bnC_pred_plt,
       width=6.75, units='in')

Predicted probability of neighborhood satisfaction among residents of multiracial neighborhoods and the DC area based on model using combined data from the DCAS 2016 and DCAS 2018

Figure 6.1: Predicted probability of neighborhood satisfaction among residents of multiracial neighborhoods and the DC area based on model using combined data from the DCAS 2016 and DCAS 2018

Table 6.3 reports the predicted percentages of residents satisfied with their neighborhoods in all DC-area neighborhoods and in multiracial neighborhoods, as well as the difference between the two probabilities.

bn_pred_tbl <- bnC_pred %>%
    select(race, samp, p) %>%
    filter(samp != "latino") %>%
    pivot_wider(id_cols = race, names_from = samp, values_from = p) %>%
    mutate(
        across(2:3, ~. * 100),
        race = sub("^(\\w)", "\\U\\1", race, perl = TRUE),
        Difference = `DC area` - `Multiracial neighborhoods`
    ) %>%
    as_huxtable(digits = 1) %>%
    set_caption(paste(
        "Predicted percentage of residents satisfied with their neighborhood",
        "by race and DCAS sample estimated from model with combined samples"
    )) %>%
    set_label("tab:bnC")
bn_pred_tbl

Table 6.3: Predicted percentage of residents satisfied with their neighborhood by race and DCAS sample estimated from model with combined samples
race	DC area	Multiracial neighborhoods	Difference
Asian	86.5	69.7	16.8
Black	75.5	76.4	-0.912
Latino	74.9	72.7	2.23
White	87.5	70.8	16.7

From the third model in Table 6.2, I estimated the marginal effects of living in a multiracial neighborhood compared to living in the DC area generally by race. I plotted the mean of the average marginal effects across the five imputations and then the maximum absolute values of the lower and upper confidence levels in Figure 6.2.

## Calculate and combine marginal effects across imputations
# Warning that contrasts are dropped from factor nhdsmp suppressed
mfxC <- lapply(
    m3C, margins, variables = "nhdsmp", at = list(raceeth = racelevs[-1]), 
    design = dcasCsvy$designs[[1]] #Weights are the same across imputations
) 
mfxC_all <- mfxC %>%
    lapply(summary) %>%
    bind_rows() %>%
    as_tibble() %>%
    filter(!grepl("latino$", factor)) %>%
    mutate(
        imp = rep(1:5, each = 4),
        raceeth = sub("^(\\w)", "\\U\\1", raceeth, perl = TRUE)
    ) 

## Plot marginal effects
mfxC_sum <- mfxC_all %>%
    group_by(raceeth) %>%
    mutate(
        lower = sign(lower) * max(abs(lower)),
        upper = sign(upper) * max(abs(upper))
    ) %>%
    summarize(
        across(c(AME, lower, upper), mean)
    ) %>%
    ungroup()

mfxC_plt <- ggplot(mfxC_sum, 
                   aes(x = raceeth, y = AME, ymin = lower, ymax = upper)) +
    geom_hline(yintercept = 0, color = "#cccccc", size = 1) +
    geom_point() + 
    geom_linerange() +
    scale_y_continuous(
        breaks = round(seq(-0.3, 0.3, 0.1), 1), limits = c(-0.35, 0.35)
    ) +
    labs(
        x = NULL,
        y = "Average marginal effect"
    ) +
    theme_minimal() +
    theme(
        panel.grid.major.x = element_blank()
    )
cap = paste("Average marginal effects on satisfaction of living in multiracial",
            "neighborhoods compared to all DC-area residents of the same race")

mfxC_plt

Figure 6.2: Average marginal effects on satisfaction of living in multiracial neighborhoods compared to all DC-area residents of the same race

ggsave("images/between.pdf", plot = mfxC_plt,
       width=6.75, units='in')