6 Satisfaction Comparison Between Multiethnic Neighborhoods and DC-Area Population
6.1 Descriptive analysis of satisfaction by race
Summarize percent of residents satisfied living in their neighborhoods for a) respondents of the DCAS 2016 that live in multiracial neighborhoods, and b) respondents of the DCAS 2018 that live throughout the DC area.
racedfs <- list(
dcas16svy,
subset(dcas16svy, raceeth=="asian"),
subset(dcas16svy, raceeth=="black"),
subset(dcas16svy, raceeth=="latino"),
subset(dcas16svy, raceeth=="white")
)
desc16 <- lapply(racedfs, function(df){
res <- with(df, svymean(~satisfied)) %>% MIcombine()
n <- nrow(df$designs$imp1)
list(mean16=coef(res)*100, se16=sqrt(vcov(res)[1,1])*100, n16 = n)
}) %>%
bind_rows() %>%
mutate(race = racelevs)
racedfs <- list(
dcas18svy,
subset(dcas18svy, raceeth=="asian"),
subset(dcas18svy, raceeth=="black"),
subset(dcas18svy, raceeth=="latino"),
subset(dcas18svy, raceeth=="white")
)
desc18 <- lapply(racedfs, function(df){
res <- with(df, svymean(~satisfied)) %>% MIcombine()
n <- nrow(df$designs$imp1)
list(mean18=coef(res)*100, se18=sqrt(vcov(res)[1,1])*100, n18 = n)
}) %>%
bind_rows() %>%
mutate(race = racelevs) Calculate the differences in the percent of residents satisfied in multiracial neighborhoods and the percent of all DC-area satisfied with their neighborhoods, both overall and by race. Calculate the standard error for the difference and the p-value for differences by race. The formatted results are saved in the file tables/between_descriptives.tex.
desc_cap <- paste(
"Unconditional mean level of satisfaction among residents of",
"multiracial neighborhoods compared to residents in entire DC area,",
"by race"
)
desc_tbl <- desc16 %>%
inner_join(desc18, by="race") %>%
select(race, mean16, se16, n16, mean18, se18, n18) %>%
mutate(
diff = mean16 - mean18,
sediff = sqrt(se16^2 + se18^2),
df = (se16^2 + se18^2) / ( ((se16^2)/(n16-1)) + ((se18^2)/(n18-1)) ),
p = pt(abs(diff/sediff), df=df, lower.tail = FALSE)*2,
star = ifelse(p<0.001, "***",
ifelse(p<0.01, "** ",
ifelse(p<0.05,"* ", " ")))
)
kable(desc_tbl, "html", caption = desc_cap,
digits = c(1,1,1,0,1,1,0,1,1,0,4,1)) %>%
scroll_box(width = "100%") | race | mean16 | se16 | n16 | mean18 | se18 | n18 | diff | sediff | df | p | star |
|---|---|---|---|---|---|---|---|---|---|---|---|
| all | 71.3 | 2.5 | 632 | 78.3 | 2.2 | 989 | -7.0 | 3.3 | 746 | 0.0356 |
|
| asian | 71.7 | 5.0 | 183 | 81.1 | 5.4 | 93 | -9.4 | 7.3 | 119 | 0.2013 | |
| black | 68.8 | 5.8 | 104 | 68.9 | 4.6 | 308 | -0.1 | 7.4 | 139 | 0.9883 | |
| latino | 75.0 | 5.7 | 82 | 75.4 | 7.4 | 75 | -0.4 | 9.4 | 76 | 0.9620 | |
| white | 70.0 | 3.9 | 263 | 85.1 | 2.3 | 513 | -15.1 | 4.6 | 300 | 0.0011 | ** |
6.2 Multivariable influence of race on neighborhood satisfaction
Developed the model for each DCAS in a series of three steps to understand what is occurring in the data. The three are:
- Race-only
- Race and individual demographic characteristics (age, foreign-born, gender, children present, marital status, and education)
- Race, individual demographic characteristics, and neighborhood experience (home ownership, years in the neighborhood, and perceived neighborhood size)
m1bn <- "raceeth"
m2bn <- paste(m1bn, "+ agec + forbornc + manc + kidsc + marriedc"
,"+ educ1c + educ3c + educ4c + educ5c"
)
m3bn <- paste(m2bn, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")6.2.1 Combining DCAS 2016 and DCAS 2018 data for analysis
I obtained combined sampling weights for the 2016 and 2018 waves of the DCAS. Having these data allowed me to directly assess the influence of being sampled in a multiracial neighborhood on neighborhood satisfaction. This addresses a reviewer comment that combining the data offers the best way to compare residents of multiracial neighborhoods with residents elsewhere in the DC area.
I first combined the data and then multiply imputed the combined data to create a multiple imputation object, dcasCsvy, containing five multiply imputed samples.
wgts <- foreign::read.spss(
'../data/dcas/DCAS_combined_weights.sav'
) %>%
as_tibble() %>%
mutate(across(everything(), ~trimws(.x))) %>%
mutate(
wave = if_else(study=="O1087", "dcas16", "dcas18"),
rid = if_else(wave=="dcas16", trimws(studycase), trimws(RID)),
totwgt = as.numeric(TOTALWEIGHT),
totwgt18 = as.numeric(TOTAL2018WEIGHT)
)
idvarsC <- c('rid', 'strata')
dcasC <- bind_rows(
select(dcas16, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
select(dcas18, c(!!!vars, !!!nominals, !!!nivars, 'rid', 'strata')),
) %>%
left_join(select(wgts, rid, totwgt)) %>%
mutate(
nhdsmp = if_else(grepl("^Global", strata), "quad",
if_else(grepl("^Latino", strata), "latino", "all")),
nhdsmp = ordered(nhdsmp, levels=c("all", "quad", "latino"))
) %>%
subset(
raceeth != 'other'
) %>%
mutate(
raceeth = factor(raceeth)
)
contrasts(dcasC$nhdsmp) <- contr.treatment(3)
idvarsC <- c(idvarsC, 'totwgt', 'nhdsmp')
set.seed(214518)
dcasCmi <- amelia(dcasC[, unique(c(vars, nominals, nivars, idvarsC))],
m=5, noms=nominals, emburn=c(500,500), p2s=FALSE,
idvars=c(nivars, idvarsC))
dcasCsvy <- svydesign(
id=~rid, weights=~totwgt, data=imputationList(dcasCmi$imputations)
) %>%
svy.center()6.2.2 Estimate model of satisfaction using combined DCAS 2016 and DCAS 2018 data
I use the same variables as the within-neighborhood analysis but include an interaction between respondent race and the sampled neighborhood type (all neighborhoods, global neighborhood, or disproportionately Latinx neighborhood). The coefficients for the interactions indicate the difference in the log-odds of satisfaction for the racial group in the DC area versus those in global or disproportionately Latino neighborhoods.
m1bnC <- "nhdsmp * raceeth"
m2bnC <- paste(m1bnC, "+ agec + forbornc + manc + kidsc + marriedc"
,"+ educ1c + educ3c + educ4c + educ5c"
)
m3bnC <- paste(m2bnC, "+ ownc + nhdyrsc + nhdsize2c + nhdsize3c")
## Warnings suppressed: non-integer #successes in a binomial glm
m1C <- with(dcasCsvy, svyglm(
as.formula(paste('satisfied ~', m1bnC)), family=binomial,
contrasts = list(nhdsmp = "contr.treatment"))
)
m2C <- with(dcasCsvy, svyglm(
as.formula(paste('satisfied ~', m2bnC)), family=binomial,
contrasts = list(nhdsmp = "contr.treatment"))
)
m3C <- with(dcasCsvy, svyglm(
as.formula(paste('satisfied ~', m3bnC)), family=binomial,
contrasts = list(nhdsmp = "contr.treatment"))
)The parameter estimates and standard errors can be found in Table 6.2, below, and are saved to the file tables/combined.tex.
regression_labelsC <- c(racelabs[1:3],
"Multiracial neighborhoods",
paste("~x ", racelabs[1:3]),
"Predom. Latinx neighborhoods",
paste("~x ", racelabs[1:3]),
regression_labels[-1:-3])
coef_names <- c("(Intercept)",
paste0("raceeth", racelevs[2:4]),
paste0("nhdsmpquad", c("", paste0(":raceeth", racelevs[2:4]))),
paste0("nhdsmplatino", c("", paste0(":raceeth", racelevs[2:4]))),
"agec", "forbornc", "manc", "kidsc", "marriedc",
paste0("educ", c(1, 3:5), "c"), "ownc", "nhdyrsc",
paste0("nhdsize", c(2, 3), "c"))
cmb_tbl_cap <- paste(
"Logistic regression coefficients and standard errors predicted",
"from models estimating neighborhood satisfaction among residents",
"combining DCAS 2016 and DCAS 2018 data"
)
cmb_tbl <- report_models(
MIcombine_aic(m1C), MIcombine_aic(m2C), MIcombine_aic(m3C),
reglabels = regression_labelsC,
use.headers = TRUE,
coefs=coef_names
) %>%
set_top_padding(0) %>%
set_bottom_padding(0) %>%
set_bottom_border(1, everything(), TRUE) %>%
set_caption(cmb_tbl_cap) %>%
set_label("tab:combined")
cmb_tbl| (1) | (2) | (3) | |
|---|---|---|---|
| (Intercept) | 1.998 *** | 1.964 *** | 1.950 *** |
| (0.196) | (0.230) | (0.232) | |
| Race | |||
| Asian | -0.024 | -0.138 | -0.095 |
| (0.389) | (0.469) | (0.475) | |
| Black | -1.033 *** | -0.897 ** | -0.826 ** |
| (0.283) | (0.284) | (0.288) | |
| Latino | -0.975 | -0.901 | -0.857 |
| (0.511) | (0.536) | (0.544) | |
| Multiracial neighborhoods | -1.186 *** | -1.093 ** | -1.063 ** |
| (0.306) | (0.345) | (0.338) | |
| ~x Asian | 0.092 | 0.109 | 0.041 |
| (0.641) | (0.618) | (0.616) | |
| ~x Black | 1.320 * | 1.240 * | 1.113 * |
| (0.531) | (0.596) | (0.546) | |
| ~x Latino | 1.066 | 0.990 | 0.948 |
| (0.717) | (0.704) | (0.714) | |
| Predom. Latinx neighborhoods | -1.516 ** | -1.505 ** | -1.426 *** |
| (0.471) | (0.460) | (0.413) | |
| ~x Asian | -0.216 | -0.127 | -0.288 |
| (0.702) | (0.708) | (0.673) | |
| ~x Black | 1.394 * | 1.369 * | 1.268 * |
| (0.613) | (0.604) | (0.562) | |
| ~x Latino | 1.200 | 1.144 | 1.063 |
| (0.726) | (0.689) | (0.666) | |
| Demographics | |||
| Age | 0.004 | 0.001 | |
| (0.006) | (0.007) | ||
| Foreign Born | 0.162 | 0.189 | |
| (0.287) | (0.296) | ||
| Male | -0.243 | -0.262 | |
| (0.197) | (0.198) | ||
| Children Present | -0.176 | -0.193 | |
| (0.224) | (0.223) | ||
| Married | 0.446 * | 0.364 | |
| (0.216) | (0.217) | ||
| Socioeconomic | |||
| <H.S. | 0.031 | 0.101 | |
| (0.379) | (0.381) | ||
| Some college, no B.A. | -0.104 | -0.087 | |
| (0.322) | (0.324) | ||
| B.A. | 0.125 | 0.140 | |
| (0.321) | (0.322) | ||
| M.A.+ | 0.130 | 0.109 | |
| (0.313) | (0.321) | ||
| Neighborhood perceptions | |||
| Home owner | 0.456 * | ||
| (0.217) | |||
| Years in neighborhood | -0.001 | ||
| (0.009) | |||
| 10-50 blocks | 0.300 | ||
| (0.210) | |||
| >50 blocks | -0.580 | ||
| (0.427) | |||
| N | 2151 | 2151 | 2151 |
| AIC | 1984.405 | 1978.436 | 1959.919 |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | |||
6.2.3 Calculate predicted probabilities and marginal effects from between-data comparison
Calculate the predicted probabilities and standard errors at the intercept of control variables based on the third model in Table 6.2. To provide a conservative estimate, the standard errors represent the maximum standard error across the estimates from the five imputed datasets. The predicted values are plotted in Figure 6.1 and saved in the file images/between-prediction.pdf.
## Get list of control variables from formula (without sample & race)
ctrlvarnames <- strsplit(m3bnC, " \\+ ")[[1]][-1]
## Record display names for neighborhood types
sampnames <- c(
all = "DC area", quad = "Multiracial neighborhoods", latino = "latino"
)
## Create data with predictions of model of combined sample
bnC_pred <- tibble(
raceeth = relevel(factor(rep(racelevs[-1], 3)), ref = "white"),
nhdsmp = factor(rep(c("all", "quad", "latino"), each = 4)),
## Create matrix containing zeros on all control variables
!!!matrix(rep(0, 12 * length(ctrlvarnames)), nrow = 12) %>%
as_tibble(.name_repair = ~ctrlvarnames),
) %>%
mutate(
## Predict across all five imputations
!!!lapply(m3C, prediction, data = ., type = "link") %>%
## Keep only the predictions and standard errors
lapply(select, fitted, se.fitted) %>%
## Combine the five separate data frames of predictions
bind_cols(
.name_repair = ~paste0(
rep(c("fitted", "se"), 5), rep(1:5, each = 2)
)
) %>%
## Summarize across rows of predictions (mean) and SEs (max)
mutate(
yhat = rowMeans(across(.cols = seq(1, 9, 2))),
se = apply(.[, seq(2, 10, 2)], 1, max)
) %>%
select(yhat, se),
## Calculate predicted value in response scale and standard errors
p = plogis(yhat),
uci = plogis(yhat + 1.96 * se),
lci = plogis(yhat - 1.96 * se),
race = ordered(raceeth, levels=racelevs[-1]),
samp = recode_factor(
nhdsmp, !!!c(
all = "DC area",
quad = "Multiracial neighborhoods"
)
)
) %>%
select(raceeth, race, samp, yhat, se, p, uci, lci)
## Plot values
bnC_pred_plt <- ggplot(
filter(bnC_pred, samp !="latino"),
aes(
x = race, y = p, ymin = lci, ymax = uci,
group = samp, color = samp, shape = samp
)
) +
geom_point(position = position_dodge(dodge_wd)) +
geom_linerange(position = position_dodge(dodge_wd)) +
scale_color_manual(values = c("#888888", "#222222")) +
scale_x_discrete(
labels=gsub("^(\\w)", "\\U\\1", racelevs, perl=TRUE)
) +
scale_y_continuous(
limits=c(.45, 1), breaks=seq(.5, 1.0, .1)
) +
labs(
x = NULL,
y = "Probability",
shape = NULL, color = NULL
) +
theme_minimal() +
theme(
legend.position = "bottom",
panel.grid.major.x = element_blank()
)
cap = paste(
"Predicted probability of neighborhood satisfaction among residents of",
"multiracial neighborhoods and the DC area based on model using combined",
"data from the DCAS 2016 and DCAS 2018"
)
ggsave("images/between-prediction.pdf", plot = bnC_pred_plt,
width=6.75, units='in')
Figure 6.1: Predicted probability of neighborhood satisfaction among residents of multiracial neighborhoods and the DC area based on model using combined data from the DCAS 2016 and DCAS 2018
Table 6.3 reports the predicted percentages of residents satisfied with their neighborhoods in all DC-area neighborhoods and in multiracial neighborhoods, as well as the difference between the two probabilities.
bn_pred_tbl <- bnC_pred %>%
select(race, samp, p) %>%
filter(samp != "latino") %>%
pivot_wider(id_cols = race, names_from = samp, values_from = p) %>%
mutate(
across(2:3, ~. * 100),
race = sub("^(\\w)", "\\U\\1", race, perl = TRUE),
Difference = `DC area` - `Multiracial neighborhoods`
) %>%
as_huxtable(digits = 1) %>%
set_caption(paste(
"Predicted percentage of residents satisfied with their neighborhood",
"by race and DCAS sample estimated from model with combined samples"
)) %>%
set_label("tab:bnC")
bn_pred_tbl| race | DC area | Multiracial neighborhoods | Difference |
|---|---|---|---|
| Asian | 86.5 | 69.7 | 16.8 |
| Black | 75.5 | 76.4 | -0.912 |
| Latino | 74.9 | 72.7 | 2.23 |
| White | 87.5 | 70.8 | 16.7 |
From the third model in Table 6.2, I estimated the marginal effects of living in a multiracial neighborhood compared to living in the DC area generally by race. I plotted the mean of the average marginal effects across the five imputations and then the maximum absolute values of the lower and upper confidence levels in Figure 6.2.
## Calculate and combine marginal effects across imputations
# Warning that contrasts are dropped from factor nhdsmp suppressed
mfxC <- lapply(
m3C, margins, variables = "nhdsmp", at = list(raceeth = racelevs[-1]),
design = dcasCsvy$designs[[1]] #Weights are the same across imputations
)
mfxC_all <- mfxC %>%
lapply(summary) %>%
bind_rows() %>%
as_tibble() %>%
filter(!grepl("latino$", factor)) %>%
mutate(
imp = rep(1:5, each = 4),
raceeth = sub("^(\\w)", "\\U\\1", raceeth, perl = TRUE)
)
## Plot marginal effects
mfxC_sum <- mfxC_all %>%
group_by(raceeth) %>%
mutate(
lower = sign(lower) * max(abs(lower)),
upper = sign(upper) * max(abs(upper))
) %>%
summarize(
across(c(AME, lower, upper), mean)
) %>%
ungroup()
mfxC_plt <- ggplot(mfxC_sum,
aes(x = raceeth, y = AME, ymin = lower, ymax = upper)) +
geom_hline(yintercept = 0, color = "#cccccc", size = 1) +
geom_point() +
geom_linerange() +
scale_y_continuous(
breaks = round(seq(-0.3, 0.3, 0.1), 1), limits = c(-0.35, 0.35)
) +
labs(
x = NULL,
y = "Average marginal effect"
) +
theme_minimal() +
theme(
panel.grid.major.x = element_blank()
)
cap = paste("Average marginal effects on satisfaction of living in multiracial",
"neighborhoods compared to all DC-area residents of the same race")mfxC_plt
Figure 6.2: Average marginal effects on satisfaction of living in multiracial neighborhoods compared to all DC-area residents of the same race
ggsave("images/between.pdf", plot = mfxC_plt,
width=6.75, units='in')