Health Outcomes

Data Sources

For health outcomes data, we are interested in the incidence of asthma, melanoma of the skin (skin cancer) and lung cancer across time and space. We obtained age-adjusted incidence rates of asthma related hospitalizations, age-adjusted incidence rates of melanoma of the skin and age-adjusted incidence rates of lung cancer. The rates were per 100,000 people for the three U.S. states New York, Ohio and Pennsylvania over multiple years. The following bullet points provide links from where we obtained the data.

Asthma
Melanoma of the skin (skin cancer)
Lung cancer

We also obtained age-adjusted incidence rates data for these three health outcomes for the U.S. states New York, Ohio and Pennsylvania at the county-level at fixed time points. The following bullet points provide links from where we obtained the data. The asthma data is provided for the year 2016 and the skin cancer and lung cancer data is averaged over the years 2014-2018.

Asthma
Melanoma of the skin (skin cancer) and lung cancer
- All 50 states

Each of these data sets were exported and each imported into R. For each of the three health outcomes, the data sets were merged across the three states for the longitudinal data and for the cross-sectional data. This effort resulted in six data sets. The data import and cleaning of the original data for asthma, melanoma of the skin and lung cancer can be found in our github repository.

We will now import the six data sets and merge all longitudinal data together and merge all cross-sectional data together to have two data sets.

A significant amount of cleaning was required, as can be seen in the code chunks below:

Data Cleaning and Tidying

Load and combine longitudinal state-level datasets.

Lung cancer for NY, PA, and OH:

#state-level lung cancer PA data https://www.phaim1.health.pa.gov/EDD/

pa_cancer = read.csv('./data/lung/pa_state_lung.csv', sep = ";", header = T, skip = 3) %>%
  janitor::clean_names() %>%
  select(year, rate_ratio_result) %>%
  map_df(str_replace, pattern = ",", replacement = ".") %>%
    map_df(as.numeric) %>%
    rename(c("age_adjusted_incidence_rate" = "rate_ratio_result")) %>%
  add_column(state = "PA")

#state-level lung cancer OH data https://publicapps.odh.ohio.gov/EDW/DataBrowser/Browse/StateLayoutLockdownCancers

oh_cancer = read.csv('./data/lung/oh_state_lung.csv', sep = ";") %>%
  janitor::clean_names() %>%
  rename(year = cancer_year_year) %>%
  select(year, age_adjusted_rate) %>%
    map_df(str_replace, pattern = ",", replacement = ".") %>%
    map_df(as.numeric) %>%
    rename(c("age_adjusted_incidence_rate" = "age_adjusted_rate")) %>%
  add_column(state = "OH") %>%
  distinct() %>%
  filter(complete.cases(.))


#state-level lung cancer NY data https://www.health.ny.gov/statistics/cancer/registry/table2/tb2lungnys.htm

ny_cancer = read.csv("./data/lung/ny_state_lung.csv", skip = 2)[ ,1:3] %>%
  janitor::clean_names() %>%
  rename(year = x) %>%
  select(year, rate_per_100_000_population) %>%
  rename(c("age_adjusted_incidence_rate" = "rate_per_100_000_population")) %>%
  add_column(state = "NY")

Melanoma for NY, PA, and OH:

# Read in the data and set the column names
# Add a state column and make all rows = "NY"
# Make the year column of type integer
# Add a cancer type column and make all rows = "melanoma"
# Select the state, year, cancer_type and mf_case_rate columns
# Rename mf_case_rate column "age_adjusted_incidence_rate"
nys_melanoma_age_adjusted <- read_csv(here("data",
                                           "incidence_melanoma_data",
                                           "nys_melanoma_incidence_mortality_year.csv"),
                                      skip = 3,
                                      col_names = c("year",
                                                    "mf_cases",
                                                    "mf_case_rate",
                                                    "mf_case_rate_ci",
                                                    "m_cases",
                                                    "m_case_rate",
                                                    "m_case_rate_ci",
                                                    "f_cases",
                                                    "f_case_rate",
                                                    "f_case_rate_ci",
                                                    "mf_deaths",
                                                    "mf_death_rate",
                                                    "mf_death_rate_ci",
                                                    "m_deaths",
                                                    "m_death_rate",
                                                    "m_death_rate_ci",
                                                    "f_deaths",
                                                    "f_death_rate",
                                                    "f_death_rate_ci"
                                                    )) %>%
  mutate(state = "NY",
         year = as.integer(year),
         cancer_type = "melanoma") %>%
  select(state, year, cancer_type, mf_case_rate) %>%
  rename(age_adjusted_incidence_rate = mf_case_rate)


# Read in data set
ohio_cancer_incidence <- readxl::read_xls(here("data",
                                               "incidence_melanoma_data",
                                               "ohio_cancer_incidence_year.xls"),
                                          skip = 1)
###############################################################################
# Melanoma cases

# Get cases for each year
ohio_cancer_cases <- ohio_cancer_incidence %>%
  select("Site/Type", starts_with("Case"))

# Update column names for the Ohio cancer cases data frame
colnames(ohio_cancer_cases) <- c("type", 1996:2018, "total")

# Get melanoma of the skin cancer cases for Ohio
# Pivot longer year
# Select year and cases variables
ohio_melanoma_cases <- ohio_cancer_cases %>%
  filter(type == "Melanoma of Skin") %>%
  pivot_longer("1996":total,
               names_to = "year",
               values_to = "cases") %>%
  select(year, cases)

###############################################################################
# Melanoma age-adjusted

# Get age-adjusted for each year
ohio_cancer_age_adjusted <- ohio_cancer_incidence %>%
  select("Site/Type", starts_with("Age"))

# Update column names for Ohio cancer age-adjusted data frame
colnames(ohio_cancer_age_adjusted) <- c("type", 1996:2018, "total")

# Get melanoma of the skin cancer age-adjusted for Ohio
# Pivot longer year
# Select year and age_adjusted variables
# Remove total row
# Add a state column and set all rows = "OH"
# Convert type of year column to be integer
# Add a cancer type column and set all rows = "melanoma"
# Convert type of age_adjusted_incidence_rate column to be double
# Reorder columns: state, year, cancer_type, age_adjusted_incidence_rate
ohio_melanoma_age_adjusted <- ohio_cancer_age_adjusted %>%
  filter(type == "Melanoma of Skin") %>%
  pivot_longer("1996":total,
               names_to = "year",
               values_to = "age_adjusted_incidence_rate") %>%
  select(year, age_adjusted_incidence_rate) %>%
  filter(year != "total") %>%
  mutate(state = "OH", 
         year = as.integer(year),
         cancer_type = "melanoma",
         age_adjusted_incidence_rate = as.double(age_adjusted_incidence_rate)) %>%
  select(state, year, cancer_type, age_adjusted_incidence_rate)

# Final data frames: ohio_melanoma_cases and ohio_melanoma_age_adjusted


# Read in data, skip first three lines
# Clean variable names
# Select relevant variables
# Add a state column and set all rows = "PA"
# Convert type of year column to be integer
# Add a cancer_type column and set all rows = "melanoma"
# Rename rate_ratio_result column to "age_adjusted_incidence_rate"
# Reorder columns: state, year, cancer_type, age_adjusted_incidence_rate
# Sort data frame by ascending year
pa_melanoma_age_adjusted <- read_csv(here("data",
                                          "incidence_melanoma_data",
                                          "pa_melanoma_incidence_year.csv"),
                                     skip = 3) %>%
                            janitor::clean_names() %>%
  select(year, rate_ratio_result) %>%
  mutate(state = "PA",
         year = as.integer(year),
         cancer_type = "melanoma") %>%
  rename(age_adjusted_incidence_rate = rate_ratio_result) %>%
  select(state, year, cancer_type, age_adjusted_incidence_rate) %>%
  arrange(year)

Asthma for NY, PA, and OH:

#new york asthma data

ny_asthma <-
  read_csv("./data/asthma_data/NYS_ASTHMA.csv") %>% 
  janitor::clean_names()

# ohio asthma data

oh_asthma <- 
  read_csv("./data/asthma_data/ohio_all.csv") %>% 
  janitor::clean_names()

#pennsylvania asthma data

pa_asthma <-
  read_csv("./data/asthma_data/pa_asthma_all.csv", col_names = TRUE) %>% 
  janitor::clean_names()

#ohio state level data
oh_state <-
  oh_asthma %>% 
  filter(age_group == "All",
         month == "All",
         race == "Total",
         sex == "All",
         geography == "Ohio",
         visit == "Inpatient",
         frequency > 0) %>% 
  rename(state = geography) %>% 
  mutate(rate = rate * 10,
         outcome = "asthma",
         state = "OH") %>% 
  select(year, state, outcome, rate)

#pennsylvania state level data
pa_state <-
  pa_asthma %>% 
  filter(age == "All Ages",
         race == "All Races",
         sex == "Total",
         geography_code == "0",
         rate_adj == "Age-Adjusted Rate") %>% 
  rename(state = geography) %>% 
  mutate(outcome = "asthma",
         state = "PA") %>% 
  select(year, state, outcome, rate)


#new york state level data
ny_state <-
  ny_asthma %>% 
  filter(visit == "Inpatient",
         geography == "New York State",
         age_group == "Total",
         sex == "Total",
         rate_adj == "Age-Adjusted Rate",
         month == "Total",
         str_detect(year, "-20", negate = TRUE)) %>% 
  rename(rate = rate_per_10k) %>% 
  mutate(rate = round(rate * 10, 2),
         state = "NY",
         outcome = "asthma") %>% 
  select(year, state, outcome, rate)


# merged state level asthma
asthma_state <-
  rbind(ny_state, oh_state, pa_state) %>% 
  write_csv("./asthma_state.csv")

Combine the three state-level health outcome datasets into one.

# merging state level data for lung cancer

state_lc <- rbind(pa_cancer, oh_cancer, ny_cancer)

state_lc_wide = state_lc %>%
  pivot_wider(
    names_from = state,
    values_from = age_adjusted_incidence_rate) %>%
  rename(c("PA_AAIR" = "PA", "NY_AAIR" = "NY", "OH_AAIR" = "OH"))

write_csv(state_lc_wide, "./data/state_lc_wide.csv")


# Merge state-level data frames with bind_rows
state_melanoma_age_adjusted <- 
  bind_rows(nys_melanoma_age_adjusted,
            ohio_melanoma_age_adjusted,
            pa_melanoma_age_adjusted)

# read in longitudinal state data for lung and skin cancer

# longitudinal state level lung cancer data
state_lc_data = read_csv("./data/lung/state_lc_data.csv")

# longitudinal state level melanoma data
state_mel_data = read_csv("./data/incidence_melanoma_data/state_melanoma_age_adjusted.csv") %>%
  rename(c("outcome" = "cancer_type"))

# combine cancer outcome data
lc_mel_state = bind_rows(state_lc_data, state_mel_data) %>%
  select(state, year, outcome, age_adjusted_incidence_rate)

# Read in longitudinal state-level asthma data
# Rename and select columns
asthma_state <- read_csv(here::here("data", "asthma_state.csv")) %>%
  rename(age_adjusted_incidence_rate = rate) %>%
  select(state, year, outcome, age_adjusted_incidence_rate)

# Bind rows of longitudinal asthma and longitudinal cancer by state and arrange 
# accordingly
lc_mel_asthma_state <- bind_rows(lc_mel_state, asthma_state) %>%
  arrange(state, year, outcome)

The resulting data set has 234 rows and 4 columns. There are 3 states: New York, Ohio and Pennsylvania. Years span from 1976 to 2019. There are 234 non-missing age-adjusted incidence rates. So, no missing data.

The columns in this data set are:
state: The U.S. state.
year: The year.
outcome: The health outcome.
age_adjusted_incidence_rate: The age-adjusted incidence rate for the health outcome per 100,000.

Lastly, let’s export the combined data.

write_csv(lc_mel_asthma_state, here::here("data", "lc_mel_asthma_state.csv"))

Load and combine cross-sectional county-level datasets.

Let’s begin with a helper function

read_and_clean_county_data <- function(file_path_name, state, outcome) {
  read_csv(file_path_name,
           skip = 8) %>%
  janitor::clean_names() %>%
  select(county, age_adjusted_incidence_rate_rate_note_cases_per_100_000) %>%
  mutate(county = stringr::str_remove(county, "\\(.\\)$"),
          state = state,
         outcome = outcome,
         age_adjusted_incidence_rate =
           as.numeric(age_adjusted_incidence_rate_rate_note_cases_per_100_000)) %>%
  select(state, county, outcome, age_adjusted_incidence_rate)
}

Like before, we load lung cancer data for NY, PA, and OH, only this time is cross-sectional and county-wide:

#county-level lung cancer PA data https://statecancerprofiles.cancer.gov/

pa_county_lc = read_and_clean_county_data('./data/lung/pa_lung_data_county.csv', 
                                          state = "PA",
                                          outcome = "lung cancer") %>%
                                          filter(complete.cases(.)) %>%
                                          slice(-c(1, 2))

#county-level lung cancer OH data https://statecancerprofiles.cancer.gov/

oh_county_lc = read_and_clean_county_data('./data/lung/oh_lung_data_county.csv', 
                                          state = "OH", 
                                          outcome = "lung cancer") %>%
                                          filter(complete.cases(.)) %>%
                                          slice(-c(1, 2))

#county-level lung cancer NY data https://statecancerprofiles.cancer.gov/

ny_county_lc = read_and_clean_county_data('./data/lung/ny_lung_data_county.csv', 
                                          state = "NY", 
                                          outcome = "lung cancer") %>%
                                          filter(complete.cases(.)) %>%
                                          slice(-c(1, 2))

# merge county data
county_lc <- rbind(pa_county_lc, oh_county_lc, ny_county_lc) %>%
   mutate(county = (gsub('.{7}$', '', county)))

Melanoma for NY, PA, and OH:

# Read in New York melanoma incidence county-level data averaged from 2014 to 
# 2018
nys_county_melanoma_incidence_2014_2018 <- 
read_and_clean_county_data(here("data",
                                "incidence_melanoma_data",
                                "nys_county_melanoma_incidence_2014_2018.csv"),
                           "NY",
                           "melanoma")

# Read in Ohio melanoma incidence county-level data averaged from 2014 to 2018
ohio_county_melanoma_incidence_2014_2018 <- 
read_and_clean_county_data(here("data",
                                "incidence_melanoma_data",
                                "ohio_county_melanoma_incidence_2014_2018.csv"),
                           "OH",
                           "melanoma")

# Read in Pennsylvania melanoma incidence county-level data averaged from 
# 2014 to 2018
pa_county_melanoma_incidence_2014_2018 <- 
read_and_clean_county_data(here("data",
                                "incidence_melanoma_data",
                                "pa_county_melanoma_incidence_2014_2018.csv"),
                           "PA",
                           "melanoma")
# merge county data
county_mel <- 
  bind_rows(nys_county_melanoma_incidence_2014_2018,
            ohio_county_melanoma_incidence_2014_2018,
            pa_county_melanoma_incidence_2014_2018)

Asthma for NY, PA, and OH:

# new york county level asthma data

ny_co <-
  ny_asthma %>% 
  filter(age_group == "Total", 
         month == "Total",
         year == "2015-2017",
         visit == "Inpatient",
         rate_adj == "Age-Adjusted Rate",
         geography != "New York State",
         geography != "New York City",
         geography != "NYS excluding NYC") %>% 
  mutate(county = tolower(geography),
         rate = rate_per_10k,
         region = "new york",
         county = replace(county, 
                          county == "new york county", "new york")) %>% 
  select(region, county, rate) %>% 
  group_by(county)


# ohio county level asthma data

oh_co <- 
  oh_asthma %>% 
  filter(geography != "Ohio",
         age_group == "All", 
         visit == "Inpatient") %>% 
  mutate(county = tolower(geography),
         region = "ohio") %>%
  select(region, county, rate) %>% 
  group_by(county)

#pennsylvania county data

pa_co <- 
  pa_asthma %>% 
  filter(geography_code != "0",
         rate_adj == "Age-Adjusted Rate",
         year == "2016",
         age == "All Ages",
         sex == "Total") %>% 
  mutate(county = tolower(geography),
         region = "pennsylvania",
         rate = as.numeric(rate) / 10) %>%
  select(region, county, rate) %>% 
  group_by(county)

# merge county data

county_asth <-
  bind_rows(oh_co, ny_co, pa_co)

Combine the cross-sectional lung and skin cancer data at the county level. We will add a column here for county FIPS code for mapping.

# we need fips county fips for plotting, so lets combine this onto our combined dataset

fips_codes = read_csv("./data/fips_codes.csv")

# cleaning to make the data sets compatible

county_mel = county_mel %>%
  drop_na() 

county_mel <- county_mel[!(county_mel$county == "Ohio" | county_mel$county == "Pennsylvania" | 
     county_mel$county == "New York"), ] %>%
  filter(!grepl('SEER', county))

county_mel$county <- gsub(" County","", county_mel$county)

county_mel$county[county_mel$county == "St Lawrence"] <- "St. Lawrence"

county_lc$county <- gsub(" County","", county_lc$county)

# binding rows

lc_mel_county = bind_rows(county_lc, county_mel) %>%
  filter(complete.cases(.)) 

# Add FIPS column to the data
fips = fips_codes %>%
    janitor::clean_names() %>%
    rename(c("county" = "name"))

# fips df has 'St Lawrence' instead of 'St. Lawrence'
fips$county[fips$county == "St Lawrence"] <- "St. Lawrence"

lc_mel_county = merge(lc_mel_county, fips, by = c("state", "county"))

Finally, we will combine the cross-sectional asthma county data with the cross-sectional cancer data.

# Only get county data for the year 2016 since we need county data at
# a fixed time point (i.e. not longitudinal)
# Adjust column names and types in prep for merge
county_asth %>%
  filter(year == 2016) %>%
  select(-year) %>%
  rename(age_adjusted_incidence_rate = rate) %>%
  mutate(age_adjusted_incidence_rate = as.double(age_adjusted_incidence_rate))

# asthma_county has 'New York County' instead of 'New York' in fips df, and "GAllia" instead of "Gallia"
asthma_county$county[asthma_county$county == "New York County"] <- "New York"
asthma_county$county[asthma_county$county == "GAllia"] <- "Gallia"

asthma_county <- merge(asthma_county, fips, by = c("state", "county"))

# Bind rows of cancer and asthma by county and arrange accordingly
lc_mel_asthma_county <- bind_rows(lc_mel_county, asthma_county) %>%
  arrange(state, county, outcome)

The resulting data set has 647 rows and 5 columns. There are 3 states: New York, Ohio and Pennsylvania.
There are 624 non-missing age-adjusted incidence rates.

The columns in this data set are:
state: The U.S. state.
county: The county in the state.
outcome: The health outcome.
age_adjusted_incidence_rate: The age-adjusted incidence rate for the health outcome per 100,000.
fips: The state-county FIPS code.

The following table displays the number of counties for which we have non-missing age-adjusted incidence rates in each state and the total number of counties in each state

lc_mel_asthma_county %>%
  group_by(state) %>%
  summarize(
    non_missing_county = sum(!is.na(age_adjusted_incidence_rate)),
    total_counties = n()
  ) %>%
  knitr::kable(col.names = c("State", "Non-Missing County", "Total County"))

State	Non-Missing County	Total County
NY	185	185
OH	264	264
PA	175	198

From this table, we see that we are missing 23 age-adjusted incidence rates from Pennsylvania. We have all age-adjusted incidence rates for New York and Ohio.

Lastly, let’s export the combined data.

write_csv(lc_mel_asthma_county, here::here("data", "lc_mel_asthma_county.csv"))

Looking at the “Data Exploration: Map” Shiny.app, we can see that there is the most spread in the asthma incidence rate, and the least spread in the melanoma incidence rates.

Now, that we have combined the health outcomes data, we can explore.

Visualizing Trends in Health Outcomes over Space and Time

We can define a function to generate cross-sectional maps for a given health outcome at the county-level.

# Purpose: Generates a map for the given county-level health outcome data, 
#          outcome and plot title.
# Arguments: df: The data frame, the county-level health outcome data. 
#            outcome_var: a character, the health outcome of interest.
#            plot_title: a character, the plot title.
# Returns: The plotly map.

# lc_mel_asthma_county has 'St. Lawrence' instead of 'St Lawrence' the counties list will need to map
lc_mel_asthma_county$county[lc_mel_asthma_county$county == "St. Lawrence"] <- "St Lawrence"

map_by_outcome <- function(df, outcome_var, plot_title) {
  url <- 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
  counties <- rjson::fromJSON(file = url)

  df %>%
    filter(outcome == outcome_var) %>%
    plot_ly() %>%
    add_trace(
      type = "choroplethmapbox",
      geojson = counties,
      locations = ~fips,
      z = ~age_adjusted_incidence_rate,
      text = ~county,
      colorscale = "Viridis",
      reversescale = TRUE,
      marker = list(line = list(width = 0),
                    opacity = 0.5)
      ) %>%
    colorbar(title = "Age-Adjusted Incidence Rate") %>%
    layout(
      title = plot_title,
      mapbox = list(
        style = "carto-positron",
        zoom = 4,
        center = list(lon = -77.215135, lat = 41.164818)
        )
      )
}

We can also define a function to generate longitudinal line graphs of the age-adjusted incidence rate of the specified health outcome by year for all three states.

# Purpose: Generate longitudinal line graphs of the age-adjusted incidence rate 
# of the specified health outcome by year for all three states.
# Arguments: df: The data frame, the longitudinal state-level health outcome 
#                data.
#            outcome_var: a character, the health outcome of interest.
#            plot_title: a character, the plot title.
# Returns: The plotly map.
aair_by_year_by_outcome <- function(df, outcome_var, plot_title) {
  df %>%
    filter(outcome == outcome_var) %>%
    pivot_wider(names_from = "state",
                values_from = "age_adjusted_incidence_rate") %>%
    plot_ly(x = ~year) %>%
    add_lines(y = ~NY, name = "New York") %>%
    add_lines(y = ~OH, name = "Ohio") %>%
    add_lines(y = ~PA, name = "Pennsylvania") %>% 
    layout(
      title = plot_title,
      xaxis = list(
        rangeselector = list(buttons = list(list(count = 1,
                                                 label = "1 yr",
                                                 step = "year",
                                                 stepmode = "backward"),
                                            list(count = 5,
                                                 label = "5 yr",
                                                 step = "year",
                                                 stepmode = "backward"),
                                            list(count = 10,
                                                 label = "10 yr",
                                                 step = "year",
                                                 stepmode = "backward"),
                                            list(step = "all"))),
        rangeslider = list(type = "year"),
        title = "Year"),
      yaxis = list(title = "Age-Adjusted Incidence Rate")
  )  
}

Now, let’s plot the cross-sectional map for each health outcome at the county-level followed by the longitudinal line graph of the age-adjusted incidence rate of the health outcome by year for all three states.

Asthma:

map_by_outcome(lc_mel_asthma_county, 
               "asthma", 
               "Asthma Related Hospitalizations Age-Adjusted Incidence Rates (2016)")

aair_by_year_by_outcome(lc_mel_asthma_state, 
                        "asthma",  
                        "Asthma Age-Adjusted Incidence Rates")

The age-adjusted incidence rates of asthma related hospitalizations, at the county-level, appears relatively higher in major cities in each of the three states, like New York City, Philadelphia and Cleveland, in 2016.

Over time, age-adjusted incidence rates of asthma related hospitalizations have fallen for all three states.

Melanoma of the Skin:

map_by_outcome(lc_mel_asthma_county, 
               "melanoma", 
               "Melanoma of the Skin Age-Adjusted Incidence Rates (2014-2018)")

aair_by_year_by_outcome(lc_mel_asthma_state, 
                        "melanoma", 
                        "Melanoma of the Skin Age-Adjusted Incidence Rates")

The average age-adjusted incidence rates of melanoma of the skin from 2014 to 2018, at the county-level, appears randomly varied across the three states with all states having counties with relatively lower and higher average age-adjusted incidence rates of melanoma of the skin.

Over time, average age-adjusted incidence rates of melanoma of the skin have increased for all three states.

Lung Cancer:

map_by_outcome(lc_mel_asthma_county, 
               "lung cancer", 
               "Lung Cancer Age-Adjusted Incidence Rates (2014-2018)")

aair_by_year_by_outcome(lc_mel_asthma_state, 
                        "lung cancer", 
                        "Lung Cancer Age-Adjusted Incidence Rates")

Relatively high average age-adjusted incidence rates of lung cancer from 2014 to 2018, at the county-level, occur most frequently in Ohio, followed by New York, followed by Pennsylvania.

Average age-adjusted incidence rates of lung cancer roughly increased for all three states until 1998 and have fallen since.

Looking at the three longitudinal line graphs of the age-adjusted incidence rates for the health outcomes by year, while there are some differences in the age-adjusted incidence rates, all three states generally follow the same trend for each health outcome. Perhaps, this consistency in trend can be explained by federal government policy relating to these health outcomes.

Outcome case study of age, race, and gender

Given the limitations of obtaining publicly available data on health outcomes across states and the inconsistencies among the type of data available, it was not possible to assess important contributing factors to the health outcomes explored here. However, for the asthma data available for Pennsylvania it was possible to visualize trends over time when stratifying for demographic variables like age, gender, and race. This was explored below for hospitalizations due to asthma from 2000 until 2019.

pa_asthma <-
  read_csv("./data/asthma_data/pa_asthma_all.csv", col_names = TRUE) %>%
  janitor::clean_names()

# age-adjusted incidence rates of asthma related hospitalizations in PA by year
# plot
pa_asthma_plot <-
  pa_asthma %>% 
  filter(race == "All Races", 
         age == "All Ages",
         sex == "Total",
         geography == "Pennsylvania") %>%
  mutate(rate = as.numeric(rate),
         lb = as.numeric(lb),
         year = as.numeric(year),
         ub = as.numeric(ub)) %>% 
  ggplot(aes(x = year, y = rate)) + 
  geom_point() +
  geom_smooth(method = "loess", se = ) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  labs(title = "Hospitalizations due to asthma in PA, 2000-2019", 
       x ="Year", 
       y = "Rate\n(age-adjusted per 100,000)") +
  theme(plot.title = element_text(hjust = 0.5))

# age-adjusted incidence rates of asthma related hospitalizations in PA by year
# stratified by gender and race plot
race_asthma <-
  pa_asthma %>% 
  filter(geography_code == "0",
         sex != "Total",
         age == "All Ages") %>%
  mutate(rate = as.numeric(rate),
         lb = as.numeric(lb),
         year = as.numeric(year),
         ub = as.numeric(ub)) %>%
  ggplot(aes(x = year, y = rate, color = race)) +
  geom_point() +
  geom_smooth(method = "loess", se = ) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  facet_grid(sex ~ .) + 
  theme(legend.position="bottom") + 
  labs(title = "Stratified by gender and race", 
       x ="Year", y = "Rate \n(age-adjusted per 100,000)") +
  scale_color_viridis_d(name = "Race") +
  theme(plot.title = element_text(hjust = 0.5))

# age-adjusted incidence rates of asthma related hospitalizations in PA by year
# stratified by gender and age plot
age_asthma <-
  pa_asthma %>% 
  filter(geography_code == "0",
         sex != "Total",
         race == "All Races") %>%
  mutate(rate = as.numeric(rate),
         year = as.numeric(year),
         lb = as.numeric(lb),
         ub = as.numeric(ub)) %>% 
  ggplot(aes(x = year, y = rate, color = age)) +
  geom_point() +
  geom_smooth(method = "loess", se = ) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  facet_grid(sex ~ .) + 
  theme(legend.position="bottom") +
  labs(title = "Stratified by gender and age", 
       x ="Year", y = "Rate \n(not age-adjusted per 100,000)") +
  scale_color_viridis_d(name = "Age Group") +
  theme(plot.title = element_text(hjust = 0.5))

# Plots
pa_asthma_plot

race_asthma

age_asthma