# Typeface used for all plot text; fetched from Google Fonts and registered
# under the same family name.
font <- "Roboto Mono"
font_add_google(name = font, family = font)

# Let showtext handle text rendering on graphics devices, at 320 dpi.
showtext_auto()
showtext_opts(dpi = 320)

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# TidyTuesday 2023-10-03: one row per grant opportunity listing.
grants <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-10-03/grants.csv"
)
## Rows: 74669 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): opportunity_number, opportunity_title, agency_code, agency_name, ...
## dbl   (2): opportunity_id, estimated_funding
## dttm  (1): last_updated_date_time
## date  (4): estimated_post_date, estimated_application_due_date, posted_date,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# TidyTuesday 2023-10-03: detailed attributes for a subset of opportunities.
grant_opportunity_details <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-10-03/grant_opportunity_details.csv"
)
## Rows: 2000 Columns: 68
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): funding_opportunity_number, funding_opportunity_title, opportunit...
## dbl   (6): opportunity_id, expected_number_of_awards, version, estimated_tot...
## lgl  (46): cost_sharing_or_matching_requirement, eligibility_individuals, el...
## date  (5): posted_date, last_updated_date, original_closing_date_for_applica...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Split every grant title into one row per word and keep the words that
# contain one of the keyword stems "ethic", "data" or "statistic".
#
# The stems are plain substrings of a regular expression, not glob patterns:
# no `*` wildcards are needed because grepl() already matches anywhere inside
# the word. (Written as "ethic*", the final "c" becomes optional, so words
# such as "ethiopia" would be kept and later counted as statistics grants.)
# "data" also covers "dataset" and "database".
df_1 <- grants |>
  unnest_tokens(word, opportunity_title) |>
  filter(grepl("ethic|data|statistic", word, ignore.case = TRUE)) |>
  rename(grant_word = word)
# Split every agency name into one row per word and keep only the rows whose
# word is exactly one of the agency keywords.
agency_keywords <- c("data", "statistics", "statistical")

df_2 <- grants |>
  unnest_tokens(word, agency_name) |>
  filter(word %in% agency_keywords) |>
  rename(agency_word = word)
# Yearly counts of grants per title-keyword category; this is the plot data.
#
# Each opportunity is counted once, classified by the first matching title
# word that remains after the join, and tallied by posting year.
#
# NOTE(review): only `opportunity_id`, `grant_word` and the df_1 posting date
# survive the select(), and rows that exist only in `df_2` have no
# `posted_date.x`, so drop_na() removes them. The agency-name matches therefore
# do not appear to influence the counts -- confirm this is intended.
final_df <- df_1 |>
  full_join(df_2, by = "opportunity_id") |>
  drop_na(posted_date.x) |>
  mutate(posted_year = year(posted_date.x)) |>
  select(opportunity_id, grant_word, posted_year) |>
  # Keep a single row per opportunity so each grant is counted once.
  group_by(opportunity_id) |>
  slice_head(n = 1) |>
  ungroup() |>
  mutate(word_type = case_when(
    str_detect(grant_word, "data") ~ "Data",
    str_detect(grant_word, "ethic") ~ "Ethics",
    TRUE ~ "Statistics"
  )) |>
  # Passing the columns to count() returns an ungrouped tibble, so no stale
  # grouping leaks into later steps.
  count(posted_year, word_type) |>
  mutate(
    # Fixed level order controls the legend and palette assignment.
    word_type = factor(word_type, levels = c("Data", "Statistics", "Ethics")),
    # Row identifier used as `data_id` for the interactive points; unlike
    # 1:nrow(), row_number() is also correct for an empty result.
    index = row_number()
  )
# Line chart of the number of posted grants per year for each keyword
# category, with interactive points whose tooltip shows the count.
#
# NOTE(review): only the colour scale is set manually; `fill` keeps ggplot2's
# default palette, and the later girafe() call sets
# opts_tooltip(use_fill = TRUE) -- confirm whether `fill` should share the
# "voyageurs" palette.
gg <- final_df |>
  ggplot(aes(x = posted_year, y = n)) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0.
  geom_line(linewidth = .7, aes(color = word_type)) +
  geom_point_interactive(
    size = 1,
    aes(color = word_type, fill = word_type, tooltip = n, data_id = index)
  ) +
  scale_x_continuous(breaks = seq(2005, 2023, 1)) +
  scale_y_continuous(breaks = seq(0, 200, 10)) +
  scale_color_manual(values = park_palette("voyageurs")) +
  labs(x = "Posted Year", y = "Number of Posted Grants") +
  guides(
    color = guide_legend(title = "Keyword Included\nin Grant Title:"),
    # "none" hides the fill legend; FALSE is no longer accepted here
    # (deprecated in ggplot2 3.3.4).
    fill = "none"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 7, family = font, color = "#000000"),
    plot.background = element_rect(fill = "#FFFFFF")
  )
# Render the plot as an interactive ggiraph widget with fully opaque tooltips
# that take their background from the hovered element's fill.
girafe(
  options = list(
    opts_tooltip(opacity = 1, use_fill = TRUE)
  ),
  ggobj = gg
)