The KPHD package is a comprehensive toolkit designed by and for epidemiologists at Kitsap Public Health District. It provides visualization tools following KPHD style guidelines alongside practical epidemiological functions to streamline public health data analysis, reporting, and surveillance workflows.
- KPHD Style Guide: Brand-compliant themes, color palettes, and typography
- Epidemiological Tools: Pending
- Data Privacy: Primary and secondary suppression for protecting small cell counts
- Surveillance Workflows: Pending
- Spatial Analysis: Pending
- Reproducible Reports: Pending
Install the development version from GitHub:
# install.packages("devtools")
devtools::install_github("KPHD-Assessment-Epidemiology/KPHD")

The package requires the following packages:
- ggplot2 - for plotting
- showtext and sysfonts - for Poppins font support
- grDevices - for color interpolation
library(KPHD)
library(ggplot2)
# Setup KPHD theme
setup_kphd_fonts()
set_kphd_theme()
# Visualization example
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
geom_point(size = 3) +
scale_color_kphd() +
labs(
title = "Fuel Economy by Weight",
subtitle = "Using KPHD theme and colors",
x = "Weight (1000 lbs)",
y = "Miles per Gallon",
color = "Cylinders"
)
show_kphd_palettes()

- primary: Main KPHD brand colors (colorblind-friendly)
- sequential: Marine blue gradients for continuous data
- diverging: Purple to marine blue through neutral
- categorical: Extended palette for multiple categories
- colorblind_safe: Colorblind-accessible colors
- high_contrast: Maximum accessibility palette
# Get specific palette
colors <- get_kphd_colors("primary")
# Get marine blue (primary brand color)
marine_blue <- get_kphd_colors("marine_blue")
# Get interpolated colors
colors_10 <- get_kphd_colors("sequential", n = 10)
# Use in ggplot2
ggplot(data, aes(x = x, y = y, fill = category)) +
geom_col() +
scale_fill_kphd(palette = "primary")
# Continuous scales
ggplot(data, aes(x = x, y = y, fill = value)) +
geom_tile() +
scale_fill_kphd_c(palette = "sequential")
# Set as default for all plots in session
setup_kphd_fonts()
set_kphd_theme()
# All subsequent plots use KPHD theme
ggplot(data, aes(x, y)) + geom_point()
# Use without setting as default
ggplot(data, aes(x, y)) +
geom_point() +
theme_kphd(base_size = 14)
# Without Poppins font
ggplot(data, aes(x, y)) +
geom_point() +
theme_kphd(base_family = "sans")

The kphd_suppress() function implements primary and secondary
suppression to protect privacy in public health data.
# Example data with small counts
health_data <- data.frame(
region = c("North", "South", "East", "West", "Central"),
year = rep(2023, 5),
cases = c(5, 150, 8, 200, 12)
)
# Apply suppression (threshold = 10 by default)
suppressed_data <- kphd_suppress(
health_data,
count_var = "cases",
group_vars = "year",
threshold = 10
)
# View results
print(suppressed_data)
# region year cases suppression_flag
# 1 North 2023 NA primary
# 2 South 2023 150 none
# 3 East 2023 NA primary
# 4 West 2023 200 none
# 5 Central 2023 NA secondary
kphd_suppression_summary(health_data, suppressed_data, "cases")
# === KPHD Data Suppression Summary ===
#
# Total rows: 5
# Rows suppressed: 3 ( 60 %)
# - Primary suppression: 2
# - Secondary suppression: 1
# Multiple grouping variables
health_data2 <- data.frame(
region = rep(c("North", "South"), each = 3),
age_group = rep(c("0-17", "18-64", "65+"), 2),
cases = c(8, 45, 3, 150, 7, 89)
)
suppressed <- kphd_suppress(
health_data2,
count_var = "cases",
group_vars = c("region", "age_group"),
threshold = 10,
replace_with = NA # or use -999, 0, etc.
)
# Without suppression flags (cleaner output)
suppressed_clean <- kphd_suppress(
health_data2,
count_var = "cases",
group_vars = c("region", "age_group"),
flag_suppressed = FALSE
)
library(KPHD)
library(ggplot2)
library(dplyr)
# Setup
setup_kphd_fonts()
set_kphd_theme()
# Outbreak data
outbreak_data <- data.frame(
onset_date = seq(as.Date("2024-01-01"), as.Date("2024-02-15"), by = "day"),
cases = c(0, 0, 1, 2, 3, 5, 8, 12, 15, 18, 22, 25, 20, 18, 15,
12, 10, 8, 6, 5, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0,
rep(0, 16))
)
# Create epidemic curve
ggplot(outbreak_data, aes(x = onset_date, y = cases)) +
geom_col(fill = get_kphd_colors("marine_blue")) +
scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
labs(
title = "Foodborne Illness Outbreak - Epidemic Curve",
subtitle = "Cases by date of symptom onset",
x = "Date of Onset",
y = "Number of Cases",
caption = "Data: KPHD Disease Investigation | Peak: January 12, 2024"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Calculate key metrics
peak_date <- outbreak_data$onset_date[which.max(outbreak_data$cases)]
total_cases <- sum(outbreak_data$cases)
attack_rate <- (total_cases / 500) * 100 # If exposed population = 500
cat("\nOutbreak Summary:\n")
cat("Total cases:", total_cases, "\n")
cat("Peak date:", format(peak_date, "%B %d, %Y"), "\n")
cat("Attack rate:", round(attack_rate, 1), "%\n")
# Disease surveillance data by county
disease_data <- data.frame(
county = c("Kitsap", "Jefferson", "Clallam", "Mason", "Thurston"),
cases = c(145, 8, 67, 12, 234),
population = c(275000, 32000, 78000, 65000, 295000),
year = rep(2023, 5)
)
# Calculate rates per 100,000
disease_data <- disease_data %>%
mutate(
rate = (cases / population) * 100000,
rate_lower = # Calculate 95% CI lower bound
(qchisq(0.025, 2 * cases) / 2 / population) * 100000,
rate_upper = # Calculate 95% CI upper bound
(qchisq(0.975, 2 * (cases + 1)) / 2 / population) * 100000
)
# Apply suppression
disease_data_suppressed <- kphd_suppress(
disease_data,
count_var = "cases",
threshold = 10
)
# Visualize (non-suppressed only)
disease_data_plot <- disease_data_suppressed %>%
filter(suppression_flag == "none")
ggplot(disease_data_plot, aes(x = reorder(county, rate), y = rate)) +
geom_col(fill = get_kphd_colors("marine_blue")) +
geom_errorbar(
aes(ymin = rate_lower, ymax = rate_upper),
width = 0.3,
color = get_kphd_colors("additional_3")
) +
coord_flip() +
labs(
title = "Disease Rates by County, 2023",
subtitle = "Rate per 100,000 population with 95% confidence intervals",
x = NULL,
y = "Rate per 100,000",
caption = "* Counties with <10 cases suppressed for privacy"
)
# Age group analysis
age_data <- data.frame(
age_group = c("0-4", "5-17", "18-24", "25-44", "45-64", "65-74", "75+"),
cases = c(12, 45, 38, 156, 234, 189, 98),
population = c(15000, 45000, 35000, 85000, 75000, 35000, 25000)
) %>%
mutate(
rate = (cases / population) * 100000,
percent = (cases / sum(cases)) * 100
)
# Create population pyramid-style comparison
ggplot(age_data, aes(x = age_group, y = rate)) +
geom_col(fill = get_kphd_colors("marine_blue")) +
geom_text(
aes(label = sprintf("%.1f", rate)),
hjust = -0.2,
color = get_kphd_colors("marine_blue"),
size = 3.5
) +
coord_flip() +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Disease Rate by Age Group",
subtitle = "Cases per 100,000 population, 2023",
x = "Age Group",
y = "Rate per 100,000"
)
# Monthly case data over 3 years
monthly_data <- data.frame(
month = seq(as.Date("2021-01-01"), as.Date("2023-12-01"), by = "month"),
cases = c(
# 2021
45, 52, 48, 55, 62, 58, 65, 72, 68, 55, 48, 42,
# 2022
38, 42, 45, 52, 58, 65, 72, 78, 85, 92, 88, 82,
# 2023
75, 72, 68, 65, 70, 75, 82, 88, 92, 95, 90, 85
)
) %>%
mutate(
# Calculate 3-month moving average
ma_3month = zoo::rollmean(cases, k = 3, fill = NA, align = "right")
)
# Visualize trend
ggplot(monthly_data, aes(x = month)) +
geom_line(
aes(y = cases, color = "Monthly Cases"),
linewidth = 0.8,
alpha = 0.5
) +
geom_line(
aes(y = ma_3month, color = "3-Month Moving Average"),
linewidth = 1.2
) +
scale_color_manual(
values = c(
"Monthly Cases" = get_kphd_colors("secondary"),
"3-Month Moving Average" = get_kphd_colors("marine_blue")
)
) +
scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
labs(
title = "Disease Trend Analysis, 2021-2023",
subtitle = "Monthly cases with 3-month moving average",
x = "Month",
y = "Number of Cases",
color = NULL
) +
theme(
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)
)
# Regional surveillance data
regional_data <- expand.grid(
year = 2019:2023,
region = c("North", "South", "East", "West")
) %>%
mutate(
cases = c(
# North
45, 52, 48, 8, 55,
# South
78, 85, 92, 88, 95,
# East
5, 4, 6, 3, 7,
# West
65, 72, 68, 70, 75
),
population = rep(c(50000, 85000, 15000, 70000), each = 5),
rate = (cases / population) * 100000
)
# Apply suppression by region-year
regional_suppressed <- kphd_suppress(
regional_data,
count_var = "cases",
group_vars = c("region", "year"),
threshold = 10
)
# Visualize trends (non-suppressed only)
regional_plot <- regional_suppressed %>%
filter(suppression_flag == "none")
ggplot(regional_plot, aes(x = year, y = rate, color = region, group = region)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
scale_color_kphd(palette = "primary") +
labs(
title = "Regional Disease Rate Trends",
subtitle = "Rates per 100,000 population (suppressed cells excluded)",
x = "Year",
y = "Rate per 100,000",
color = "Region",
caption = "Note: Some data points suppressed for privacy (n<10)"
) +
theme(legend.position = "bottom")
# Summary statistics
kphd_suppression_summary(regional_data, regional_suppressed, "cases")

- theme_kphd() - KPHD ggplot2 theme
- set_kphd_theme() - Set KPHD theme as default
- setup_kphd_fonts() - Load Poppins font from Google Fonts
- get_kphd_colors() - Get KPHD color palette
- show_kphd_palettes() - Display all available palettes
- scale_color_kphd() - Color scale for discrete variables
- scale_fill_kphd() - Fill scale for discrete variables
- scale_color_kphd_c() - Color scale for continuous variables
- scale_fill_kphd_c() - Fill scale for continuous variables

- kphd_suppress() - Apply primary and secondary suppression
- kphd_suppression_summary() - Generate suppression report

- kphd_clean_dates() - Standardize date formats
- kphd_validate_ages() - Validate and categorize age data
- kphd_geocode() - Geocode addresses for mapping

- kphd_table_one() - Generate demographic tables
- kphd_crosstab() - Create publication-ready crosstabs
- kphd_summary_stats() - Calculate common summary statistics
- kphd_export_table() - Export tables to Word/Excel with formatting

| Color Name | Hex Code | RGB | Usage |
|---|---|---|---|
| Marine Blue | #004960 | 0, 74, 97 | Primary brand color |
| Secondary | #54b4b7 | 85, 181, 183 | Secondary accent |
| Background | #fbf0d7 | 252, 240, 216 | Light backgrounds |
| Additional 1 | #245c85 | 40, 99, 145 | Data visualization |
| Additional 2 | #DAFF00 | 218, 255, 0 | High contrast |
| Additional 3 | #56215e | 87, 33, 94 | Purple accent |

This package is under active development. Current status:
- Core theme functionality with Poppins font
- Comprehensive color palette system
- Data suppression functions (primary & secondary)
- Basic documentation and examples
- Unit tests for existing functions
- Colorblind-safe palette options
- Geographic data cleaning and validation
- Choropleth map templates
- Spatial clustering detection (SaTScan integration)
- Distance-based calculations
- ZIP code to census tract crosswalks
- Date standardization and validation
- Age calculation and categorization
- Record deduplication algorithms
- Data quality checks and reports
- Table One generators (demographic tables)
- Publication-ready crosstabs
- Automated summary statistics
- R Markdown templates (reports, briefs, presentations)
- Export utilities (Word, Excel, PowerPoint with KPHD branding)
# Clone and setup
devtools::load_all()
devtools::document()
devtools::test()
devtools::check()

- Email: trey.evans@kitsappublichealth.org
- Bug reports: GitHub Issues
- Questions: GitHub Discussions
If you use this package in your work, please cite:
Kitsap Public Health District (2025). KPHD: Epidemiological Tools and Style
Guide for Public Health Analysis. R package version 0.1.0.
https://github.com/KPHD-Assessment-Epidemiology/KPHD
BibTeX:
@Manual{,
title = {KPHD: Epidemiological Tools and Style Guide for Public Health Analysis},
author = {{Kitsap Public Health District}},
year = {2025},
note = {R package version 0.1.0},
url = {https://github.com/KPHD-Assessment-Epidemiology/KPHD},
}

This package is licensed under the MIT License. See LICENSE file for
details.
- Developed by epidemiologists at Kitsap Public Health District
- Color palette design based on KPHD Style Guide
- Colorblind-safe palettes informed by the work of the paletteer gallery
- Built with ggplot2, dplyr, and the tidyverse
- Font support via showtext and sysfonts
Kitsap Public Health District
