From 5299a68df66a9da84dce00770d6b8a03b7b5aaf3 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 18:59:39 +0700 Subject: [PATCH 1/8] feat: add aws athena module --- modules/aws-athena/CHANGELOG.md | 8 ++ modules/aws-athena/README.md | 128 ++++++++++++++++++++++++++++++++ modules/aws-athena/main.tf | 37 +++++++++ modules/aws-athena/outputs.tf | 19 +++++ modules/aws-athena/variables.tf | 95 ++++++++++++++++++++++++ modules/aws-athena/versions.tf | 10 +++ 6 files changed, 297 insertions(+) create mode 100644 modules/aws-athena/CHANGELOG.md create mode 100644 modules/aws-athena/README.md create mode 100644 modules/aws-athena/main.tf create mode 100644 modules/aws-athena/outputs.tf create mode 100644 modules/aws-athena/variables.tf create mode 100644 modules/aws-athena/versions.tf diff --git a/modules/aws-athena/CHANGELOG.md b/modules/aws-athena/CHANGELOG.md new file mode 100644 index 0000000..f633f8f --- /dev/null +++ b/modules/aws-athena/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## 0.1.0 (2026-05-22) + + +### Features + +* add aws-athena module with workgroup and named query support diff --git a/modules/aws-athena/README.md b/modules/aws-athena/README.md new file mode 100644 index 0000000..3a47292 --- /dev/null +++ b/modules/aws-athena/README.md @@ -0,0 +1,128 @@ +# aws-athena + +Terraform module to create AWS Athena resources including a workgroup with configurable result location, encryption, and optional saved (named) queries. + +Supported resources: + +- Athena workgroup (`aws_athena_workgroup`) +- Athena named queries (`aws_athena_named_query`) + +## Terraform versions + +Terraform >= 1.3. Pin module version to an exact tag when referencing from Terragrunt or other modules. 
+ +## Usage + +### Minimal — workgroup only + +```hcl +module "athena" { + source = "git::git@github.com:kloia/platform-modules.git//modules/aws-athena?ref=aws-athena-v0.1.0" + + workgroup_name = "analytics" + results_output_location = "s3://myapp-athena-results/queries/" + + tags = { + Environment = "prod" + ManagedBy = "terraform" + } +} +``` + +### Full — workgroup with encryption and named queries + +```hcl +module "athena" { + source = "git::git@github.com:kloia/platform-modules.git//modules/aws-athena?ref=aws-athena-v0.1.0" + + workgroup_name = "analytics" + workgroup_description = "Workgroup for myapp analytics queries" + results_output_location = "s3://myapp-athena-results/queries/" + + enforce_workgroup_configuration = true + publish_cloudwatch_metrics_enabled = true + bytes_scanned_cutoff_per_query = 10737418240 # 10 GB + + encryption_option = "SSE_S3" + + named_queries = { + daily_active_users = { + database = "myapp_data_lake" + description = "Count distinct active users per day" + query = "SELECT DATE(event_time) AS day, COUNT(DISTINCT user_id) AS dau FROM myapp_data_lake.events WHERE event_type = 'session_start' GROUP BY 1 ORDER BY 1 DESC" + } + revenue_by_month = { + database = "myapp_data_lake" + description = "Monthly revenue summary" + query = "SELECT DATE_TRUNC('month', created_at) AS month, SUM(amount) AS revenue FROM myapp_data_lake.orders GROUP BY 1 ORDER BY 1 DESC" + } + } + + tags = { + Environment = "prod" + ManagedBy = "terraform" + Project = "myapp" + Owner = "data-platform" + } +} +``` + +### Import an existing workgroup + +If the workgroup already exists in AWS (e.g., the default `primary` workgroup), import it before the first apply: + +```bash +terraform import module.athena.aws_athena_workgroup.this[0] +``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3 | +| [aws](#requirement\_aws) | >= 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| 
[aws](#provider\_aws) | >= 5.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_athena_workgroup.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_workgroup) | resource | +| [aws_athena_named_query.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_named_query) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [workgroup\_name](#input\_workgroup\_name) | Name of the Athena workgroup. | `string` | n/a | yes | +| [results\_output\_location](#input\_results\_output\_location) | S3 URL for query results (e.g., `s3://my-bucket/prefix/`). | `string` | n/a | yes | +| [create\_workgroup](#input\_create\_workgroup) | Whether to create the Athena workgroup. | `bool` | `true` | no | +| [workgroup\_description](#input\_workgroup\_description) | Description of the Athena workgroup. | `string` | `null` | no | +| [workgroup\_state](#input\_workgroup\_state) | State of the workgroup: `ENABLED` or `DISABLED`. | `string` | `"ENABLED"` | no | +| [enforce\_workgroup\_configuration](#input\_enforce\_workgroup\_configuration) | Enforce workgroup result location and encryption settings for all queries. | `bool` | `true` | no | +| [publish\_cloudwatch\_metrics\_enabled](#input\_publish\_cloudwatch\_metrics\_enabled) | Publish workgroup-level query metrics to CloudWatch. | `bool` | `true` | no | +| [bytes\_scanned\_cutoff\_per\_query](#input\_bytes\_scanned\_cutoff\_per\_query) | Maximum bytes scanned per query. Queries exceeding this are cancelled. `null` disables the limit. | `number` | `null` | no | +| [encryption\_option](#input\_encryption\_option) | Encryption option for query results: `SSE_S3`, `SSE_KMS`, or `CSE_KMS`. `null` disables encryption configuration. | `string` | `null` | no | +| [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key ARN. 
Required when `encryption_option` is `SSE_KMS` or `CSE_KMS`. | `string` | `null` | no | +| [named\_queries](#input\_named\_queries) | Map of named (saved) Athena queries. Key is the query name. Each value: `database` (string), `query` (string), `description` (optional string). | `map(object({...}))` | `{}` | no | +| [tags](#input\_tags) | Map of tags to apply to all resources. | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [workgroup\_id](#output\_workgroup\_id) | ID of the Athena workgroup. | +| [workgroup\_arn](#output\_workgroup\_arn) | ARN of the Athena workgroup. | +| [workgroup\_name](#output\_workgroup\_name) | Name of the Athena workgroup. | +| [named\_query\_ids](#output\_named\_query\_ids) | Map of named query name to its ID. | + diff --git a/modules/aws-athena/main.tf b/modules/aws-athena/main.tf new file mode 100644 index 0000000..70404df --- /dev/null +++ b/modules/aws-athena/main.tf @@ -0,0 +1,44 @@ +resource "aws_athena_workgroup" "this" { # Athena workgroup; every setting below comes from module inputs + count = var.create_workgroup ? 1 : 0 # zero instances when the caller opts out of creating the workgroup + + name = var.workgroup_name + description = var.workgroup_description + state = var.workgroup_state + + configuration { + enforce_workgroup_configuration = var.enforce_workgroup_configuration + publish_cloudwatch_metrics_enabled = var.publish_cloudwatch_metrics_enabled + bytes_scanned_cutoff_per_query = var.bytes_scanned_cutoff_per_query + + result_configuration { + output_location = var.results_output_location + + dynamic "encryption_configuration" { + for_each = var.encryption_option != null ? [1] : [] # emit the block only when an encryption option is set + content { + encryption_option = var.encryption_option + kms_key_arn = var.kms_key_arn + } + } + } + } + + tags = var.tags + + lifecycle { + precondition { # enforce the documented kms_key_arn requirement before the workgroup is created or updated + condition = var.encryption_option == null || var.encryption_option == "SSE_S3" || var.kms_key_arn != null + error_message = "kms_key_arn must be set when encryption_option is SSE_KMS or CSE_KMS." + } + } +} + +resource "aws_athena_named_query" "this" { + for_each = var.named_queries + + name = each.key + workgroup = var.create_workgroup ?
aws_athena_workgroup.this[0].name : var.workgroup_name # created workgroup's name, else the caller-supplied name of a workgroup not created here + database = each.value.database + query = each.value.query + description = each.value.description # declared optional(string), so the attribute is always present (null when omitted) +} diff --git a/modules/aws-athena/outputs.tf b/modules/aws-athena/outputs.tf new file mode 100644 index 0000000..eeba781 --- /dev/null +++ b/modules/aws-athena/outputs.tf @@ -0,0 +1,19 @@ +output "workgroup_id" { + description = "ID of the Athena workgroup." + value = var.create_workgroup ? aws_athena_workgroup.this[0].id : null # null when the workgroup is not created by this module +} + +output "workgroup_arn" { + description = "ARN of the Athena workgroup." + value = var.create_workgroup ? aws_athena_workgroup.this[0].arn : null # null when the workgroup is not created by this module +} + +output "workgroup_name" { + description = "Name of the Athena workgroup." + value = var.create_workgroup ? aws_athena_workgroup.this[0].name : var.workgroup_name # falls back to the input name when nothing is created +} + +output "named_query_ids" { + description = "Map of named query name to its ID." + value = { for k, v in aws_athena_named_query.this : k => v.id } +} diff --git a/modules/aws-athena/variables.tf b/modules/aws-athena/variables.tf new file mode 100644 index 0000000..a7c73bf --- /dev/null +++ b/modules/aws-athena/variables.tf @@ -0,0 +1,95 @@ +################################################################################ +# Athena Workgroup +################################################################################ + +variable "create_workgroup" { + description = "Whether to create the Athena workgroup." + type = bool + default = true +} + +variable "workgroup_name" { + description = "Name of the Athena workgroup." + type = string +} + +variable "workgroup_description" { + description = "Description of the Athena workgroup." + type = string + default = null +} + +variable "workgroup_state" { + description = "State of the workgroup. ENABLED or DISABLED." + type = string + default = "ENABLED" + + validation { + condition = contains(["ENABLED", "DISABLED"], var.workgroup_state) + error_message = "workgroup_state must be ENABLED or DISABLED."
+ } +} + +variable "enforce_workgroup_configuration" { + description = "Enforce workgroup result location and encryption settings for all queries." + type = bool + default = true +} + +variable "publish_cloudwatch_metrics_enabled" { + description = "Publish workgroup-level query metrics to CloudWatch." + type = bool + default = true +} + +variable "bytes_scanned_cutoff_per_query" { + description = "Maximum bytes scanned per query. Queries exceeding this are cancelled. null disables the limit." + type = number + default = null +} + +variable "results_output_location" { + description = "S3 URL for query results (e.g., s3://my-bucket/prefix/)." + type = string +} + +variable "encryption_option" { + description = "Encryption option for query results: SSE_S3, SSE_KMS, or CSE_KMS. null disables encryption configuration." + type = string + default = null + + validation { + condition = var.encryption_option == null ? true : contains(["SSE_S3", "SSE_KMS", "CSE_KMS"], var.encryption_option) # conditional form: contains() is never handed the default null + error_message = "encryption_option must be SSE_S3, SSE_KMS, CSE_KMS, or null." + } +} + +variable "kms_key_arn" { + description = "KMS key ARN for SSE_KMS or CSE_KMS encryption. Required when encryption_option is SSE_KMS or CSE_KMS." + type = string + default = null +} + +################################################################################ +# Named Queries +################################################################################ + +variable "named_queries" { + description = "Map of named (saved) Athena queries. Key is the query name. Each value: database (string), query (string), description (optional string)."
+ type = map(object({ + database = string + query = string + description = optional(string) + })) + default = {} +} + +################################################################################ +# Common +################################################################################ + +variable "tags" { + description = "Map of tags to apply to all resources." + type = map(string) + default = {} +} diff --git a/modules/aws-athena/versions.tf b/modules/aws-athena/versions.tf new file mode 100644 index 0000000..29ec41d --- /dev/null +++ b/modules/aws-athena/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_version = ">= 1.3" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" + } + } +} From 14ceceec1171dc4f6bed4b8930b58f0b2adcee8f Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 18:59:49 +0700 Subject: [PATCH 2/8] feat: add aws glue module --- modules/aws-glue/CHANGELOG.md | 8 ++ modules/aws-glue/README.md | 151 ++++++++++++++++++++++++++++++++++ modules/aws-glue/main.tf | 36 ++++++++ modules/aws-glue/outputs.tf | 24 ++++++ modules/aws-glue/variables.tf | 130 +++++++++++++++++++++++++++++ modules/aws-glue/versions.tf | 10 +++ 6 files changed, 359 insertions(+) create mode 100644 modules/aws-glue/CHANGELOG.md create mode 100644 modules/aws-glue/README.md create mode 100644 modules/aws-glue/main.tf create mode 100644 modules/aws-glue/outputs.tf create mode 100644 modules/aws-glue/variables.tf create mode 100644 modules/aws-glue/versions.tf diff --git a/modules/aws-glue/CHANGELOG.md b/modules/aws-glue/CHANGELOG.md new file mode 100644 index 0000000..d3d7a8b --- /dev/null +++ b/modules/aws-glue/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## 0.1.0 (2026-05-22) + + +### Features + +* add aws-glue module with catalog database and crawler support diff --git a/modules/aws-glue/README.md b/modules/aws-glue/README.md new file mode 100644 index 0000000..a24cffc --- /dev/null +++ b/modules/aws-glue/README.md @@ -0,0
+1,151 @@ +# aws-glue + +Terraform module to create AWS Glue Data Catalog resources including a Glue catalog database and S3-backed crawler. + +Uses [`terraform-aws-modules/glue/aws`](https://registry.terraform.io/modules/terraform-aws-modules/glue/aws) submodules under the hood. Both the database and crawler can be toggled independently via `create_database` and `create_crawler`. + +Supported resources: + +- Glue catalog database (`aws_glue_catalog_database`) +- Glue crawler with S3 targets (`aws_glue_crawler`) + +## Terraform versions + +Terraform >= 1.3. Pin module version to an exact tag when referencing from Terragrunt or other modules. + +## Usage + +### Minimal — database only + +```hcl +module "glue" { + source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" + + database_name = "my_data_lake" + + create_crawler = false + + tags = { + Environment = "prod" + ManagedBy = "terraform" + } +} +``` + +### Full — database + crawler with multiple S3 targets + +```hcl +module "glue" { + source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" + + database_name = "myapp_data_lake" + database_description = "Data lake for myapp analytics" + + crawler_name = "myapp-s3-crawler" + crawler_role_arn = aws_iam_role.glue.arn + crawler_description = "Crawls Parquet files in the data lake bucket" + + s3_targets = [ + { path = "s3://myapp-data-lake/events/" }, + { path = "s3://myapp-data-lake/users/", exclusions = ["tmp/*"] }, + { path = "s3://myapp-data-lake/orders/" }, + ] + + schema_change_policy = { + update_behavior = "UPDATE_IN_DATABASE" + delete_behavior = "LOG" + } + + crawler_configuration = jsonencode({ + Version = 1.0 + Grouping = { + TableGroupingPolicy = "CombineCompatibleSchemas" + } + }) + + tags = { + Environment = "prod" + ManagedBy = "terraform" + Project = "myapp" + Owner = "data-platform" + } +} +``` + +### Crawler against an existing database (created outside this module) + 
+```hcl +module "glue" { + source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" + + database_name = "existing_db" + create_database = false + + crawler_name = "my-crawler" + crawler_role_arn = aws_iam_role.glue.arn + + s3_targets = [{ path = "s3://my-bucket/data/" }] + + tags = { Environment = "staging" } +} +``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3 | +| [aws](#requirement\_aws) | >= 5.0 | + +## Providers + +No providers directly. Resources are managed by the `terraform-aws-modules/glue/aws` submodules. + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [glue\_catalog\_database](#module\_glue\_catalog\_database) | terraform-aws-modules/glue/aws//modules/catalog-database | ~> 1.0 | +| [glue\_crawler](#module\_glue\_crawler) | terraform-aws-modules/glue/aws//modules/crawler | ~> 1.0 | + +## Resources + +| Name | Type | +|------|------| +| [aws_glue_catalog_database](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_database) | resource | +| [aws_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_crawler) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [database\_name](#input\_database\_name) | Name of the Glue catalog database. | `string` | n/a | yes | +| [create\_database](#input\_create\_database) | Whether to create the Glue catalog database. | `bool` | `true` | no | +| [database\_description](#input\_database\_description) | Description of the Glue catalog database. | `string` | `null` | no | +| [catalog\_id](#input\_catalog\_id) | AWS account ID of the Glue data catalog. Defaults to the current account. | `string` | `null` | no | +| [database\_location\_uri](#input\_database\_location\_uri) | Location of the database (e.g., an HDFS path). 
| `string` | `null` | no | +| [database\_parameters](#input\_database\_parameters) | Additional parameters for the Glue catalog database. | `map(string)` | `{}` | no | +| [create\_crawler](#input\_create\_crawler) | Whether to create the Glue crawler. | `bool` | `true` | no | +| [crawler\_name](#input\_crawler\_name) | Name of the Glue crawler. | `string` | `null` | no | +| [crawler\_role\_arn](#input\_crawler\_role\_arn) | IAM role ARN that the crawler uses to access S3 and Glue. | `string` | `null` | no | +| [crawler\_description](#input\_crawler\_description) | Description of the Glue crawler. | `string` | `null` | no | +| [crawler\_schedule](#input\_crawler\_schedule) | Cron expression for the crawler schedule (e.g., `cron(0 12 * * ? *)`). Omit for on-demand. | `string` | `null` | no | +| [crawler\_table\_prefix](#input\_crawler\_table\_prefix) | Prefix for table names created by the crawler. | `string` | `null` | no | +| [crawler\_classifiers](#input\_crawler\_classifiers) | List of custom classifier names to associate with the crawler. | `list(string)` | `[]` | no | +| [crawler\_configuration](#input\_crawler\_configuration) | JSON string of Glue crawler configuration (e.g., grouping behavior). | `string` | `null` | no | +| [s3\_targets](#input\_s3\_targets) | List of S3 target configurations for the crawler. Each object: `path` (required), `exclusions`, `connection_name`, `event_queue_arn`, `dlq_event_queue_arn`, `sample_size`. | `list(object({...}))` | `[]` | no | +| [schema\_change\_policy](#input\_schema\_change\_policy) | Behavior when the crawler discovers a changed schema. `update_behavior`: UPDATE\_IN\_DATABASE or LOG. `delete_behavior`: DELETE\_FROM\_DATABASE, LOG, or DEPRECATE\_IN\_DATABASE. | `object({...})` | `{}` | no | +| [recrawl\_policy](#input\_recrawl\_policy) | Recrawl behavior. `recrawl_behavior`: CRAWL\_EVERYTHING, CRAWL\_NEW\_FOLDERS\_ONLY, or CRAWL\_EVENT\_MODE. 
| `object({...})` | `{}` | no | +| [tags](#input\_tags) | Map of tags to apply to all resources. | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [database\_name](#output\_database\_name) | Name of the Glue catalog database. | +| [database\_id](#output\_database\_id) | ID of the Glue catalog database (`catalog_id:name`). | +| [crawler\_id](#output\_crawler\_id) | ID of the Glue crawler (same as its name). | +| [crawler\_arn](#output\_crawler\_arn) | ARN of the Glue crawler. | +| [crawler\_name](#output\_crawler\_name) | Name of the Glue crawler. | + diff --git a/modules/aws-glue/main.tf b/modules/aws-glue/main.tf new file mode 100644 index 0000000..f982352 --- /dev/null +++ b/modules/aws-glue/main.tf @@ -0,0 +1,36 @@ +module "glue_catalog_database" { # thin wrapper: forwards this module's database_* inputs to the catalog-database submodule + source = "terraform-aws-modules/glue/aws//modules/catalog-database" + version = "~> 1.0" + + create = var.create_database # toggle passed through from var.create_database + name = var.database_name + description = var.database_description + catalog_id = var.catalog_id + location_uri = var.database_location_uri + parameters = var.database_parameters + + tags = var.tags +} + +module "glue_crawler" { # thin wrapper: forwards this module's crawler_* inputs to the crawler submodule + source = "terraform-aws-modules/glue/aws//modules/crawler" + version = "~> 1.0" + + create = var.create_crawler # toggle passed through from var.create_crawler + name = var.crawler_name + database_name = var.create_database ?
module.glue_catalog_database.name : var.database_name # submodule's name output when the database is created here, else the caller-supplied name + role = var.crawler_role_arn + description = var.crawler_description + schedule = var.crawler_schedule + table_prefix = var.crawler_table_prefix + classifiers = var.crawler_classifiers + configuration = var.crawler_configuration + + s3_targets = var.s3_targets + schema_change_policy = var.schema_change_policy + recrawl_policy = var.recrawl_policy + + tags = var.tags + + depends_on = [module.glue_catalog_database] # explicit module-level ordering. NOTE(review): may be redundant with the database_name reference above - confirm against the submodule's outputs +} diff --git a/modules/aws-glue/outputs.tf b/modules/aws-glue/outputs.tf new file mode 100644 index 0000000..1f51953 --- /dev/null +++ b/modules/aws-glue/outputs.tf @@ -0,0 +1,24 @@ +output "database_name" { + description = "Name of the Glue catalog database." + value = var.create_database ? module.glue_catalog_database.name : var.database_name # falls back to the input name when the database is not created here +} + +output "database_id" { + description = "ID of the Glue catalog database (catalog_id:name)." + value = var.create_database ? module.glue_catalog_database.id : null # null when create_database is false +} + +output "crawler_id" { + description = "ID of the Glue crawler (same as its name)." + value = var.create_crawler ? module.glue_crawler.id : null # null when create_crawler is false +} + +output "crawler_arn" { + description = "ARN of the Glue crawler." + value = var.create_crawler ? module.glue_crawler.arn : null # null when create_crawler is false +} + +output "crawler_name" { + description = "Name of the Glue crawler." + value = var.create_crawler ? module.glue_crawler.name : null # null when create_crawler is false +} diff --git a/modules/aws-glue/variables.tf b/modules/aws-glue/variables.tf new file mode 100644 index 0000000..f493dda --- /dev/null +++ b/modules/aws-glue/variables.tf @@ -0,0 +1,130 @@ +################################################################################ +# Glue Catalog Database +################################################################################ + +variable "create_database" { + description = "Whether to create the Glue catalog database."
+ type = bool + default = true +} + +variable "database_name" { # always required: also used as the crawler's target database name when create_database is false + description = "Name of the Glue catalog database." + type = string +} + +variable "database_description" { + description = "Description of the Glue catalog database." + type = string + default = null +} + +variable "catalog_id" { + description = "AWS account ID of the Glue data catalog. Defaults to the current account." + type = string + default = null +} + +variable "database_location_uri" { + description = "Location of the database (e.g., an HDFS path)." + type = string + default = null +} + +variable "database_parameters" { + description = "Additional parameters for the Glue catalog database." + type = map(string) + default = {} +} + +################################################################################ +# Glue Crawler +################################################################################ + +variable "create_crawler" { + description = "Whether to create the Glue crawler." + type = bool + default = true +} + +variable "crawler_name" { + description = "Name of the Glue crawler." + type = string + default = null # NOTE(review): null default while create_crawler defaults to true - callers presumably must set this when a crawler is created; confirm +} + +variable "crawler_role_arn" { + description = "IAM role ARN that the crawler uses to access S3 and Glue." + type = string + default = null # NOTE(review): null default while create_crawler defaults to true - callers presumably must set this when a crawler is created; confirm +} + +variable "crawler_description" { + description = "Description of the Glue crawler." + type = string + default = null +} + +variable "crawler_schedule" { + description = "Cron expression for the crawler schedule (e.g., 'cron(0 12 * * ? *)'). Omit for on-demand." + type = string + default = null +} + +variable "crawler_table_prefix" { + description = "Prefix for table names created by the crawler." + type = string + default = null +} + +variable "crawler_classifiers" { + description = "List of custom classifier names to associate with the crawler." + type = list(string) + default = [] +} + +variable "crawler_configuration" { + description = "JSON string of Glue crawler configuration (e.g., grouping behavior)."
+ type = string + default = null +} + +variable "s3_targets" { + description = "List of S3 target configurations for the crawler." + type = list(object({ + path = string # the only attribute without optional(); every target must set it + exclusions = optional(list(string), []) + connection_name = optional(string) + event_queue_arn = optional(string) + dlq_event_queue_arn = optional(string) + sample_size = optional(number) + })) + default = [] +} + +variable "schema_change_policy" { + description = "Behavior when the crawler discovers a changed schema. update_behavior: UPDATE_IN_DATABASE or LOG. delete_behavior: DELETE_FROM_DATABASE, LOG, or DEPRECATE_IN_DATABASE." + type = object({ + update_behavior = optional(string, "UPDATE_IN_DATABASE") + delete_behavior = optional(string, "LOG") + }) + default = {} # empty object: the optional() defaults declared above fill in both attributes +} + +variable "recrawl_policy" { + description = "Recrawl behavior. recrawl_behavior: CRAWL_EVERYTHING, CRAWL_NEW_FOLDERS_ONLY, or CRAWL_EVENT_MODE." + type = object({ + recrawl_behavior = optional(string, "CRAWL_EVERYTHING") + }) + default = {} # empty object: the optional() default declared above fills in recrawl_behavior +} + +################################################################################ +# Common +################################################################################ + +variable "tags" { # passed unchanged to both the database and crawler submodules + description = "Map of tags to apply to all resources."
+ type = map(string) + default = {} +} diff --git a/modules/aws-glue/versions.tf b/modules/aws-glue/versions.tf new file mode 100644 index 0000000..29ec41d --- /dev/null +++ b/modules/aws-glue/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_version = ">= 1.3" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" + } + } +} From c0e11c4ca3eaa4c88020e5b95358a58658ffae66 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 19:01:10 +0700 Subject: [PATCH 3/8] chore: remove changelog --- modules/aws-athena/CHANGELOG.md | 8 -------- modules/aws-glue/CHANGELOG.md | 8 -------- 2 files changed, 16 deletions(-) delete mode 100644 modules/aws-athena/CHANGELOG.md delete mode 100644 modules/aws-glue/CHANGELOG.md diff --git a/modules/aws-athena/CHANGELOG.md b/modules/aws-athena/CHANGELOG.md deleted file mode 100644 index f633f8f..0000000 --- a/modules/aws-athena/CHANGELOG.md +++ /dev/null @@ -1,8 +0,0 @@ -# Changelog - -## 0.1.0 (2026-05-22) - - -### Features - -* add aws-athena module with workgroup and named query support diff --git a/modules/aws-glue/CHANGELOG.md b/modules/aws-glue/CHANGELOG.md deleted file mode 100644 index d3d7a8b..0000000 --- a/modules/aws-glue/CHANGELOG.md +++ /dev/null @@ -1,8 +0,0 @@ -# Changelog - -## 0.1.0 (2026-05-22) - - -### Features - -* add aws-glue module with catalog database and crawler support From 0997ff488dda35866abdd563b1ff80dbad161d64 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 19:35:41 +0700 Subject: [PATCH 4/8] chore: remove readme and version --- modules/aws-athena/README.md | 128 ---------------------------- modules/aws-athena/versions.tf | 10 --- modules/aws-glue/README.md | 151 --------------------------------- modules/aws-glue/versions.tf | 10 --- 4 files changed, 299 deletions(-) delete mode 100644 modules/aws-athena/README.md delete mode 100644 modules/aws-athena/versions.tf delete mode 100644 modules/aws-glue/README.md delete mode 100644
modules/aws-glue/versions.tf diff --git a/modules/aws-athena/README.md b/modules/aws-athena/README.md deleted file mode 100644 index 3a47292..0000000 --- a/modules/aws-athena/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# aws-athena - -Terraform module to create AWS Athena resources including a workgroup with configurable result location, encryption, and optional saved (named) queries. - -Supported resources: - -- Athena workgroup (`aws_athena_workgroup`) -- Athena named queries (`aws_athena_named_query`) - -## Terraform versions - -Terraform >= 1.3. Pin module version to an exact tag when referencing from Terragrunt or other modules. - -## Usage - -### Minimal — workgroup only - -```hcl -module "athena" { - source = "git::git@github.com:kloia/platform-modules.git//modules/aws-athena?ref=aws-athena-v0.1.0" - - workgroup_name = "analytics" - results_output_location = "s3://myapp-athena-results/queries/" - - tags = { - Environment = "prod" - ManagedBy = "terraform" - } -} -``` - -### Full — workgroup with encryption and named queries - -```hcl -module "athena" { - source = "git::git@github.com:kloia/platform-modules.git//modules/aws-athena?ref=aws-athena-v0.1.0" - - workgroup_name = "analytics" - workgroup_description = "Workgroup for myapp analytics queries" - results_output_location = "s3://myapp-athena-results/queries/" - - enforce_workgroup_configuration = true - publish_cloudwatch_metrics_enabled = true - bytes_scanned_cutoff_per_query = 10737418240 # 10 GB - - encryption_option = "SSE_S3" - - named_queries = { - daily_active_users = { - database = "myapp_data_lake" - description = "Count distinct active users per day" - query = "SELECT DATE(event_time) AS day, COUNT(DISTINCT user_id) AS dau FROM myapp_data_lake.events WHERE event_type = 'session_start' GROUP BY 1 ORDER BY 1 DESC" - } - revenue_by_month = { - database = "myapp_data_lake" - description = "Monthly revenue summary" - query = "SELECT DATE_TRUNC('month', created_at) AS month, SUM(amount) AS revenue 
FROM myapp_data_lake.orders GROUP BY 1 ORDER BY 1 DESC" - } - } - - tags = { - Environment = "prod" - ManagedBy = "terraform" - Project = "myapp" - Owner = "data-platform" - } -} -``` - -### Import an existing workgroup - -If the workgroup already exists in AWS (e.g., the default `primary` workgroup), import it before the first apply: - -```bash -terraform import module.athena.aws_athena_workgroup.this[0] -``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 1.3 | -| [aws](#requirement\_aws) | >= 5.0 | - -## Providers - -| Name | Version | -|------|---------| -| [aws](#provider\_aws) | >= 5.0 | - -## Modules - -No modules. - -## Resources - -| Name | Type | -|------|------| -| [aws_athena_workgroup.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_workgroup) | resource | -| [aws_athena_named_query.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_named_query) | resource | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [workgroup\_name](#input\_workgroup\_name) | Name of the Athena workgroup. | `string` | n/a | yes | -| [results\_output\_location](#input\_results\_output\_location) | S3 URL for query results (e.g., `s3://my-bucket/prefix/`). | `string` | n/a | yes | -| [create\_workgroup](#input\_create\_workgroup) | Whether to create the Athena workgroup. | `bool` | `true` | no | -| [workgroup\_description](#input\_workgroup\_description) | Description of the Athena workgroup. | `string` | `null` | no | -| [workgroup\_state](#input\_workgroup\_state) | State of the workgroup: `ENABLED` or `DISABLED`. | `string` | `"ENABLED"` | no | -| [enforce\_workgroup\_configuration](#input\_enforce\_workgroup\_configuration) | Enforce workgroup result location and encryption settings for all queries. 
| `bool` | `true` | no | -| [publish\_cloudwatch\_metrics\_enabled](#input\_publish\_cloudwatch\_metrics\_enabled) | Publish workgroup-level query metrics to CloudWatch. | `bool` | `true` | no | -| [bytes\_scanned\_cutoff\_per\_query](#input\_bytes\_scanned\_cutoff\_per\_query) | Maximum bytes scanned per query. Queries exceeding this are cancelled. `null` disables the limit. | `number` | `null` | no | -| [encryption\_option](#input\_encryption\_option) | Encryption option for query results: `SSE_S3`, `SSE_KMS`, or `CSE_KMS`. `null` disables encryption configuration. | `string` | `null` | no | -| [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key ARN. Required when `encryption_option` is `SSE_KMS` or `CSE_KMS`. | `string` | `null` | no | -| [named\_queries](#input\_named\_queries) | Map of named (saved) Athena queries. Key is the query name. Each value: `database` (string), `query` (string), `description` (optional string). | `map(object({...}))` | `{}` | no | -| [tags](#input\_tags) | Map of tags to apply to all resources. | `map(string)` | `{}` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [workgroup\_id](#output\_workgroup\_id) | ID of the Athena workgroup. | -| [workgroup\_arn](#output\_workgroup\_arn) | ARN of the Athena workgroup. | -| [workgroup\_name](#output\_workgroup\_name) | Name of the Athena workgroup. | -| [named\_query\_ids](#output\_named\_query\_ids) | Map of named query name to its ID. 
| - diff --git a/modules/aws-athena/versions.tf b/modules/aws-athena/versions.tf deleted file mode 100644 index 29ec41d..0000000 --- a/modules/aws-athena/versions.tf +++ /dev/null @@ -1,10 +0,0 @@ -terraform { - required_version = ">= 1.3" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 5.0" - } - } -} diff --git a/modules/aws-glue/README.md b/modules/aws-glue/README.md deleted file mode 100644 index a24cffc..0000000 --- a/modules/aws-glue/README.md +++ /dev/null @@ -1,151 +0,0 @@ -# aws-glue - -Terraform module to create AWS Glue Data Catalog resources including a Glue catalog database and S3-backed crawler. - -Uses [`terraform-aws-modules/glue/aws`](https://registry.terraform.io/modules/terraform-aws-modules/glue/aws) submodules under the hood. Both the database and crawler can be toggled independently via `create_database` and `create_crawler`. - -Supported resources: - -- Glue catalog database (`aws_glue_catalog_database`) -- Glue crawler with S3 targets (`aws_glue_crawler`) - -## Terraform versions - -Terraform >= 1.3. Pin module version to an exact tag when referencing from Terragrunt or other modules. 
- -## Usage - -### Minimal — database only - -```hcl -module "glue" { - source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" - - database_name = "my_data_lake" - - create_crawler = false - - tags = { - Environment = "prod" - ManagedBy = "terraform" - } -} -``` - -### Full — database + crawler with multiple S3 targets - -```hcl -module "glue" { - source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" - - database_name = "myapp_data_lake" - database_description = "Data lake for myapp analytics" - - crawler_name = "myapp-s3-crawler" - crawler_role_arn = aws_iam_role.glue.arn - crawler_description = "Crawls Parquet files in the data lake bucket" - - s3_targets = [ - { path = "s3://myapp-data-lake/events/" }, - { path = "s3://myapp-data-lake/users/", exclusions = ["tmp/*"] }, - { path = "s3://myapp-data-lake/orders/" }, - ] - - schema_change_policy = { - update_behavior = "UPDATE_IN_DATABASE" - delete_behavior = "LOG" - } - - crawler_configuration = jsonencode({ - Version = 1.0 - Grouping = { - TableGroupingPolicy = "CombineCompatibleSchemas" - } - }) - - tags = { - Environment = "prod" - ManagedBy = "terraform" - Project = "myapp" - Owner = "data-platform" - } -} -``` - -### Crawler against an existing database (created outside this module) - -```hcl -module "glue" { - source = "git::git@github.com:kloia/platform-modules.git//modules/aws-glue?ref=aws-glue-v0.1.0" - - database_name = "existing_db" - create_database = false - - crawler_name = "my-crawler" - crawler_role_arn = aws_iam_role.glue.arn - - s3_targets = [{ path = "s3://my-bucket/data/" }] - - tags = { Environment = "staging" } -} -``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 1.3 | -| [aws](#requirement\_aws) | >= 5.0 | - -## Providers - -No providers directly. Resources are managed by the `terraform-aws-modules/glue/aws` submodules. 
- -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [glue\_catalog\_database](#module\_glue\_catalog\_database) | terraform-aws-modules/glue/aws//modules/catalog-database | ~> 1.0 | -| [glue\_crawler](#module\_glue\_crawler) | terraform-aws-modules/glue/aws//modules/crawler | ~> 1.0 | - -## Resources - -| Name | Type | -|------|------| -| [aws_glue_catalog_database](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_database) | resource | -| [aws_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_crawler) | resource | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [database\_name](#input\_database\_name) | Name of the Glue catalog database. | `string` | n/a | yes | -| [create\_database](#input\_create\_database) | Whether to create the Glue catalog database. | `bool` | `true` | no | -| [database\_description](#input\_database\_description) | Description of the Glue catalog database. | `string` | `null` | no | -| [catalog\_id](#input\_catalog\_id) | AWS account ID of the Glue data catalog. Defaults to the current account. | `string` | `null` | no | -| [database\_location\_uri](#input\_database\_location\_uri) | Location of the database (e.g., an HDFS path). | `string` | `null` | no | -| [database\_parameters](#input\_database\_parameters) | Additional parameters for the Glue catalog database. | `map(string)` | `{}` | no | -| [create\_crawler](#input\_create\_crawler) | Whether to create the Glue crawler. | `bool` | `true` | no | -| [crawler\_name](#input\_crawler\_name) | Name of the Glue crawler. | `string` | `null` | no | -| [crawler\_role\_arn](#input\_crawler\_role\_arn) | IAM role ARN that the crawler uses to access S3 and Glue. | `string` | `null` | no | -| [crawler\_description](#input\_crawler\_description) | Description of the Glue crawler. 
| `string` | `null` | no | -| [crawler\_schedule](#input\_crawler\_schedule) | Cron expression for the crawler schedule (e.g., `cron(0 12 * * ? *)`). Omit for on-demand. | `string` | `null` | no | -| [crawler\_table\_prefix](#input\_crawler\_table\_prefix) | Prefix for table names created by the crawler. | `string` | `null` | no | -| [crawler\_classifiers](#input\_crawler\_classifiers) | List of custom classifier names to associate with the crawler. | `list(string)` | `[]` | no | -| [crawler\_configuration](#input\_crawler\_configuration) | JSON string of Glue crawler configuration (e.g., grouping behavior). | `string` | `null` | no | -| [s3\_targets](#input\_s3\_targets) | List of S3 target configurations for the crawler. Each object: `path` (required), `exclusions`, `connection_name`, `event_queue_arn`, `dlq_event_queue_arn`, `sample_size`. | `list(object({...}))` | `[]` | no | -| [schema\_change\_policy](#input\_schema\_change\_policy) | Behavior when the crawler discovers a changed schema. `update_behavior`: UPDATE\_IN\_DATABASE or LOG. `delete_behavior`: DELETE\_FROM\_DATABASE, LOG, or DEPRECATE\_IN\_DATABASE. | `object({...})` | `{}` | no | -| [recrawl\_policy](#input\_recrawl\_policy) | Recrawl behavior. `recrawl_behavior`: CRAWL\_EVERYTHING, CRAWL\_NEW\_FOLDERS\_ONLY, or CRAWL\_EVENT\_MODE. | `object({...})` | `{}` | no | -| [tags](#input\_tags) | Map of tags to apply to all resources. | `map(string)` | `{}` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [database\_name](#output\_database\_name) | Name of the Glue catalog database. | -| [database\_id](#output\_database\_id) | ID of the Glue catalog database (`catalog_id:name`). | -| [crawler\_id](#output\_crawler\_id) | ID of the Glue crawler (same as its name). | -| [crawler\_arn](#output\_crawler\_arn) | ARN of the Glue crawler. | -| [crawler\_name](#output\_crawler\_name) | Name of the Glue crawler. 
| - diff --git a/modules/aws-glue/versions.tf b/modules/aws-glue/versions.tf deleted file mode 100644 index 29ec41d..0000000 --- a/modules/aws-glue/versions.tf +++ /dev/null @@ -1,10 +0,0 @@ -terraform { - required_version = ">= 1.3" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 5.0" - } - } -} From 343b2815775f5b0c90679611ed66ee4acd63d730 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 19:49:12 +0700 Subject: [PATCH 5/8] feat: add aws glue and athena to release please config --- release-please-config.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/release-please-config.json b/release-please-config.json index 968ec86..c8a76f7 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -602,6 +602,24 @@ "bump-patch-for-minor-pre-major": false, "draft": false, "prerelease": false + }, + "modules/aws-glue": { + "component": "aws-glue", + "changelog-path": "CHANGELOG.md", + "release-type": "terraform-module", + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "draft": false, + "prerelease": false + }, + "modules/aws-athena": { + "component": "aws-athena", + "changelog-path": "CHANGELOG.md", + "release-type": "terraform-module", + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "draft": false, + "prerelease": false } }, "separate-pull-requests": true, From 7438822d6284f24f31e54a3b586251cd5af9afc0 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Fri, 22 May 2026 19:52:53 +0700 Subject: [PATCH 6/8] chore: add changelog --- modules/aws-athena/CHANGELOG.md | 8 ++++++++ modules/aws-glue/CHANGELOG.md | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 modules/aws-athena/CHANGELOG.md create mode 100644 modules/aws-glue/CHANGELOG.md diff --git a/modules/aws-athena/CHANGELOG.md b/modules/aws-athena/CHANGELOG.md new file mode 100644 index 0000000..929f77e --- /dev/null +++ b/modules/aws-athena/CHANGELOG.md @@ -0,0 +1,8 
@@ +# Changelog + +## 0.1.0 (2026-05-22) + + +### Features + +* add aws-athena module with Athena workgroup and named query support diff --git a/modules/aws-glue/CHANGELOG.md b/modules/aws-glue/CHANGELOG.md new file mode 100644 index 0000000..cfc9443 --- /dev/null +++ b/modules/aws-glue/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## 0.1.0 (2026-05-22) + + +### Features + +* add aws-glue module with Glue catalog database and S3 crawler support From 7ab9c39560341d3792fdd81c8e620c2d88341ad8 Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Mon, 25 May 2026 08:49:22 +0700 Subject: [PATCH 7/8] feat: add readme for athena and glue --- modules/aws-athena/README.md | 125 +++++++++++++++++++++++++++++++++++ modules/aws-glue/README.md | 119 +++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 modules/aws-athena/README.md create mode 100644 modules/aws-glue/README.md diff --git a/modules/aws-athena/README.md b/modules/aws-athena/README.md new file mode 100644 index 0000000..2ffbaeb --- /dev/null +++ b/modules/aws-athena/README.md @@ -0,0 +1,125 @@ +# AWS Athena Terraform Module + +Terraform module which creates AWS Athena Workgroup and Named Query resources. 
+ +## Usage + +### Basic Workgroup + +```hcl +module "athena" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-athena?ref=main" + + workgroup_name = "my-workgroup" + results_output_location = "s3://my-query-results-bucket/athena/" + + tags = { + Environment = "dev" + ManagedBy = "terraform" + } +} +``` + +### Workgroup with SSE-S3 Encryption and Cost Control + +```hcl +module "athena" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-athena?ref=main" + + workgroup_name = "analytics-workgroup" + workgroup_description = "Workgroup for the analytics team" + results_output_location = "s3://my-query-results-bucket/analytics/" + + enforce_workgroup_configuration = true + publish_cloudwatch_metrics_enabled = true + bytes_scanned_cutoff_per_query = 1073741824 # 1 GB + + encryption_option = "SSE_S3" + + tags = { + Environment = "prod" + ManagedBy = "terraform" + } +} +``` + +### Workgroup with KMS Encryption and Named Queries + +```hcl +module "athena" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-athena?ref=main" + + workgroup_name = "secure-workgroup" + results_output_location = "s3://my-secure-results-bucket/athena/" + + encryption_option = "SSE_KMS" + kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/mrk-abc123" + + named_queries = { + daily_summary = { + database = "my_database" + query = "SELECT date, COUNT(*) AS events FROM events WHERE date = current_date GROUP BY date;" + description = "Daily event count summary" + } + top_users = { + database = "my_database" + query = "SELECT user_id, COUNT(*) AS actions FROM events GROUP BY user_id ORDER BY actions DESC LIMIT 100;" + } + } + + tags = { + Environment = "prod" + ManagedBy = "terraform" + } +} +``` + +### Named Queries Against an Existing Workgroup + +```hcl +module "athena" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-athena?ref=main" + + create_workgroup = false + workgroup_name = "existing-workgroup" + results_output_location =
"s3://my-bucket/results/" + + named_queries = { + weekly_report = { + database = "reporting" + query = "SELECT * FROM weekly_aggregates WHERE week = date_trunc('week', current_date);" + } + } + + tags = { + Environment = "staging" + ManagedBy = "terraform" + } +} +``` + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `workgroup_name` | Name of the Athena workgroup | `string` | — | yes | +| `results_output_location` | S3 URL for query results (e.g., `s3://my-bucket/prefix/`) | `string` | — | yes | +| `create_workgroup` | Whether to create the Athena workgroup | `bool` | `true` | no | +| `workgroup_description` | Description of the Athena workgroup | `string` | `null` | no | +| `workgroup_state` | State of the workgroup: `ENABLED` or `DISABLED` | `string` | `"ENABLED"` | no | +| `enforce_workgroup_configuration` | Enforce workgroup result location and encryption for all queries | `bool` | `true` | no | +| `publish_cloudwatch_metrics_enabled` | Publish workgroup-level query metrics to CloudWatch | `bool` | `true` | no | +| `bytes_scanned_cutoff_per_query` | Maximum bytes scanned per query. `null` disables the limit | `number` | `null` | no | +| `encryption_option` | Encryption option for results: `SSE_S3`, `SSE_KMS`, `CSE_KMS`, or `null` | `string` | `null` | no | +| `kms_key_arn` | KMS key ARN for `SSE_KMS` or `CSE_KMS` encryption | `string` | `null` | no | +| `named_queries` | Map of named Athena queries. 
Key is the query name | `map(object)` | `{}` | no | +| `tags` | Map of tags to apply to all resources | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| `workgroup_id` | ID of the Athena workgroup | +| `workgroup_arn` | ARN of the Athena workgroup | +| `workgroup_name` | Name of the Athena workgroup | +| `named_query_ids` | Map of named query name to its ID | diff --git a/modules/aws-glue/README.md b/modules/aws-glue/README.md new file mode 100644 index 0000000..b4707ea --- /dev/null +++ b/modules/aws-glue/README.md @@ -0,0 +1,119 @@ +# AWS Glue Terraform Module + +Terraform module which creates AWS Glue Catalog Database and Crawler resources. + +## Usage + +### Database Only + +```hcl +module "glue" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-glue?ref=main" + + database_name = "my-data-catalog" + database_description = "Data catalog for raw S3 data" + + create_crawler = false + + tags = { + Environment = "dev" + ManagedBy = "terraform" + } +} +``` + +### Database with S3 Crawler + +```hcl +module "glue" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-glue?ref=main" + + database_name = "my-data-catalog" + database_description = "Data catalog for raw S3 data" + + create_crawler = true + crawler_name = "my-s3-crawler" + crawler_role_arn = "arn:aws:iam::123456789012:role/GlueCrawlerRole" + crawler_schedule = "cron(0 12 * * ?
*)" + crawler_table_prefix = "raw_" + + s3_targets = [ + { + path = "s3://my-data-bucket/raw/" + exclusions = ["**.tmp"] + } + ] + + schema_change_policy = { + update_behavior = "UPDATE_IN_DATABASE" + delete_behavior = "LOG" + } + + tags = { + Environment = "prod" + ManagedBy = "terraform" + } +} +``` + +### Crawler Against Existing Database + +```hcl +module "glue" { + source = "git::https://github.com/kloia/platform-modules//modules/aws-glue?ref=main" + + create_database = false + database_name = "existing-catalog" + + create_crawler = true + crawler_name = "incremental-crawler" + crawler_role_arn = "arn:aws:iam::123456789012:role/GlueCrawlerRole" + + s3_targets = [ + { path = "s3://my-bucket/events/" }, + { path = "s3://my-bucket/metrics/" } + ] + + recrawl_policy = { + recrawl_behavior = "CRAWL_NEW_FOLDERS_ONLY" + } + + tags = { + Environment = "staging" + ManagedBy = "terraform" + } +} +``` + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|----------| +| `database_name` | Name of the Glue catalog database | `string` | — | yes | +| `create_database` | Whether to create the Glue catalog database | `bool` | `true` | no | +| `database_description` | Description of the Glue catalog database | `string` | `null` | no | +| `catalog_id` | AWS account ID of the Glue data catalog | `string` | `null` | no | +| `database_location_uri` | Location of the database (e.g., an HDFS path) | `string` | `null` | no | +| `database_parameters` | Additional parameters for the Glue catalog database | `map(string)` | `{}` | no | +| `create_crawler` | Whether to create the Glue crawler | `bool` | `true` | no | +| `crawler_name` | Name of the Glue crawler | `string` | `null` | no | +| `crawler_role_arn` | IAM role ARN that the crawler uses to access S3 and Glue | `string` | `null` | no | +| `crawler_description` | Description of the Glue crawler | `string` | `null` | no | +| `crawler_schedule` | Cron expression for the crawler schedule.
Omit for on-demand | `string` | `null` | no | +| `crawler_table_prefix` | Prefix for table names created by the crawler | `string` | `null` | no | +| `crawler_classifiers` | List of custom classifier names to associate with the crawler | `list(string)` | `[]` | no | +| `crawler_configuration` | JSON string of Glue crawler configuration | `string` | `null` | no | +| `s3_targets` | List of S3 target configurations for the crawler | `list(object)` | `[]` | no | +| `schema_change_policy` | Behavior when the crawler discovers a changed schema | `object` | `{}` | no | +| `recrawl_policy` | Recrawl behavior configuration | `object` | `{}` | no | +| `tags` | Map of tags to apply to all resources | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| `database_name` | Name of the Glue catalog database | +| `database_id` | ID of the Glue catalog database (`catalog_id:name`) | +| `crawler_id` | ID of the Glue crawler | +| `crawler_arn` | ARN of the Glue crawler | +| `crawler_name` | Name of the Glue crawler | From 76ce78ad894353b43066141403ac5f1da6ee52cb Mon Sep 17 00:00:00 2001 From: bagusramadan Date: Mon, 25 May 2026 11:59:01 +0700 Subject: [PATCH 8/8] chore: fix error validation on athena variables --- modules/aws-athena/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/aws-athena/variables.tf b/modules/aws-athena/variables.tf index a7c73bf..6b2d9e5 100644 --- a/modules/aws-athena/variables.tf +++ b/modules/aws-athena/variables.tf @@ -59,7 +59,7 @@ variable "encryption_option" { default = null validation { - condition = var.encryption_option == null || contains(["SSE_S3", "SSE_KMS", "CSE_KMS"], var.encryption_option) + condition = var.encryption_option == null ? true : contains(["SSE_S3", "SSE_KMS", "CSE_KMS"], var.encryption_option) error_message = "encryption_option must be SSE_S3, SSE_KMS, CSE_KMS, or null." } }