diff --git a/.gitignore b/.gitignore index 7b5d96e..2870903 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,14 @@ __pycache__ uploads/* !uploads/.gitkeep .env +# Local .terraform directories +**/.terraform/* + +# .tfstate files (contain sensitive AWS credentials!) +*.tfstate +*.tfstate.* + +# Exclude all .tfvars files +*.tfvars +*.tfvars.json +*tfplan* diff --git a/README.md b/README.md index acc481b..03092e1 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,14 @@ A FastAPI-based KYC (Know Your Customer) system that automatically extracts stru ## Features - **JWT Authentication**: Secure endpoint access with token-based authentication +- **High Throughput**: Handles large numbers of concurrent read requests via FastAPI +- **Low Latency**: Reduces API response latency by offloading document analysis to background workers - **Async Processing**: Celery task queue for background document analysis - **AWS Textract Integration**: Automatic extraction of ID field data (name, DOB, document number, etc.) 
- **S3 Storage**: Secure file upload and storage with presigned URLs - **PostgreSQL Persistence**: Track KYC task status and results - **Image Validation**: Support for JPEG, PNG, and WebP formats +- **Infrastructure as Code**: Automated AWS provisioning (RDS, EC2, S3) using Terraform - **Structured Logging**: Comprehensive logging for debugging and monitoring ## Tech Stack @@ -20,6 +23,7 @@ A FastAPI-based KYC (Know Your Customer) system that automatically extracts stru - **Cloud Services**: AWS Textract, S3 - **Authentication**: PyJWT - **Server**: Uvicorn +- **Infrastructure**: Terraform, Docker ## Project Structure @@ -36,17 +40,37 @@ A FastAPI-based KYC (Know Your Customer) system that automatically extracts stru ├── db/ │ ├── database.py # Database connection setup │ └── models.py # SQLAlchemy models +├── terraform/ # Terraform IaC configuration +│ └── main.tf # AWS infrastructure definitions └── uploads/ # Local upload directory (for development) ``` ## Architecture Overview - **Architecture Diagram** -![Kyc-API Architecture](arch-images/kyc-arch.png) +![Kyc-API Architecture](images/kyc-arch.png) - **Data Flow Diagram** -![Kyc-API DFD](arch-images/kyc-dfd.jpg) +![Kyc-API DFD](images/kyc-dfd.jpg) +## Live Demo +![KYC API live demo](images/api-showcase.gif) + +## Screenshots + + ### Swagger/OpenAPI Documentation + ![swagger-openapi-doc](images/swagger-openapi-doc.jpg) + + ### Completed Task Status Example + ![task-status-example](images/task-status-example.jpg) + + ### PostgreSQL Schema + ![postgresql-schema](images/schema-diagram.jpg) + + ### GitHub Actions Workflow + ![github-actions-workflow](images/gh-action-workflow.jpg) + + ## Engineering Decisions diff --git a/images/api-showcase.gif b/images/api-showcase.gif new file mode 100644 index 0000000..213fc87 Binary files /dev/null and b/images/api-showcase.gif differ diff --git a/images/gh-action-workflow.jpg 
b/images/gh-action-workflow.jpg new file mode 100644 index 0000000..3754b42 Binary files /dev/null and b/images/gh-action-workflow.jpg differ diff --git a/arch-images/kyc-arch.png b/images/kyc-arch.png similarity index 100% rename from arch-images/kyc-arch.png rename to images/kyc-arch.png diff --git a/arch-images/kyc-dfd.jpg b/images/kyc-dfd.jpg similarity index 100% rename from arch-images/kyc-dfd.jpg rename to images/kyc-dfd.jpg diff --git a/images/schema-diagram.jpg b/images/schema-diagram.jpg new file mode 100644 index 0000000..da5e50a Binary files /dev/null and b/images/schema-diagram.jpg differ diff --git a/images/swagger-openapi-doc.jpg b/images/swagger-openapi-doc.jpg new file mode 100644 index 0000000..1e89520 Binary files /dev/null and b/images/swagger-openapi-doc.jpg differ diff --git a/images/task-status-example.jpg b/images/task-status-example.jpg new file mode 100644 index 0000000..0c6d637 Binary files /dev/null and b/images/task-status-example.jpg differ diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000..92a2bcc --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+
+provider "registry.terraform.io/hashicorp/aws" {
+  version     = "5.100.0"
+  constraints = "~> 5.0"
+  hashes = [
+    "h1:H3mU/7URhP0uCRGK8jeQRKxx2XFzEqLiOq/L2Bbiaxs=",
+    "zh:054b8dd49f0549c9a7cc27d159e45327b7b65cf404da5e5a20da154b90b8a644",
+    "zh:0b97bf8d5e03d15d83cc40b0530a1f84b459354939ba6f135a0086c20ebbe6b2",
+    "zh:1589a2266af699cbd5d80737a0fe02e54ec9cf2ca54e7e00ac51c7359056f274",
+    "zh:6330766f1d85f01ae6ea90d1b214b8b74cc8c1badc4696b165b36ddd4cc15f7b",
+    "zh:7c8c2e30d8e55291b86fcb64bdf6c25489d538688545eb48fd74ad622e5d3862",
+    "zh:99b1003bd9bd32ee323544da897148f46a527f622dc3971af63ea3e251596342",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:9f8b909d3ec50ade83c8062290378b1ec553edef6a447c56dadc01a99f4eaa93",
+    "zh:aaef921ff9aabaf8b1869a86d692ebd24fbd4e12c21205034bb679b9caf883a2",
+    "zh:ac882313207aba00dd5a76dbd572a0ddc818bb9cbf5c9d61b28fe30efaec951e",
+    "zh:bb64e8aff37becab373a1a0cc1080990785304141af42ed6aa3dd4913b000421",
+    "zh:dfe495f6621df5540d9c92ad40b8067376350b005c637ea6efac5dc15028add4",
+    "zh:f0ddf0eaf052766cfe09dea8200a946519f653c384ab4336e2a4a64fdd6310e9",
+    "zh:f1b7e684f4c7ae1eed272b6de7d2049bb87a0275cb04dbb7cda6636f600699c9",
+    "zh:ff461571e3f233699bf690db319dfe46aec75e58726636a0d97dd9ac6e32fb70",
+  ]
+}
diff --git a/terraform/main.tf b/terraform/main.tf
new file mode 100644
index 0000000..c33e53f
--- /dev/null
+++ b/terraform/main.tf
@@ -0,0 +1,246 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+# Configure the AWS Provider
+provider "aws" {
+  region = var.aws_region
+}
+
+variable "aws_region" {
+  description = "AWS region for all resources."
+  type        = string
+  default     = "ap-south-1"
+}
+
+variable "bucket_name" {
+  description = "Name of the S3 bucket (must be globally unique across all of AWS)"
+  type        = string
+  default     = "kyc-api-documents-mumbai-123" # Start fresh with a new bucket name
+}
+
+variable "key_name" {
+  description = "Name of an existing AWS Key Pair to allow SSH access to the EC2 instance"
+  type        = string
+}
+
+variable "db_password" {
+  description = "Password for the RDS PostgreSQL database"
+  type        = string
+  sensitive   = true
+}
+
+# CIDR ranges allowed to open SSH connections to the EC2 instance.
+# The default keeps SSH reachable from anywhere; override it with your own
+# address range (for example ["203.0.113.10/32"]) outside of throwaway demos.
+variable "ssh_allowed_cidrs" {
+  description = "CIDR blocks allowed to reach the EC2 instance on port 22"
+  type        = list(string)
+  default     = ["0.0.0.0/0"]
+}
+
+# 1. Create the Private S3 Bucket
+resource "aws_s3_bucket" "kyc_bucket" {
+  bucket = var.bucket_name
+}
+
+# Block all public access to the bucket (Security Best Practice)
+resource "aws_s3_bucket_public_access_block" "kyc_bucket_access" {
+  bucket                  = aws_s3_bucket.kyc_bucket.id
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+# 2. Create the IAM User for the API
+resource "aws_iam_user" "api_user" {
+  name = "kyc_api_service_account"
+}
+
+# Generate Access Keys for the User
+resource "aws_iam_access_key" "api_user_key" {
+  user = aws_iam_user.api_user.name
+}
+
+# 3. Attach AWS Textract Permissions
+resource "aws_iam_user_policy_attachment" "textract_access" {
+  user       = aws_iam_user.api_user.name
+  policy_arn = "arn:aws:iam::aws:policy/AmazonTextractFullAccess"
+}
+
+# 4. Create and Attach Scoped S3 Permissions
+resource "aws_iam_user_policy" "s3_access" {
+  name = "kyc_api_s3_access"
+  user = aws_iam_user.api_user.name
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = [
+          "s3:PutObject",
+          "s3:GetObject",
+          "s3:DeleteObject",
+          "s3:ListBucket"
+        ]
+        Effect = "Allow"
+        Resource = [
+          aws_s3_bucket.kyc_bucket.arn,
+          "${aws_s3_bucket.kyc_bucket.arn}/*"
+        ]
+      }
+    ]
+  })
+}
+
+# 5. Get the Default VPC
+data "aws_vpc" "default" {
+  default = true
+}
+
+# 6. Create Security Groups
+resource "aws_security_group" "ec2_sg" {
+  name        = "kyc_api_ec2_sg"
+  description = "Allow HTTP, HTTPS, and SSH inbound traffic"
+  vpc_id      = data.aws_vpc.default.id
+
+  ingress {
+    description = "HTTP"
+    from_port   = 80
+    to_port     = 80
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    description = "HTTPS"
+    from_port   = 443
+    to_port     = 443
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    description = "SSH"
+    from_port   = 22
+    to_port     = 22
+    protocol    = "tcp"
+    cidr_blocks = var.ssh_allowed_cidrs # Defaults to 0.0.0.0/0; restrict via the variable
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+resource "aws_security_group" "rds_sg" {
+  name        = "kyc_api_rds_sg"
+  description = "Allow PostgreSQL traffic only from the EC2 instance"
+  vpc_id      = data.aws_vpc.default.id
+
+  ingress {
+    description     = "PostgreSQL from EC2"
+    from_port       = 5432
+    to_port         = 5432
+    protocol        = "tcp"
+    security_groups = [aws_security_group.ec2_sg.id]
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+# 7. Find the latest Ubuntu 22.04 LTS AMI
+data "aws_ami" "ubuntu" {
+  most_recent = true
+  owners      = ["099720109477"] # Canonical
+
+  filter {
+    name   = "name"
+    values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
+  }
+
+  filter {
+    name   = "virtualization-type"
+    values = ["hvm"]
+  }
+}
+
+# 8. Create the EC2 Instance
+resource "aws_instance" "app_server" {
+  ami                    = data.aws_ami.ubuntu.id
+  instance_type          = "t2.micro" # Free-tier eligible
+  key_name               = var.key_name
+  vpc_security_group_ids = [aws_security_group.ec2_sg.id]
+
+  tags = {
+    Name = "KYC-API-Server"
+  }
+
+  # data.aws_ami.ubuntu uses most_recent = true, so its ID changes whenever a
+  # newer image is published; ignore that drift so apply does not replace the server.
+  lifecycle {
+    ignore_changes = [ami]
+  }
+}
+
+# 9. Create the RDS PostgreSQL Database
+# NOTE(review): storage_encrypted is not set here. Consider enabling it for KYC data;
+# changing it on an existing instance typically forces replacement - check terraform plan first.
+resource "aws_db_instance" "postgres_db" {
+  identifier             = "kyc-api-db"
+  allocated_storage      = 20
+  engine                 = "postgres"
+  engine_version         = "14"
+  instance_class         = "db.t3.micro" # Free-tier eligible
+  db_name                = "kyc_db"
+  username               = "postgres"
+  password               = var.db_password
+  parameter_group_name   = "default.postgres14"
+  skip_final_snapshot    = true # Allows terraform destroy without waiting for backups
+  vpc_security_group_ids = [aws_security_group.rds_sg.id]
+  publicly_accessible    = false # Keeps database off the public internet!
+}
+
+# 10. Output variables for easy access
+output "AWS_REGION" {
+  value = var.aws_region
+}
+
+output "S3_BUCKET_NAME" {
+  value = aws_s3_bucket.kyc_bucket.id
+}
+
+output "AWS_ACCESS_KEY_ID" {
+  value = aws_iam_access_key.api_user_key.id
+}
+
+output "AWS_SECRET_ACCESS_KEY" {
+  value     = aws_iam_access_key.api_user_key.secret
+  sensitive = true
+}
+
+output "EC2_PUBLIC_IP" {
+  value = aws_instance.app_server.public_ip
+}
+
+output "RDS_DATABASE_URL" {
+  # The password is percent-encoded so characters such as "#", ":" or "?" cannot
+  # break the URL. urlencode() turns spaces into "+", which is only valid in query
+  # strings, so those are rewritten to "%20". endpoint is already "address:port".
+  value     = "postgresql://${aws_db_instance.postgres_db.username}:${replace(urlencode(var.db_password), "+", "%20")}@${aws_db_instance.postgres_db.endpoint}/${aws_db_instance.postgres_db.db_name}"
+  sensitive = true
+}