Save on NAT by switching to IPv6 in 4 simple steps

2023-07-31

Did you know that each NAT gateway costs you at least $360 per year, in addition to data transfer and public IPv4 fees?

The good news is that you can save money by switching your private subnets to free Egress-only Internet Gateways (EIGW).

Limitations

There are a few caveats, though:

The first limitation is not an issue with a proper network architecture.

The second might be problematic if you need to debug or operate EC2 instances.

The last limitation can be a real problem. Make sure every external service you send requests to supports IPv6 (GitHub, for example, doesn’t). You can check whether a host publishes IPv6 (AAAA) records with dig:

# Look up the AAAA (IPv6) records of a host.
dig gitlab.com AAAA +short
# 2606:4700:90:0:f22e:fbec:5bed:a9b9

Architecture

As an example, let's consider a simple architecture with a single EC2 instance fronted by a load balancer.

AWS Architecture diagram "before"

You can implement this architecture as code with Terraform definitions.

main.tf
# Pin the Terraform CLI and the AWS provider to the versions the examples use.
terraform {
  required_providers {
    aws = {
      version = "5.10"
      source  = "hashicorp/aws"
    }
  }

  required_version = "1.5.4"
}

# All demo resources live in us-west-2.
provider "aws" {
  region = "us-west-2"
}
vpc.tf
# Address plan for the demo VPC: three availability zones, each with one
# public and one private /20 carved out of the VPC's /16.
locals {
  availability_zones = ["us-west-2a", "us-west-2b", "us-west-2c"]

  cidr_block      = "10.10.0.0/16"
  public_subnets  = ["10.10.0.0/20", "10.10.16.0/20", "10.10.32.0/20"]
  private_subnets = ["10.10.128.0/20", "10.10.144.0/20", "10.10.160.0/20"]
}
 
# The demo VPC, with DNS resolution and DNS hostnames both enabled.
resource "aws_vpc" "demo" {
  cidr_block = local.cidr_block

  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = "demo-vpc"
  }
}
 
# One public subnet per availability zone; the Nth zone gets the Nth CIDR
# from local.public_subnets.
resource "aws_subnet" "public_subnets" {
  count = length(local.availability_zones)

  vpc_id            = aws_vpc.demo.id
  cidr_block        = local.public_subnets[count.index]
  availability_zone = local.availability_zones[count.index]

  tags = {
    Name = "demo-subnet-public${count.index + 1}-${local.availability_zones[count.index]}"
  }
}
 
# One private subnet per availability zone; the Nth zone gets the Nth CIDR
# from local.private_subnets.
resource "aws_subnet" "private_subnets" {
  count = length(local.availability_zones)

  vpc_id            = aws_vpc.demo.id
  cidr_block        = local.private_subnets[count.index]
  availability_zone = local.availability_zones[count.index]

  tags = {
    Name = "demo-subnet-private${count.index + 1}-${local.availability_zones[count.index]}"
  }
}
 
# Internet gateway attached to the demo VPC; the public route table's default
# route points at it.
resource "aws_internet_gateway" "demo" {
  vpc_id = aws_vpc.demo.id

  tags = { Name = "demo-igw" }
}
 
# One Elastic IP per availability zone; each is handed to the NAT gateway
# with the same index.
resource "aws_eip" "demo" {
  count = length(local.availability_zones)

  tags = {
    Name = "demo-nat-eip${count.index + 1}-${local.availability_zones[count.index]}"
  }
}
 
# One NAT gateway per availability zone, placed in that zone's public subnet
# and bound to the Elastic IP with the same index.
resource "aws_nat_gateway" "demo" {
  count = length(local.availability_zones)

  allocation_id = aws_eip.demo[count.index].id
  subnet_id     = aws_subnet.public_subnets[count.index].id

  tags = {
    Name = "demo-nat-public${count.index + 1}-${local.availability_zones[count.index]}"
  }

  # Explicit ordering: create the internet gateway before the NAT gateways.
  depends_on = [aws_internet_gateway.demo]
}
 
# Shared route table for the public subnets: all IPv4 traffic leaves through
# the internet gateway.
resource "aws_route_table" "public_rtb" {
  vpc_id = aws_vpc.demo.id

  route {
    gateway_id = aws_internet_gateway.demo.id
    cidr_block = "0.0.0.0/0"
  }

  tags = { Name = "demo-rtb-public" }
}
 
# One private route table per availability zone, so each zone's IPv4 default
# route points at the NAT gateway with the same index.
resource "aws_route_table" "private_rtb" {
  count = length(local.availability_zones)

  vpc_id = aws_vpc.demo.id

  route {
    nat_gateway_id = aws_nat_gateway.demo[count.index].id
    cidr_block     = "0.0.0.0/0"
  }

  tags = {
    Name = "demo-rtb-private${count.index + 1}-${local.availability_zones[count.index]}"
  }
}
 
# Attach every public subnet to the single public route table.
resource "aws_route_table_association" "public_rtba" {
  count = length(local.availability_zones)

  route_table_id = aws_route_table.public_rtb.id
  subnet_id      = aws_subnet.public_subnets[count.index].id
}
 
# Attach each private subnet to the private route table with the same index.
resource "aws_route_table_association" "private_rtba" {
  count = length(local.availability_zones)

  route_table_id = aws_route_table.private_rtb[count.index].id
  subnet_id      = aws_subnet.private_subnets[count.index].id
}
security_groups.tf
# Security group for the load balancer; its rules are separate
# aws_vpc_security_group_*_rule resources.
resource "aws_security_group" "demo_lb" {
  vpc_id      = aws_vpc.demo.id
  name        = "demo-lb"
  description = "Allow external connections"
}
 
# Let any IPv4 client reach the load balancer on TCP port 80.
resource "aws_vpc_security_group_ingress_rule" "demo_lb_ingress" {
  security_group_id = aws_security_group.demo_lb.id
  description       = "Allow public traffic on port 80"

  cidr_ipv4   = "0.0.0.0/0"
  ip_protocol = "tcp"
  from_port   = 80
  to_port     = 80
}
 
# Unrestricted IPv4 egress from the load balancer ("-1" = every protocol).
resource "aws_vpc_security_group_egress_rule" "demo_lb_egress" {
  security_group_id = aws_security_group.demo_lb.id
  description       = "Allow access all resources"

  cidr_ipv4   = "0.0.0.0/0"
  ip_protocol = "-1"
}
 
# Security group for the nginx instance; its rules are separate
# aws_vpc_security_group_*_rule resources.
resource "aws_security_group" "demo_nginx" {
  vpc_id      = aws_vpc.demo.id
  name        = "demo-nginx"
  description = "Allow accessing other VPC resources and Internet"
}
 
# Only members of the load balancer's security group may reach nginx on TCP port 80.
resource "aws_vpc_security_group_ingress_rule" "demo_nginx_ingress" {
  security_group_id = aws_security_group.demo_nginx.id
  description       = "Allow traffic on port 80 from the LB"

  referenced_security_group_id = aws_security_group.demo_lb.id
  ip_protocol                  = "tcp"
  from_port                    = 80
  to_port                      = 80
}
 
# Unrestricted IPv4 egress from the nginx instance ("-1" = every protocol).
resource "aws_vpc_security_group_egress_rule" "demo_nginx_egress" {
  security_group_id = aws_security_group.demo_nginx.id
  description       = "Allow access all resources"

  cidr_ipv4   = "0.0.0.0/0"
  ip_protocol = "-1"
}
ec2.tf
# SSH public key registered as the "demo-nginx" key pair; replace it with
# your own key before applying.
locals {
  ec2_public_key = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGLOy2T25dmVF9MNjgWSpHSUxAjVz3KUdOcTGy/AJG6D semyon@pisarev.us"
}
 
# Trust policy that lets the EC2 service assume the role it is attached to.
data "aws_iam_policy_document" "ec2_assume_role_policy" {
  statement {
    principals {
      identifiers = ["ec2.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}
 
# Newest Amazon-owned image whose name matches amzn2-ami-kernel-5.10-*,
# narrowed to x86_64, HVM virtualization and an EBS root device.
data "aws_ami" "demo_al2" {
  owners      = ["amazon"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-kernel-5.10-*"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "root-device-type"
    values = ["ebs"]
  }

  filter {
    name   = "architecture"
    values = ["x86_64"]
  }
}
 
# Registers local.ec2_public_key as the key pair used by the nginx instance.
resource "aws_key_pair" "demo_nginx" {
  public_key = local.ec2_public_key
  key_name   = "demo-nginx"
}
 
# IAM role for the nginx instance; the EC2 service is allowed to assume it.
resource "aws_iam_role" "demo_nginx" {
  assume_role_policy = data.aws_iam_policy_document.ec2_assume_role_policy.json
  name               = "demo-nginx"
}
 
# Attach the AWS-managed AmazonSSMManagedInstanceCore policy to the instance role.
resource "aws_iam_role_policy_attachment" "demo_nginx_ssm" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.demo_nginx.name
}
 
# Instance profile that carries the demo-nginx role onto the EC2 instance.
resource "aws_iam_instance_profile" "demo_nginx" {
  role = aws_iam_role.demo_nginx.name
  name = "demo-nginx"
}
 
# The demo web server: an instance in the first private subnet that installs
# nginx from user data and serves a one-line static page.
resource "aws_instance" "demo_nginx" {
  ami                    = data.aws_ami.demo_al2.id
  instance_type          = "t3.micro"
  key_name               = aws_key_pair.demo_nginx.key_name
  vpc_security_group_ids = [aws_security_group.demo_nginx.id]
  subnet_id              = aws_subnet.private_subnets[0].id
  iam_instance_profile   = aws_iam_instance_profile.demo_nginx.name

  # Any change to the script below replaces the instance.
  user_data_replace_on_change = true

  # The script runs unattended at first boot, so the package install is given
  # -y to make sure it never stops at a confirmation prompt.
  user_data = <<-EOF
  #!/bin/bash

  amazon-linux-extras install -y nginx1
  systemctl enable nginx
  echo "IPv4 -> IPv6" > /usr/share/nginx/html/index.html

  systemctl start nginx
  EOF

  tags = {
    Name = "demo-nginx"
  }
}
elb.tf
# Internet-facing application load balancer spanning all public subnets.
resource "aws_lb" "demo" {
  name               = "demo"
  load_balancer_type = "application"
  internal           = false

  subnets         = aws_subnet.public_subnets[*].id
  security_groups = [aws_security_group.demo_lb.id]

  # Must be turned off before the load balancer can be destroyed.
  enable_deletion_protection = true
}
 
# Target group of instances receiving HTTP on port 80; a target is healthy
# when GET / returns 200.
resource "aws_lb_target_group" "demo_nginx" {
  name        = "demo-nginx"
  vpc_id      = aws_vpc.demo.id
  target_type = "instance"
  protocol    = "HTTP"
  port        = 80

  health_check {
    path    = "/"
    matcher = "200"
  }
}
 
# HTTP listener on port 80 that forwards every request to the nginx target group.
resource "aws_lb_listener" "demo_http_80" {
  load_balancer_arn = aws_lb.demo.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    target_group_arn = aws_lb_target_group.demo_nginx.arn
    type             = "forward"
  }
}
 
# Register the nginx instance, on port 80, as a target of the target group.
resource "aws_lb_target_group_attachment" "demo_nginx" {
  target_id        = aws_instance.demo_nginx.id
  port             = 80
  target_group_arn = aws_lb_target_group.demo_nginx.arn
}
outputs.tf
# Public DNS name of the load balancer, used by the dig/curl checks in the article.
output "demo_lb_dns_name" {
  value = aws_lb.demo.dns_name
}

Switchover

This short guide shows how to switch your VPC to dual stack and replace the NAT gateways with an EIGW in 4 simple steps!

Let's capture the "before" state of the IP assignment.

terraform output
# demo_lb_dns_name = "demo-901283481.us-west-2.elb.amazonaws.com"

# +short goes before the first query so it applies to both lookups;
# placed last it would only affect the AAAA query.
dig +short demo-901283481.us-west-2.elb.amazonaws.com A demo-901283481.us-west-2.elb.amazonaws.com AAAA
# 35.82.235.126
# 54.71.26.163

curl http://demo-901283481.us-west-2.elb.amazonaws.com
# IPv4 -> IPv6
  1. Update the VPC and subnets to support dual stack. Amazon assigns an IPv6 CIDR block for us; all we need to do is allocate subnet blocks and enable IPv6 support.

    # vpc.tf
    locals {
      # ...
    +
    + # Subnet numbers passed to cidrsubnet() below, one per subnet.
    + public_subnet_ipv6_prefixes  = [0, 1, 2]
    + private_subnet_ipv6_prefixes = [3, 4, 5]
    }

    resource "aws_vpc" "demo" {
      # ...

    + assign_generated_ipv6_cidr_block = true
    }

    resource "aws_subnet" "public_subnets" {
      count = length(local.availability_zones)
      vpc_id            = aws_vpc.demo.id
      availability_zone = element(local.availability_zones, count.index)
      cidr_block        = element(local.public_subnets, count.index)
    + # Adds 8 bits to the VPC's IPv6 prefix to carve out one block per subnet.
    + ipv6_cidr_block   = cidrsubnet(aws_vpc.demo.ipv6_cidr_block, 8, local.public_subnet_ipv6_prefixes[count.index])

      # ...
    }

    resource "aws_subnet" "private_subnets" {
      count = length(local.availability_zones)
      vpc_id            = aws_vpc.demo.id
      availability_zone = element(local.availability_zones, count.index)
      cidr_block        = element(local.private_subnets, count.index)
    + ipv6_cidr_block   = cidrsubnet(aws_vpc.demo.ipv6_cidr_block, 8, local.private_subnet_ipv6_prefixes[count.index])
    +
    + # Without this, instances launched in these subnets get no IPv6 address
    + # and cannot send traffic through the Egress-only Internet Gateway.
    + # NOTE: instances that already exist are not changed by this setting;
    + # replace them or give them an address explicitly (e.g. ipv6_address_count).
    + assign_ipv6_address_on_creation = true

      # ...
    }
  2. Add an Egress-only Internet Gateway, and update the route tables and security groups to handle IPv6 traffic.

    # vpc.tf
    # Public subnets send IPv6 traffic (::/0) through the existing internet
    # gateway; private subnets send it through the new egress-only gateway.
     
    + resource "aws_egress_only_internet_gateway" "demo" {
    +   vpc_id = aws_vpc.demo.id
    + }
     
    resource "aws_route_table" "public_rtb" {
      vpc_id = aws_vpc.demo.id
      route {
        cidr_block = "0.0.0.0/0"
        gateway_id = aws_internet_gateway.demo.id
      }
    +
    + route {
    +   ipv6_cidr_block = "::/0"
    +   gateway_id      = aws_internet_gateway.demo.id
    + }
    }
     
    resource "aws_route_table" "private_rtb" {
      count = length(local.availability_zones)
      vpc_id = aws_vpc.demo.id
      route {
        cidr_block     = "0.0.0.0/0"
        nat_gateway_id = element(aws_nat_gateway.demo.*.id, count.index)
      }
    +
    + route {
    +   ipv6_cidr_block = "::/0"
    +   egress_only_gateway_id = aws_egress_only_internet_gateway.demo.id
    + }
    }
    # security_groups.tf
    # IPv6 counterparts of the existing IPv4 rules: same ports and protocols,
    # with cidr_ipv6 "::/0" in place of cidr_ipv4 "0.0.0.0/0".
     
    + resource "aws_vpc_security_group_ingress_rule" "demo_lb_ingress_ipv6" {
    +   security_group_id = aws_security_group.demo_lb.id
    + 
    +   description = "Allow public traffic on port 80"
    +   from_port   = 80
    +   to_port     = 80
    +   ip_protocol = "tcp"
    +   cidr_ipv6   = "::/0"
    + }
     
    + resource "aws_vpc_security_group_egress_rule" "demo_lb_egress_ipv6" {
    +   security_group_id = aws_security_group.demo_lb.id
    + 
    +   description = "Allow access all resources"
    +   ip_protocol = "-1"
    +   cidr_ipv6   = "::/0"
    + }
     
    + resource "aws_vpc_security_group_egress_rule" "demo_nginx_egress_ipv6" {
    +   security_group_id = aws_security_group.demo_nginx.id
    + 
    +   description = "Allow access all resources"
    +   ip_protocol = "-1"
    +   cidr_ipv6   = "::/0"
    + }
  3. Enable dualstack support on the Load Balancer to assign public IPv6 addresses

    # elb.tf
    # Switch the load balancer to dual stack so it is also assigned public IPv6 addresses.
     
    resource "aws_lb" "demo" {
      # ...
    + ip_address_type    = "dualstack"
    }
  4. Remove NAT gateways 🎉

    # vpc.tf
    # Removes the NAT gateways, their Elastic IPs, and the IPv4 default route
    # that pointed at them. The "# ..." left in private_rtb stands for the
    # rest of the resource (presumably the ::/0 route added in step 2).
     
    - resource "aws_eip" "demo" {
    -   count = length(local.availability_zones)
     
    -   tags = {
    -     Name = "demo-nat-eip${count.index + 1}-${element(local.availability_zones, count.index)}"
    -   }
    - }
     
    - resource "aws_nat_gateway" "demo" {
    -   count = length(local.availability_zones)
     
    -   subnet_id     = element(aws_subnet.public_subnets.*.id, count.index)
    -   allocation_id = element(aws_eip.demo.*.id, count.index)
     
    -   depends_on = [aws_internet_gateway.demo]
     
    -   tags = {
    -     Name = "demo-nat-public${count.index + 1}-${element(local.availability_zones, count.index)}"
    -   }
    - }
     
    resource "aws_route_table" "private_rtb" {
      count = length(local.availability_zones)
      vpc_id = aws_vpc.demo.id
     
    - route {
    -   cidr_block     = "0.0.0.0/0"
    -   nat_gateway_id = element(aws_nat_gateway.demo.*.id, count.index)
    - }
     
      # ...
    }

Results

We no longer use NAT gateways, and the architecture now looks like this:

AWS Architecture diagram "after"

And here is the resulting IP assignment:

# +short goes before the first query so it applies to both lookups;
# placed last it would only affect the AAAA query.
dig +short demo-901283481.us-west-2.elb.amazonaws.com A demo-901283481.us-west-2.elb.amazonaws.com AAAA
# 54.71.26.163
# 35.82.235.126
# 2600:1f14:2ee9:3202:4575:166b:8cca:a798
# 2600:1f14:2ee9:3200:889a:35ea:8bfc:a74d

curl http://demo-901283481.us-west-2.elb.amazonaws.com
# IPv4 -> IPv6

Links