Chapter 16. DevOps and Infrastructure in Cursor
16.1. Docker and Containerization
Creating Docker Containers with Cursor
You: Create a complete Docker setup for a web application
AI: # Dockerfile for a Node.js application
# Multi-stage build to keep the final image small
# Stage 1: Build
FROM node:18-alpine AS builder
# Set working directory
WORKDIR /app
# Copy package files
COPY package*.json ./
# Install all dependencies (dev dependencies are needed for the build step)
RUN npm ci && \
    npm cache clean --force
# Copy source code
COPY . .
# Build the application (if TypeScript or another build step is used)
RUN npm run build && \
    npm prune --omit=dev
# Stage 2: Production
FROM node:18-alpine
# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init
# Create app user
RUN addgroup -g 1001 -S nodejs && \
adduser -S nodejs -u 1001
# Set working directory
WORKDIR /app
# Copy built application from builder
COPY --from=builder --chown=nodejs:nodejs /app/dist ./dist
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nodejs:nodejs /app/package*.json ./
# Switch to non-root user
USER nodejs
# Expose port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s --retries=3 \
CMD node -e "require('http').get('http://localhost:3000/health', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"
# Use dumb-init to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
# Start application
CMD ["node", "dist/server.js"]
# Dockerfile for a Python application
FROM python:3.11-slim
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc \
postgresql-client && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create app user
RUN useradd -m -u 1000 appuser
# Set working directory
WORKDIR /app
# Copy requirements
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY --chown=appuser:appuser . .
# Switch to non-root user
USER appuser
# Expose port
EXPOSE 8000
# Health check (assumes the requests package is installed via requirements.txt)
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=3).raise_for_status()"
# Run application
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "4", "app:app"]
# docker-compose.yml
# Complete application stack
version: '3.9'
services:
# Frontend
frontend:
build:
context: ./frontend
dockerfile: Dockerfile
ports:
- "3000:3000"
environment:
- NODE_ENV=production
- API_URL=http://backend:8000
depends_on:
- backend
networks:
- app-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
# Backend API
backend:
build:
context: ./backend
dockerfile: Dockerfile
ports:
- "8000:8000"
environment:
- DATABASE_URL=postgresql://user:password@postgres:5432/appdb
- REDIS_URL=redis://redis:6379/0
- SECRET_KEY=${SECRET_KEY}
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
networks:
- app-network
volumes:
- ./backend/logs:/app/logs
restart: unless-stopped
# PostgreSQL Database
postgres:
image: postgres:15-alpine
environment:
- POSTGRES_USER=user
- POSTGRES_PASSWORD=password
- POSTGRES_DB=appdb
ports:
- "5432:5432"
volumes:
- postgres-data:/var/lib/postgresql/data
- ./init-scripts:/docker-entrypoint-initdb.d
networks:
- app-network
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U user"]
interval: 10s
timeout: 5s
retries: 5
# Redis Cache
redis:
image: redis:7-alpine
ports:
- "6379:6379"
volumes:
- redis-data:/data
networks:
- app-network
restart: unless-stopped
command: redis-server --appendonly yes --requirepass ${REDIS_PASSWORD}
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 3s
retries: 5
# Nginx Reverse Proxy
nginx:
image: nginx:alpine
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
- ./nginx/logs:/var/log/nginx
depends_on:
- frontend
- backend
networks:
- app-network
restart: unless-stopped
# Monitoring - Prometheus
prometheus:
image: prom/prometheus:latest
ports:
- "9090:9090"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus-data:/prometheus
networks:
- app-network
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
restart: unless-stopped
# Monitoring - Grafana
grafana:
image: grafana/grafana:latest
ports:
- "3001:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
- GF_INSTALL_PLUGINS=grafana-piechart-panel
volumes:
- grafana-data:/var/lib/grafana
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
- ./grafana/datasources:/etc/grafana/provisioning/datasources:ro
depends_on:
- prometheus
networks:
- app-network
restart: unless-stopped
networks:
app-network:
driver: bridge
volumes:
postgres-data:
redis-data:
prometheus-data:
grafana-data:
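The stack defined in docker-compose.yml can be brought up and verified with a few commands; a sketch assuming the file above is saved as docker-compose.yml next to a .env file providing SECRET_KEY, REDIS_PASSWORD and GRAFANA_PASSWORD (an example .env appears after the Makefile below):
# Validate the compose file, then start everything in the background
docker-compose config -q
docker-compose up -d --build
# Watch container state and health checks until all services report "healthy"
docker-compose ps
docker-compose logs -f --tail=20 backend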
# nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
# Gzip compression
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/xml text/javascript
application/json application/javascript application/xml+rss;
# Rate limiting
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;
# Upstream backends
upstream frontend {
least_conn;
server frontend:3000 max_fails=3 fail_timeout=30s;
}
upstream backend {
least_conn;
server backend:8000 max_fails=3 fail_timeout=30s;
}
# HTTP server
server {
listen 80;
server_name example.com www.example.com;
# Redirect to HTTPS
return 301 https://$server_name$request_uri;
}
# HTTPS server
server {
listen 443 ssl http2;
server_name example.com www.example.com;
# SSL configuration
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
# Frontend
location / {
proxy_pass http://frontend;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_cache_bypass $http_upgrade;
}
# Backend API
location /api/ {
limit_req zone=api_limit burst=20 nodelay;
proxy_pass http://backend/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Timeouts
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# Health check endpoint
location /health {
access_log off;
return 200 "healthy\n";
add_header Content-Type text/plain;
}
# Static files with caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2)$ {
expires 1y;
add_header Cache-Control "public, immutable";
proxy_pass http://frontend;
}
}
}
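Before reloading nginx in production it is worth validating the configuration inside the container; a sketch using the nginx service name from the compose file above:
# Syntax-check the mounted nginx.conf and reload without downtime
docker-compose exec nginx nginx -t
docker-compose exec nginx nginx -s reload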
# deploy.sh
#!/bin/bash
set -e
echo "================================"
echo "Deployment Script"
echo "================================"
# Load environment variables
if [ -f .env ]; then
    set -a; . ./.env; set +a  # export everything defined in .env (handles values containing spaces)
fi
# Build images
echo "Building Docker images..."
docker-compose build --no-cache
# Run database migrations
echo "Running database migrations..."
docker-compose run --rm backend python manage.py migrate
# Start services
echo "Starting services..."
docker-compose up -d
# Wait for services to be healthy
echo "Waiting for services to be ready..."
sleep 10
# Check health
echo "Checking service health..."
docker-compose ps
# Run health checks
for service in frontend backend postgres redis; do
echo "Checking $service..."
if docker-compose exec -T $service echo "OK" > /dev/null 2>&1; then
echo "✓ $service is running"
else
echo "✗ $service failed to start"
exit 1
fi
done
echo "================================"
echo "Deployment completed successfully!"
echo "================================"
# Show logs
docker-compose logs --tail=50
# Makefile
.PHONY: help build start stop restart logs clean test
help:
@echo "Available commands:"
@echo " make build - Build Docker images"
@echo " make start - Start all services"
@echo " make stop - Stop all services"
@echo " make restart - Restart all services"
@echo " make logs - Show logs"
@echo " make clean - Remove containers and volumes"
@echo " make test - Run tests"
build:
docker-compose build
start:
docker-compose up -d
@echo "Services started. Access at http://localhost"
stop:
docker-compose down
restart: stop start
logs:
docker-compose logs -f
clean:
docker-compose down -v
docker system prune -f
test:
docker-compose run --rm backend pytest
docker-compose run --rm frontend npm test
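The compose file and Makefile expect several variables (SECRET_KEY, REDIS_PASSWORD, GRAFANA_PASSWORD) to come from a .env file next to docker-compose.yml. A minimal sketch with placeholder values, which should never be committed to the repository:
# .env (placeholders only; generate real secrets, e.g. with `openssl rand -hex 32`)
SECRET_KEY=change-me
REDIS_PASSWORD=change-me
GRAFANA_PASSWORD=change-me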
16.2. Kubernetes and Orchestration
You: Create Kubernetes manifests for deploying the application
AI: # kubernetes/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: production
labels:
name: production
environment: production
# kubernetes/deployment-backend.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: backend
namespace: production
labels:
app: backend
tier: api
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: backend
template:
metadata:
labels:
app: backend
tier: api
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
prometheus.io/path: "/metrics"
spec:
serviceAccountName: backend-sa
securityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
containers:
- name: backend
image: myregistry/backend:latest
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secret
key: url
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: app-config
key: redis_url
- name: LOG_LEVEL
value: "INFO"
resources:
requests:
cpu: "100m"
memory: "256Mi"
limits:
cpu: "500m"
memory: "512Mi"
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8000
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
volumeMounts:
- name: config
mountPath: /app/config
readOnly: true
- name: logs
mountPath: /app/logs
volumes:
- name: config
configMap:
name: app-config
- name: logs
emptyDir: {}
# kubernetes/service.yaml
apiVersion: v1
kind: Service
metadata:
name: backend-service
namespace: production
labels:
app: backend
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 8000
protocol: TCP
name: http
selector:
app: backend
---
apiVersion: v1
kind: Service
metadata:
name: frontend-service
namespace: production
spec:
type: LoadBalancer
ports:
- port: 80
targetPort: 3000
protocol: TCP
selector:
app: frontend
# kubernetes/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-ingress
namespace: production
annotations:
kubernetes.io/ingress.class: "nginx"
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/rate-limit: "100"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
tls:
- hosts:
- app.example.com
- api.example.com
secretName: app-tls-secret
rules:
- host: app.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: frontend-service
port:
number: 80
- host: api.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: backend-service
port:
number: 80
# kubernetes/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: backend-hpa
namespace: production
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: backend
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
behavior:
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 50
periodSeconds: 60
scaleUp:
stabilizationWindowSeconds: 0
policies:
- type: Percent
value: 100
periodSeconds: 15
- type: Pods
value: 4
periodSeconds: 15
selectPolicy: Max
# kubernetes/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: app-config
namespace: production
data:
redis_url: "redis://redis-service:6379"
log_level: "INFO"
max_connections: "100"
app_config.yaml: |
server:
port: 8000
host: 0.0.0.0
database:
pool_size: 20
max_overflow: 10
cache:
ttl: 3600
# kubernetes/secret.yaml
apiVersion: v1
kind: Secret
metadata:
name: database-secret
namespace: production
type: Opaque
stringData:
url: "postgresql://user:password@postgres-service:5432/appdb"
user: "user"
password: "password"
# kubernetes/statefulset-postgres.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: postgres
namespace: production
spec:
serviceName: postgres-service
replicas: 1
selector:
matchLabels:
app: postgres
template:
metadata:
labels:
app: postgres
spec:
containers:
- name: postgres
image: postgres:15
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_DB
value: "appdb"
- name: POSTGRES_USER
valueFrom:
secretKeyRef:
name: database-secret
key: user
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: database-secret
key: password
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
volumeMounts:
- name: postgres-storage
mountPath: /var/lib/postgresql/data
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
volumeClaimTemplates:
- metadata:
name: postgres-storage
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: "fast-ssd"
resources:
requests:
storage: 10Gi
# deploy-k8s.sh
#!/bin/bash
set -e
echo "Deploying to Kubernetes..."
# Apply namespace
kubectl apply -f kubernetes/namespace.yaml
# Apply ConfigMaps and Secrets
kubectl apply -f kubernetes/configmap.yaml
kubectl apply -f kubernetes/secret.yaml
# Apply StatefulSets
kubectl apply -f kubernetes/statefulset-postgres.yaml
# Wait for StatefulSets to be ready
kubectl wait --for=condition=ready pod -l app=postgres -n production --timeout=300s
# Apply Deployments
kubectl apply -f kubernetes/deployment-backend.yaml
kubectl apply -f kubernetes/deployment-frontend.yaml
# Apply Services
kubectl apply -f kubernetes/service.yaml
# Apply Ingress
kubectl apply -f kubernetes/ingress.yaml
# Apply HPA
kubectl apply -f kubernetes/hpa.yaml
# Wait for deployments
kubectl rollout status deployment/backend -n production
kubectl rollout status deployment/frontend -n production
echo "Deployment completed!"
echo "Checking pod status..."
kubectl get pods -n production
echo "Checking services..."
kubectl get svc -n production
echo "Checking ingress..."
kubectl get ingress -n production
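After the script finishes, a quick smoke test confirms that the backend actually answers through its Service; a sketch using the service name and probe path defined above:
# Forward the backend Service to a local port and hit the health endpoint
kubectl -n production port-forward svc/backend-service 8080:80 &
PF_PID=$!
sleep 2
curl -f http://localhost:8080/health
kill $PF_PID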
16.3. Infrastructure as Code
Terraform for Cloud Infrastructure
You: Create a Terraform configuration for AWS infrastructure
AI: # terraform/main.tf
terraform {
required_version = ">= 1.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
backend "s3" {
bucket = "my-terraform-state"
key = "production/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
provider "aws" {
region = var.aws_region
default_tags {
tags = {
Environment = var.environment
Project = var.project_name
ManagedBy = "Terraform"
}
}
}
# terraform/variables.tf
variable "aws_region" {
description = "AWS region"
type = string
default = "us-east-1"
}
variable "environment" {
description = "Environment name"
type = string
default = "production"
}
variable "project_name" {
description = "Project name"
type = string
default = "my-app"
}
variable "vpc_cidr" {
description = "VPC CIDR block"
type = string
default = "10.0.0.0/16"
}
variable "availability_zones" {
description = "Availability zones"
type = list(string)
default = ["us-east-1a", "us-east-1b", "us-east-1c"]
}
variable "db_instance_class" {
description = "RDS instance class"
type = string
default = "db.t3.medium"
}
variable "db_password" {
description = "Database password"
type = string
sensitive = true
}
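Since db_password is marked sensitive, it should not sit in a .tfvars file inside the repository; one common approach is to pass it through the environment. A sketch:
# Terraform picks up TF_VAR_<name> environment variables automatically
export TF_VAR_db_password="$(openssl rand -base64 24)"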
# terraform/vpc.tf
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = "${var.project_name}-${var.environment}-vpc"
cidr = var.vpc_cidr
azs = var.availability_zones
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]
database_subnets = ["10.0.201.0/24", "10.0.202.0/24", "10.0.203.0/24"]
enable_nat_gateway = true
single_nat_gateway = false
enable_dns_hostnames = true
enable_dns_support = true
enable_vpn_gateway = false
tags = {
Name = "${var.project_name}-${var.environment}-vpc"
}
}
# terraform/security-groups.tf
resource "aws_security_group" "alb" {
name_prefix = "${var.project_name}-alb-"
description = "Security group for Application Load Balancer"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "HTTP from anywhere"
}
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "HTTPS from anywhere"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
description = "Allow all outbound traffic"
}
tags = {
Name = "${var.project_name}-alb-sg"
}
}
resource "aws_security_group" "ecs_tasks" {
name_prefix = "${var.project_name}-ecs-tasks-"
description = "Security group for ECS tasks"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 8000
to_port = 8000
protocol = "tcp"
security_groups = [aws_security_group.alb.id]
description = "Allow traffic from ALB"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
description = "Allow all outbound traffic"
}
tags = {
Name = "${var.project_name}-ecs-tasks-sg"
}
}
resource "aws_security_group" "rds" {
name_prefix = "${var.project_name}-rds-"
description = "Security group for RDS instance"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 5432
to_port = 5432
protocol = "tcp"
security_groups = [aws_security_group.ecs_tasks.id]
description = "PostgreSQL from ECS tasks"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "${var.project_name}-rds-sg"
}
}
# terraform/rds.tf
resource "aws_db_subnet_group" "main" {
name = "${var.project_name}-${var.environment}-db-subnet"
subnet_ids = module.vpc.database_subnets
tags = {
Name = "${var.project_name}-db-subnet-group"
}
}
resource "aws_db_instance" "main" {
identifier = "${var.project_name}-${var.environment}-db"
engine = "postgres"
engine_version = "15.3"
instance_class = var.db_instance_class
allocated_storage = 100
max_allocated_storage = 1000
storage_type = "gp3"
storage_encrypted = true
db_name = "appdb"
username = "dbadmin"
password = var.db_password
multi_az = true
db_subnet_group_name = aws_db_subnet_group.main.name
vpc_security_group_ids = [aws_security_group.rds.id]
backup_retention_period = 7
backup_window = "03:00-04:00"
maintenance_window = "Mon:04:00-Mon:05:00"
enabled_cloudwatch_logs_exports = ["postgresql", "upgrade"]
performance_insights_enabled = true
monitoring_interval = 60
monitoring_role_arn = aws_iam_role.rds_monitoring.arn
deletion_protection = true
skip_final_snapshot = false
final_snapshot_identifier = "${var.project_name}-final-snapshot-${formatdate("YYYY-MM-DD-hhmm", timestamp())}"
tags = {
Name = "${var.project_name}-database"
}
}
# terraform/ecs.tf
resource "aws_ecs_cluster" "main" {
name = "${var.project_name}-${var.environment}-cluster"
setting {
name = "containerInsights"
value = "enabled"
}
tags = {
Name = "${var.project_name}-ecs-cluster"
}
}
resource "aws_ecs_cluster_capacity_providers" "main" {
cluster_name = aws_ecs_cluster.main.name
capacity_providers = ["FARGATE", "FARGATE_SPOT"]
default_capacity_provider_strategy {
base = 1
weight = 100
capacity_provider = "FARGATE"
}
}
resource "aws_ecs_task_definition" "backend" {
family = "${var.project_name}-backend"
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = "512"
memory = "1024"
execution_role_arn = aws_iam_role.ecs_execution.arn
task_role_arn = aws_iam_role.ecs_task.arn
container_definitions = jsonencode([
{
name = "backend"
image = "${aws_ecr_repository.backend.repository_url}:latest"
portMappings = [
{
containerPort = 8000
protocol = "tcp"
}
]
environment = [
{
name = "ENVIRONMENT"
value = var.environment
},
{
name = "DATABASE_HOST"
value = aws_db_instance.main.address
}
]
secrets = [
{
name = "DATABASE_PASSWORD"
valueFrom = aws_secretsmanager_secret.db_password.arn
}
]
logConfiguration = {
logDriver = "awslogs"
options = {
"awslogs-group" = aws_cloudwatch_log_group.backend.name
"awslogs-region" = var.aws_region
"awslogs-stream-prefix" = "backend"
}
}
healthCheck = {
command = ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
interval = 30
timeout = 5
retries = 3
startPeriod = 60
}
}
])
tags = {
Name = "${var.project_name}-backend-task"
}
}
resource "aws_ecs_service" "backend" {
name = "${var.project_name}-backend-service"
cluster = aws_ecs_cluster.main.id
task_definition = aws_ecs_task_definition.backend.arn
desired_count = 3
launch_type = "FARGATE"
network_configuration {
subnets = module.vpc.private_subnets
security_groups = [aws_security_group.ecs_tasks.id]
assign_public_ip = false
}
load_balancer {
target_group_arn = aws_lb_target_group.backend.arn
container_name = "backend"
container_port = 8000
}
deployment_maximum_percent = 200
deployment_minimum_healthy_percent = 100
deployment_circuit_breaker {
enable = true
rollback = true
}
tags = {
Name = "${var.project_name}-backend-service"
}
}
# terraform/alb.tf
resource "aws_lb" "main" {
name = "${var.project_name}-${var.environment}-alb"
internal = false
load_balancer_type = "application"
security_groups = [aws_security_group.alb.id]
subnets = module.vpc.public_subnets
enable_deletion_protection = true
enable_http2 = true
enable_cross_zone_load_balancing = true
access_logs {
bucket = aws_s3_bucket.alb_logs.id
enabled = true
}
tags = {
Name = "${var.project_name}-alb"
}
}
resource "aws_lb_target_group" "backend" {
name = "${var.project_name}-backend-tg"
port = 8000
protocol = "HTTP"
vpc_id = module.vpc.vpc_id
target_type = "ip"
health_check {
enabled = true
healthy_threshold = 2
interval = 30
matcher = "200"
path = "/health"
port = "traffic-port"
protocol = "HTTP"
timeout = 5
unhealthy_threshold = 3
}
deregistration_delay = 30
tags = {
Name = "${var.project_name}-backend-tg"
}
}
resource "aws_lb_listener" "https" {
load_balancer_arn = aws_lb.main.arn
port = "443"
protocol = "HTTPS"
ssl_policy = "ELBSecurityPolicy-TLS-1-2-2017-01"
certificate_arn = aws_acm_certificate.main.arn
default_action {
type = "forward"
target_group_arn = aws_lb_target_group.backend.arn
}
}
resource "aws_lb_listener" "http" {
load_balancer_arn = aws_lb.main.arn
port = "80"
protocol = "HTTP"
default_action {
type = "redirect"
redirect {
port = "443"
protocol = "HTTPS"
status_code = "HTTP_301"
}
}
}
# terraform/autoscaling.tf
resource "aws_appautoscaling_target" "ecs_target" {
max_capacity = 10
min_capacity = 3
resource_id = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.backend.name}"
scalable_dimension = "ecs:service:DesiredCount"
service_namespace = "ecs"
}
resource "aws_appautoscaling_policy" "ecs_cpu" {
name = "${var.project_name}-cpu-scaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs_target.resource_id
scalable_dimension = aws_appautoscaling_target.ecs_target.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs_target.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageCPUUtilization"
}
target_value = 70.0
scale_in_cooldown = 300
scale_out_cooldown = 60
}
}
resource "aws_appautoscaling_policy" "ecs_memory" {
name = "${var.project_name}-memory-scaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs_target.resource_id
scalable_dimension = aws_appautoscaling_target.ecs_target.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs_target.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageMemoryUtilization"
}
target_value = 80.0
scale_in_cooldown = 300
scale_out_cooldown = 60
}
}
# terraform/cloudwatch.tf
resource "aws_cloudwatch_log_group" "backend" {
name = "/ecs/${var.project_name}/backend"
retention_in_days = 30
tags = {
Name = "${var.project_name}-backend-logs"
}
}
resource "aws_cloudwatch_metric_alarm" "ecs_cpu_high" {
alarm_name = "${var.project_name}-ecs-cpu-high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/ECS"
period = "300"
statistic = "Average"
threshold = "80"
alarm_description = "This metric monitors ECS CPU utilization"
alarm_actions = [aws_sns_topic.alerts.arn]
dimensions = {
ClusterName = aws_ecs_cluster.main.name
ServiceName = aws_ecs_service.backend.name
}
}
resource "aws_cloudwatch_metric_alarm" "rds_cpu_high" {
alarm_name = "${var.project_name}-rds-cpu-high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/RDS"
period = "300"
statistic = "Average"
threshold = "80"
alarm_description = "RDS CPU utilization is too high"
alarm_actions = [aws_sns_topic.alerts.arn]
dimensions = {
DBInstanceIdentifier = aws_db_instance.main.id
}
}
# terraform/outputs.tf
output "alb_dns_name" {
description = "DNS name of the load balancer"
value = aws_lb.main.dns_name
}
output "ecs_cluster_name" {
description = "Name of the ECS cluster"
value = aws_ecs_cluster.main.name
}
output "rds_endpoint" {
description = "RDS instance endpoint"
value = aws_db_instance.main.endpoint
sensitive = true
}
output "ecr_repository_url" {
description = "URL of the ECR repository"
value = aws_ecr_repository.backend.repository_url
}
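A typical workflow for applying this configuration, assuming the S3 state bucket and DynamoDB lock table from the backend block already exist:
cd terraform
terraform init                      # configure the S3 backend and download providers
terraform fmt -check && terraform validate
terraform plan -out=tfplan          # review the changes before applying
terraform apply tfplan
terraform output alb_dns_name       # DNS name to point your domain at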
16.4. Automation Scripts
Ansible for Server Configuration
You: Create Ansible playbooks for configuring servers
AI: # ansible/inventory/production.yml
all:
children:
webservers:
hosts:
web1:
ansible_host: 10.0.1.10
ansible_user: ubuntu
web2:
ansible_host: 10.0.1.11
ansible_user: ubuntu
databases:
hosts:
db1:
ansible_host: 10.0.2.10
ansible_user: ubuntu
postgresql_role: primary
db2:
ansible_host: 10.0.2.11
ansible_user: ubuntu
postgresql_role: replica
loadbalancers:
hosts:
lb1:
ansible_host: 10.0.3.10
ansible_user: ubuntu
vars:
ansible_python_interpreter: /usr/bin/python3
app_name: myapp
app_version: "1.0.0"
environment: production
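Before running the playbook it is useful to confirm that every host in the inventory is reachable over SSH and that facts can be gathered; a sketch:
ansible all -i inventory/production.yml -m ping
ansible all -i inventory/production.yml -m setup -a 'filter=ansible_distribution*' --one-line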
# ansible/playbook.yml
---
- name: Configure All Servers
hosts: all
become: yes
roles:
- common
- security
- monitoring
- name: Configure Web Servers
hosts: webservers
become: yes
roles:
- nginx
- application
- ssl
- name: Configure Database Servers
hosts: databases
become: yes
roles:
- postgresql
- backup
- name: Configure Load Balancers
hosts: loadbalancers
become: yes
roles:
- haproxy
# ansible/roles/common/tasks/main.yml
---
- name: Update apt cache
apt:
update_cache: yes
cache_valid_time: 3600
- name: Install common packages
apt:
name:
- curl
- wget
- git
- vim
- htop
- net-tools
- ufw
- fail2ban
- unattended-upgrades
state: present
- name: Set timezone
timezone:
name: UTC
- name: Configure NTP
apt:
name: chrony
state: present
notify: restart chrony
- name: Enable NTP service
service:
name: chrony
state: started
enabled: yes
- name: Set hostname
hostname:
name: "{{ inventory_hostname }}"
- name: Configure /etc/hosts
template:
src: hosts.j2
dest: /etc/hosts
owner: root
group: root
mode: '0644'
- name: Create app user
user:
name: "{{ app_user }}"
shell: /bin/bash
create_home: yes
groups: sudo
append: yes
- name: Configure sudo for app user
template:
src: sudoers.j2
dest: /etc/sudoers.d/{{ app_user }}
validate: 'visudo -cf %s'
mode: '0440'
# ansible/roles/security/tasks/main.yml
---
- name: Configure UFW defaults
ufw:
direction: "{{ item.direction }}"
policy: "{{ item.policy }}"
loop:
- { direction: 'incoming', policy: 'deny' }
- { direction: 'outgoing', policy: 'allow' }
- name: Allow SSH
ufw:
rule: allow
port: '22'
proto: tcp
- name: Allow HTTP
ufw:
rule: allow
port: '80'
proto: tcp
when: "'webservers' in group_names"
- name: Allow HTTPS
ufw:
rule: allow
port: '443'
proto: tcp
when: "'webservers' in group_names"
- name: Enable UFW
ufw:
state: enabled
- name: Configure fail2ban
template:
src: jail.local.j2
dest: /etc/fail2ban/jail.local
owner: root
group: root
mode: '0644'
notify: restart fail2ban
- name: Disable root login
lineinfile:
path: /etc/ssh/sshd_config
regexp: '^PermitRootLogin'
line: 'PermitRootLogin no'
notify: restart sshd
- name: Disable password authentication
lineinfile:
path: /etc/ssh/sshd_config
regexp: '^PasswordAuthentication'
line: 'PasswordAuthentication no'
notify: restart sshd
- name: Configure automatic security updates
template:
src: 50unattended-upgrades.j2
dest: /etc/apt/apt.conf.d/50unattended-upgrades
owner: root
group: root
mode: '0644'
# ansible/roles/nginx/tasks/main.yml
---
- name: Install Nginx
apt:
name: nginx
state: present
- name: Create web root directory
file:
path: /var/www/{{ app_name }}
state: directory
owner: www-data
group: www-data
mode: '0755'
- name: Configure Nginx site
template:
src: nginx-site.conf.j2
dest: /etc/nginx/sites-available/{{ app_name }}
owner: root
group: root
mode: '0644'
notify: reload nginx
- name: Enable Nginx site
file:
src: /etc/nginx/sites-available/{{ app_name }}
dest: /etc/nginx/sites-enabled/{{ app_name }}
state: link
notify: reload nginx
- name: Remove default Nginx site
file:
path: /etc/nginx/sites-enabled/default
state: absent
notify: reload nginx
- name: Configure Nginx
template:
src: nginx.conf.j2
dest: /etc/nginx/nginx.conf
owner: root
group: root
mode: '0644'
notify: reload nginx
- name: Start and enable Nginx
service:
name: nginx
state: started
enabled: yes
# ansible/roles/nginx/templates/nginx-site.conf.j2
upstream backend {
least_conn;
{% for host in groups['webservers'] %}
server {{ hostvars[host]['ansible_host'] }}:8000 max_fails=3 fail_timeout=30s;
{% endfor %}
}
# Rate limiting
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;
server {
listen 80;
server_name {{ domain_name }};
# Redirect to HTTPS
return 301 https://$server_name$request_uri;
}
server {
listen 443 ssl http2;
server_name {{ domain_name }};
# SSL Configuration
ssl_certificate /etc/letsencrypt/live/{{ domain_name }}/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/{{ domain_name }}/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
# Logging
access_log /var/log/nginx/{{ app_name }}_access.log;
error_log /var/log/nginx/{{ app_name }}_error.log;
# Root directory
root /var/www/{{ app_name }}/public;
index index.html;
# Gzip compression
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/xml text/javascript application/json application/javascript application/xml+rss;
location / {
try_files $uri $uri/ /index.html;
}
location /api/ {
limit_req zone=api_limit burst=20 nodelay;
proxy_pass http://backend/;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_cache_bypass $http_upgrade;
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
location /health {
access_log off;
return 200 "healthy\n";
add_header Content-Type text/plain;
}
# Static files caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
}
# ansible/roles/postgresql/tasks/main.yml
---
- name: Add PostgreSQL repository key
apt_key:
url: https://www.postgresql.org/media/keys/ACCC4CF8.asc
state: present
- name: Add PostgreSQL repository
apt_repository:
repo: deb http://apt.postgresql.org/pub/repos/apt/ {{ ansible_distribution_release }}-pgdg main
state: present
- name: Install PostgreSQL
apt:
name:
- postgresql-15
- postgresql-contrib-15
- python3-psycopg2
state: present
update_cache: yes
- name: Ensure PostgreSQL is running
service:
name: postgresql
state: started
enabled: yes
- name: Configure PostgreSQL
template:
src: postgresql.conf.j2
dest: /etc/postgresql/15/main/postgresql.conf
owner: postgres
group: postgres
mode: '0644'
notify: restart postgresql
- name: Configure pg_hba
template:
src: pg_hba.conf.j2
dest: /etc/postgresql/15/main/pg_hba.conf
owner: postgres
group: postgres
mode: '0640'
notify: restart postgresql
- name: Create application database
postgresql_db:
name: "{{ db_name }}"
encoding: UTF-8
lc_collate: en_US.UTF-8
lc_ctype: en_US.UTF-8
template: template0
become_user: postgres
- name: Create application user
postgresql_user:
name: "{{ db_user }}"
password: "{{ db_password }}"
db: "{{ db_name }}"
priv: ALL
become_user: postgres
- name: Configure replication (on replica)
include_tasks: replica.yml
when: postgresql_role == 'replica'
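Variables such as db_password should not live in plain text alongside the playbook; Ansible Vault is the usual answer. A sketch, assuming the secrets are kept in a hypothetical group_vars/databases/vault.yml:
# Encrypt the secrets file and supply the vault password at deploy time
ansible-vault encrypt group_vars/databases/vault.yml
ansible-playbook -i inventory/production.yml playbook.yml --ask-vault-pass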
# ansible/roles/monitoring/tasks/main.yml
---
- name: Install Node Exporter
get_url:
url: https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
dest: /tmp/node_exporter.tar.gz
- name: Extract Node Exporter
unarchive:
src: /tmp/node_exporter.tar.gz
dest: /tmp/
remote_src: yes
- name: Move Node Exporter binary
copy:
src: /tmp/node_exporter-1.6.1.linux-amd64/node_exporter
dest: /usr/local/bin/node_exporter
remote_src: yes
mode: '0755'
- name: Create node_exporter systemd service
template:
src: node_exporter.service.j2
dest: /etc/systemd/system/node_exporter.service
owner: root
group: root
mode: '0644'
notify: reload systemd
- name: Start and enable node_exporter
service:
name: node_exporter
state: started
enabled: yes
- name: Install Filebeat for log shipping
apt:
deb: https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.10.0-amd64.deb
- name: Configure Filebeat
template:
src: filebeat.yml.j2
dest: /etc/filebeat/filebeat.yml
owner: root
group: root
mode: '0644'
notify: restart filebeat
- name: Start and enable Filebeat
service:
name: filebeat
state: started
enabled: yes
# ansible/deploy.sh
#!/bin/bash
set -e
echo "Starting deployment..."
# Check syntax
ansible-playbook -i inventory/production.yml playbook.yml --syntax-check
# Dry run
echo "Running dry-run..."
ansible-playbook -i inventory/production.yml playbook.yml --check
# Confirm
read -p "Continue with actual deployment? (y/n) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
echo "Deployment cancelled"
exit 1
fi
# Deploy
ansible-playbook -i inventory/production.yml playbook.yml -v
echo "Deployment completed!"
16.5. Monitoring and Logging
Prometheus and Grafana Setup
You: Create a complete monitoring system with Prometheus and Grafana
AI: # prometheus/prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: 'production'
environment: 'prod'
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
# Load rules
rule_files:
- '/etc/prometheus/rules/*.yml'
# Scrape configurations
scrape_configs:
# Prometheus itself
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Node Exporter
- job_name: 'node'
static_configs:
- targets:
- 'node1:9100'
- 'node2:9100'
- 'node3:9100'
labels:
group: 'production'
# Application metrics
- job_name: 'application'
metrics_path: '/metrics'
static_configs:
- targets:
- 'backend:8000'
labels:
service: 'backend'
environment: 'production'
# PostgreSQL Exporter
- job_name: 'postgres'
static_configs:
- targets:
- 'postgres-exporter:9187'
labels:
database: 'main'
# Redis Exporter
- job_name: 'redis'
static_configs:
- targets:
- 'redis-exporter:9121'
# Nginx Exporter
- job_name: 'nginx'
static_configs:
- targets:
- 'nginx-exporter:9113'
# Kubernetes pods (if using K8s)
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
# prometheus/rules/alerts.yml
groups:
- name: instance_alerts
interval: 30s
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: critical
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: HighCPUUsage
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 10m
labels:
severity: warning
annotations:
summary: "High CPU usage on {{ $labels.instance }}"
description: "CPU usage is above 80% (current value: {{ $value }}%)"
- alert: HighMemoryUsage
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
for: 10m
labels:
severity: warning
annotations:
summary: "High memory usage on {{ $labels.instance }}"
description: "Memory usage is above 85% (current value: {{ $value }}%)"
- alert: DiskSpaceLow
expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 15
for: 10m
labels:
severity: warning
annotations:
summary: "Low disk space on {{ $labels.instance }}"
description: "Disk space is below 15% (current value: {{ $value }}%)"
- name: application_alerts
interval: 30s
rules:
- alert: HighRequestLatency
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
for: 10m
labels:
severity: warning
annotations:
summary: "High request latency"
description: "95th percentile latency is above 1s (current value: {{ $value }}s)"
- alert: HighErrorRate
expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.05
for: 5m
labels:
severity: critical
annotations:
summary: "High error rate"
description: "Error rate is above 5% (current value: {{ $value }}%)"
- alert: DatabaseConnectionPoolExhausted
expr: database_connections_active / database_connections_max > 0.9
for: 5m
labels:
severity: critical
annotations:
summary: "Database connection pool almost exhausted"
description: "Using {{ $value }}% of available connections"
# grafana/provisioning/datasources/prometheus.yml
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
editable: false
jsonData:
timeInterval: "15s"
// grafana/provisioning/dashboards/application.json
{
"dashboard": {
"title": "Application Metrics",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"targets": [
{
"expr": "rate(http_requests_total[5m])",
"legendFormat": "{{method}} {{path}}"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"targets": [
{
"expr": "rate(http_requests_total{status=~\"5..\"}[5m])",
"legendFormat": "Errors"
}
]
},
{
"title": "Response Time (95th percentile)",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"targets": [
{
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))",
"legendFormat": "p99"
}
]
},
{
"title": "Active Connections",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
"targets": [
{
"expr": "sum(active_connections)",
"legendFormat": "Active"
}
]
}
]
}
}
# app/metrics.py
from flask import request
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
from functools import wraps
import time
# Metrics
REQUEST_COUNT = Counter(
'http_requests_total',
'Total HTTP requests',
['method', 'endpoint', 'status']
)
REQUEST_LATENCY = Histogram(
'http_request_duration_seconds',
'HTTP request latency',
['method', 'endpoint']
)
ACTIVE_REQUESTS = Gauge(
'http_requests_active',
'Number of active HTTP requests'
)
DATABASE_CONNECTIONS = Gauge(
'database_connections_active',
'Number of active database connections'
)
DATABASE_POOL_SIZE = Gauge(
'database_connections_max',
'Maximum database connection pool size'
)
def track_metrics(f):
    """Decorator to track request metrics."""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        start_time = time.time()
        ACTIVE_REQUESTS.inc()
        try:
            response = f(*args, **kwargs)
            status = response.status_code if hasattr(response, 'status_code') else 200
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint,
                status=status
            ).inc()
            return response
        except Exception:
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint,
                status=500
            ).inc()
            raise
        finally:
            REQUEST_LATENCY.labels(
                method=request.method,
                endpoint=request.endpoint
            ).observe(time.time() - start_time)
            ACTIVE_REQUESTS.dec()
    return decorated_function

def metrics_endpoint():
    """Prometheus metrics endpoint."""
    return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}
# Usage in Flask
from flask import Flask

app = Flask(__name__)

@app.route('/metrics')
def metrics():
    return metrics_endpoint()

@app.route('/api/users')
@track_metrics
def get_users():
    # Your logic here
    return {'users': []}
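A quick way to confirm that the instrumentation works is to run the app locally and scrape the endpoint by hand; a sketch, assuming (hypothetically) that the Flask app above lives in app/main.py:
pip install flask prometheus_client gunicorn
gunicorn --bind 0.0.0.0:8000 app.main:app &
curl -s http://localhost:8000/api/users > /dev/null   # generate one request
curl -s http://localhost:8000/metrics | grep http_requests_total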