Chapter 16. DevOps and Infrastructure in Cursor
16.1. Docker and Containerization
Creating Docker Containers with Cursor
You: Create a complete Docker setup for a web application
AI: # Dockerfile for a Node.js application
# Multi-stage build to keep the final image small
# Stage 1: Build
FROM node:18-alpine AS builder
# Set working directory
WORKDIR /app
# Copy package files
COPY package*.json ./
# Install all dependencies (dev dependencies are needed for the build step)
RUN npm ci && \
    npm cache clean --force
# Copy source code
COPY . .
# Build the application (if TypeScript or another build step is used)
RUN npm run build && \
    npm prune --omit=dev
# Stage 2: Production
FROM node:18-alpine
# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init
# Create app user
RUN addgroup -g 1001 -S nodejs && \
adduser -S nodejs -u 1001
# Set working directory
WORKDIR /app
# Copy built application from builder
COPY --from=builder --chown=nodejs:nodejs /app/dist ./dist
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nodejs:nodejs /app/package*.json ./
# Switch to non-root user
USER nodejs
# Expose port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s --retries=3 \
CMD node -e "require('http').get('http://localhost:3000/health', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"
# Use dumb-init to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
# Start application
CMD ["node", "dist/server.js"]
# Dockerfile for a Python application
FROM python:3.11-slim
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
gcc \
postgresql-client && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create app user
RUN useradd -m -u 1000 appuser
# Set working directory
WORKDIR /app
# Copy requirements
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY --chown=appuser:appuser . .
# Switch to non-root user
USER appuser
# Expose port
EXPOSE 8000
# Health check (assumes the requests package is installed via requirements.txt)
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=3).raise_for_status()"
# Run application
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "4", "app:app"]
# docker-compose.yml
# Complete application stack
version: '3.9'
services:
# Frontend
frontend:
build:
context: ./frontend
dockerfile: Dockerfile
ports:
- "3000:3000"
environment:
- NODE_ENV=production
- API_URL=http://backend:8000
depends_on:
- backend
networks:
- app-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
# Backend API
backend:
build:
context: ./backend
dockerfile: Dockerfile
ports:
- "8000:8000"
environment:
- DATABASE_URL=postgresql://user:password@postgres:5432/appdb
- REDIS_URL=redis://redis:6379/0
- SECRET_KEY=${SECRET_KEY}
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
networks:
- app-network
volumes:
- ./backend/logs:/app/logs
restart: unless-stopped
# PostgreSQL Database
postgres:
image: postgres:15-alpine
environment:
- POSTGRES_USER=user
- POSTGRES_PASSWORD=password
- POSTGRES_DB=appdb
ports:
- "5432:5432"
volumes:
- postgres-data:/var/lib/postgresql/data
- ./init-scripts:/docker-entrypoint-initdb.d
networks:
- app-network
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "pg_isready -U user"]
interval: 10s
timeout: 5s
retries: 5
# Redis Cache
redis:
image: redis:7-alpine
ports:
- "6379:6379"
volumes:
- redis-data:/data
networks:
- app-network
restart: unless-stopped
command: redis-server --appendonly yes --requirepass ${REDIS_PASSWORD}
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 3s
retries: 5
# Nginx Reverse Proxy
nginx:
image: nginx:alpine
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
- ./nginx/logs:/var/log/nginx
depends_on:
- frontend
- backend
networks:
- app-network
restart: unless-stopped
# Monitoring - Prometheus
prometheus:
image: prom/prometheus:latest
ports:
- "9090:9090"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus-data:/prometheus
networks:
- app-network
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
restart: unless-stopped
# Monitoring - Grafana
grafana:
image: grafana/grafana:latest
ports:
- "3001:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
- GF_INSTALL_PLUGINS=grafana-piechart-panel
volumes:
- grafana-data:/var/lib/grafana
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
- ./grafana/datasources:/etc/grafana/provisioning/datasources:ro
depends_on:
- prometheus
networks:
- app-network
restart: unless-stopped
networks:
app-network:
driver: bridge
volumes:
postgres-data:
redis-data:
prometheus-data:
grafana-data:
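The stack defined in docker-compose.yml can be brought up and verified with a few commands; a sketch assuming the file above is saved as docker-compose.yml next to a .env file providing SECRET_KEY, REDIS_PASSWORD and GRAFANA_PASSWORD (an example .env appears after the Makefile below):
# Validate the compose file, then start everything in the background
docker-compose config -q
docker-compose up -d --build
# Watch container state and health checks until all services report "healthy"
docker-compose ps
docker-compose logs -f --tail=20 backend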
# nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
# Gzip compression
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/xml text/javascript
application/json application/javascript application/xml+rss;
# Rate limiting
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;
# Upstream backends
upstream frontend {
least_conn;
server frontend:3000 max_fails=3 fail_timeout=30s;
}
upstream backend {
least_conn;
server backend:8000 max_fails=3 fail_timeout=30s;
}
# HTTP server
server {
listen 80;
server_name example.com www.example.com;
# Redirect to HTTPS
return 301 https://$server_name$request_uri;
}
# HTTPS server
server {
listen 443 ssl http2;
server_name example.com www.example.com;
# SSL configuration
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
# Frontend
location / {
proxy_pass http://frontend;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_cache_bypass $http_upgrade;
}
# Backend API
location /api/ {
limit_req zone=api_limit burst=20 nodelay;
proxy_pass http://backend/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Timeouts
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# Health check endpoint
location /health {
access_log off;
return 200 "healthy\n";
add_header Content-Type text/plain;
}
# Static files with caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2)$ {
expires 1y;
add_header Cache-Control "public, immutable";
proxy_pass http://frontend;
}
}
}
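Before reloading nginx in production it is worth validating the configuration inside the container; a sketch using the nginx service name from the compose file above:
# Syntax-check the mounted nginx.conf and reload without downtime
docker-compose exec nginx nginx -t
docker-compose exec nginx nginx -s reload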
# deploy.sh
#!/bin/bash
set -e
echo "================================"
echo "Deployment Script"
echo "================================"
# Load environment variables
if [ -f .env ]; then
    set -a; . ./.env; set +a  # export everything defined in .env (handles values containing spaces)
fi
# Build images
echo "Building Docker images..."
docker-compose build --no-cache
# Run database migrations
echo "Running database migrations..."
docker-compose run --rm backend python manage.py migrate
# Start services
echo "Starting services..."
docker-compose up -d
# Wait for services to be healthy
echo "Waiting for services to be ready..."
sleep 10
# Check health
echo "Checking service health..."
docker-compose ps
# Run health checks
for service in frontend backend postgres redis; do
echo "Checking $service..."
if docker-compose exec -T $service echo "OK" > /dev/null 2>&1; then
echo "✓ $service is running"
else
echo "✗ $service failed to start"
exit 1
fi
done
echo "================================"
echo "Deployment completed successfully!"
echo "================================"
# Show logs
docker-compose logs --tail=50
# Makefile
.PHONY: help build start stop restart logs clean test
help:
@echo "Available commands:"
@echo " make build - Build Docker images"
@echo " make start - Start all services"
@echo " make stop - Stop all services"
@echo " make restart - Restart all services"
@echo " make logs - Show logs"
@echo " make clean - Remove containers and volumes"
@echo " make test - Run tests"
build:
docker-compose build
start:
docker-compose up -d
@echo "Services started. Access at http://localhost"
stop:
docker-compose down
restart: stop start
logs:
docker-compose logs -f
clean:
docker-compose down -v
docker system prune -f
test:
docker-compose run --rm backend pytest
docker-compose run --rm frontend npm test
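The compose file and Makefile expect several variables (SECRET_KEY, REDIS_PASSWORD, GRAFANA_PASSWORD) to come from a .env file next to docker-compose.yml. A minimal sketch with placeholder values, which should never be committed to the repository:
# .env (placeholders only; generate real secrets, e.g. with `openssl rand -hex 32`)
SECRET_KEY=change-me
REDIS_PASSWORD=change-me
GRAFANA_PASSWORD=change-me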
16.2. Kubernetes and Orchestration
You: Create Kubernetes manifests for deploying the application
AI: # kubernetes/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: production
labels:
name: production
environment: production
# kubernetes/deployment-backend.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: backend
namespace: production
labels:
app: backend
tier: api
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: backend
template:
metadata:
labels:
app: backend
tier: api
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
prometheus.io/path: "/metrics"
spec:
serviceAccountName: backend-sa
securityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
containers:
- name: backend
image: myregistry/backend:latest
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secret
key: url
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: app-config
key: redis_url
- name: LOG_LEVEL
value: "INFO"
resources:
requests:
cpu: "100m"
memory: "256Mi"
limits:
cpu: "500m"
memory: "512Mi"
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8000
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
volumeMounts:
- name: config
mountPath: /app/config
readOnly: true
- name: logs
mountPath: /app/logs
volumes:
- name: config
configMap:
name: app-config
- name: logs
emptyDir: {}
# kubernetes/service.yaml
apiVersion: v1
kind: Service
metadata:
name: backend-service
namespace: production
labels:
app: backend
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 8000
protocol: TCP
name: http
selector:
app: backend
---
apiVersion: v1
kind: Service
metadata:
name: frontend-service
namespace: production
spec:
type: LoadBalancer
ports:
- port: 80
targetPort: 3000
protocol: TCP
selector:
app: frontend
# kubernetes/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-ingress
namespace: production
annotations:
kubernetes.io/ingress.class: "nginx"
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/rate-limit: "100"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
tls:
- hosts:
- app.example.com
- api.example.com
secretName: app-tls-secret
rules:
- host: app.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: frontend-service
port:
number: 80
- host: api.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: backend-service
port:
number: 80
# kubernetes/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: backend-hpa
namespace: production
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: backend
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
behavior:
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 50
periodSeconds: 60
scaleUp:
stabilizationWindowSeconds: 0
policies:
- type: Percent
value: 100
periodSeconds: 15
- type: Pods
value: 4
periodSeconds: 15
selectPolicy: Max
# kubernetes/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: app-config
namespace: production
data:
redis_url: "redis://redis-service:6379"
log_level: "INFO"
max_connections: "100"
app_config.yaml: |
server:
port: 8000
host: 0.0.0.0
database:
pool_size: 20
max_overflow: 10
cache:
ttl: 3600
# kubernetes/secret.yaml
apiVersion: v1
kind: Secret
metadata:
name: database-secret
namespace: production
type: Opaque
stringData:
url: "postgresql://user:password@postgres-service:5432/appdb"
user: "user"
password: "password"
# kubernetes/statefulset-postgres.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: postgres
namespace: production
spec:
serviceName: postgres-service
replicas: 1
selector:
matchLabels:
app: postgres
template:
metadata:
labels:
app: postgres
spec:
containers:
- name: postgres
image: postgres:15
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_DB
value: "appdb"
- name: POSTGRES_USER
valueFrom:
secretKeyRef:
name: database-secret
key: user
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: database-secret
key: password
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
volumeMounts:
- name: postgres-storage
mountPath: /var/lib/postgresql/data
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
volumeClaimTemplates:
- metadata:
name: postgres-storage
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: "fast-ssd"
resources:
requests:
storage: 10Gi
# deploy-k8s.sh
#!/bin/bash
set -e
echo "Deploying to Kubernetes..."
# Apply namespace
kubectl apply -f kubernetes/namespace.yaml
# Apply ConfigMaps and Secrets
kubectl apply -f kubernetes/configmap.yaml
kubectl apply -f kubernetes/secret.yaml
# Apply StatefulSets
kubectl apply -f kubernetes/statefulset-postgres.yaml
# Wait for StatefulSets to be ready
kubectl wait --for=condition=ready pod -l app=postgres -n production --timeout=300s
# Apply Deployments
kubectl apply -f kubernetes/deployment-backend.yaml
kubectl apply -f kubernetes/deployment-frontend.yaml
# Apply Services
kubectl apply -f kubernetes/service.yaml
# Apply Ingress
kubectl apply -f kubernetes/ingress.yaml
# Apply HPA
kubectl apply -f kubernetes/hpa.yaml
# Wait for deployments
kubectl rollout status deployment/backend -n production
kubectl rollout status deployment/frontend -n production
echo "Deployment completed!"
echo "Checking pod status..."
kubectl get pods -n production
echo "Checking services..."
kubectl get svc -n production
echo "Checking ingress..."
kubectl get ingress -n production
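After the script finishes, a quick smoke test confirms that the backend actually answers through its Service; a sketch using the service name and probe path defined above:
# Forward the backend Service to a local port and hit the health endpoint
kubectl -n production port-forward svc/backend-service 8080:80 &
PF_PID=$!
sleep 2
curl -f http://localhost:8080/health
kill $PF_PID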
16.3. Infrastructure as Code
Terraform for Cloud Infrastructure
You: Create a Terraform configuration for AWS infrastructure
AI: # terraform/main.tf
terraform {
required_version = ">= 1.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
backend "s3" {
bucket = "my-terraform-state"
key = "production/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
provider "aws" {
region = var.aws_region
default_tags {
tags = {
Environment = var.environment
Project = var.project_name
ManagedBy = "Terraform"
}
}
}
# terraform/variables.tf
variable "aws_region" {
description = "AWS region"
type = string
default = "us-east-1"
}
variable "environment" {
description = "Environment name"
type = string
default = "production"
}
variable "project_name" {
description = "Project name"
type = string
default = "my-app"
}
variable "vpc_cidr" {
description = "VPC CIDR block"
type = string
default = "10.0.0.0/16"
}
variable "availability_zones" {
description = "Availability zones"
type = list(string)
default = ["us-east-1a", "us-east-1b", "us-east-1c"]
}
variable "db_instance_class" {
description = "RDS instance class"
type = string
default = "db.t3.medium"
}
variable "db_password" {
description = "Database password"
type = string
sensitive = true
}
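Since db_password is marked sensitive, it should not sit in a .tfvars file inside the repository; one common approach is to pass it through the environment. A sketch:
# Terraform picks up TF_VAR_<name> environment variables automatically
export TF_VAR_db_password="$(openssl rand -base64 24)"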
# terraform/vpc.tf
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = "${var.project_name}-${var.environment}-vpc"
cidr = var.vpc_cidr
azs = var.availability_zones
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]
database_subnets = ["10.0.201.0/24", "10.0.202.0/24", "10.0.203.0/24"]
enable_nat_gateway = true
single_nat_gateway = false
enable_dns_hostnames = true
enable_dns_support = true
enable_vpn_gateway = false
tags = {
Name = "${var.project_name}-${var.environment}-vpc"
}
}
# terraform/security-groups.tf
resource "aws_security_group" "alb" {
name_prefix = "${var.project_name}-alb-"
description = "Security group for Application Load Balancer"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "HTTP from anywhere"
}
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "HTTPS from anywhere"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
description = "Allow all outbound traffic"
}
tags = {
Name = "${var.project_name}-alb-sg"
}
}
resource "aws_security_group" "ecs_tasks" {
name_prefix = "${var.project_name}-ecs-tasks-"
description = "Security group for ECS tasks"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 8000
to_port = 8000
protocol = "tcp"
security_groups = [aws_security_group.alb.id]
description = "Allow traffic from ALB"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
description = "Allow all outbound traffic"
}
tags = {
Name = "${var.project_name}-ecs-tasks-sg"
}
}
resource "aws_security_group" "rds" {
name_prefix = "${var.project_name}-rds-"
description = "Security group for RDS instance"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 5432
to_port = 5432
protocol = "tcp"
security_groups = [aws_security_group.ecs_tasks.id]
description = "PostgreSQL from ECS tasks"
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "${var.project_name}-rds-sg"
}
}
# terraform/rds.tf
resource "aws_db_subnet_group" "main" {
name = "${var.project_name}-${var.environment}-db-subnet"
subnet_ids = module.vpc.database_subnets
tags = {
Name = "${var.project_name}-db-subnet-group"
}
}
resource "aws_db_instance" "main" {
identifier = "${var.project_name}-${var.environment}-db"
engine = "postgres"
engine_version = "15.3"
instance_class = var.db_instance_class
allocated_storage = 100
max_allocated_storage = 1000
storage_type = "gp3"
storage_encrypted = true
db_name = "appdb"
username = "dbadmin"
password = var.db_password
multi_az = true
db_subnet_group_name = aws_db_subnet_group.main.name
vpc_security_group_ids = [aws_security_group.rds.id]
backup_retention_period = 7
backup_window = "03:00-04:00"
maintenance_window = "Mon:04:00-Mon:05:00"
enabled_cloudwatch_logs_exports = ["postgresql", "upgrade"]
performance_insights_enabled = true
monitoring_interval = 60
monitoring_role_arn = aws_iam_role.rds_monitoring.arn
deletion_protection = true
skip_final_snapshot = false
final_snapshot_identifier = "${var.project_name}-final-snapshot-${formatdate("YYYY-MM-DD-hhmm", timestamp())}"
tags = {
Name = "${var.project_name}-database"
}
}
# terraform/ecs.tf
resource "aws_ecs_cluster" "main" {
name = "${var.project_name}-${var.environment}-cluster"
setting {
name = "containerInsights"
value = "enabled"
}
tags = {
Name = "${var.project_name}-ecs-cluster"
}
}
resource "aws_ecs_cluster_capacity_providers" "main" {
cluster_name = aws_ecs_cluster.main.name
capacity_providers = ["FARGATE", "FARGATE_SPOT"]
default_capacity_provider_strategy {
base = 1
weight = 100
capacity_provider = "FARGATE"
}
}
resource "aws_ecs_task_definition" "backend" {
family = "${var.project_name}-backend"
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = "512"
memory = "1024"
execution_role_arn = aws_iam_role.ecs_execution.arn
task_role_arn = aws_iam_role.ecs_task.arn
container_definitions = jsonencode([
{
name = "backend"
image = "${aws_ecr_repository.backend.repository_url}:latest"
portMappings = [
{
containerPort = 8000
protocol = "tcp"
}
]
environment = [
{
name = "ENVIRONMENT"
value = var.environment
},
{
name = "DATABASE_HOST"
value = aws_db_instance.main.address
}
]
secrets = [
{
name = "DATABASE_PASSWORD"
valueFrom = aws_secretsmanager_secret.db_password.arn
}
]
logConfiguration = {
logDriver = "awslogs"
options = {
"awslogs-group" = aws_cloudwatch_log_group.backend.name
"awslogs-region" = var.aws_region
"awslogs-stream-prefix" = "backend"
}
}
healthCheck = {
command = ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
interval = 30
timeout = 5
retries = 3
startPeriod = 60
}
}
])
tags = {
Name = "${var.project_name}-backend-task"
}
}
resource "aws_ecs_service" "backend" {
name = "${var.project_name}-backend-service"
cluster = aws_ecs_cluster.main.id
task_definition = aws_ecs_task_definition.backend.arn
desired_count = 3
launch_type = "FARGATE"
network_configuration {
subnets = module.vpc.private_subnets
security_groups = [aws_security_group.ecs_tasks.id]
assign_public_ip = false
}
load_balancer {
target_group_arn = aws_lb_target_group.backend.arn
container_name = "backend"
container_port = 8000
}
deployment_maximum_percent = 200
deployment_minimum_healthy_percent = 100
deployment_circuit_breaker {
enable = true
rollback = true
}
tags = {
Name = "${var.project_name}-backend-service"
}
}
# terraform/alb.tf
resource "aws_lb" "main" {
name = "${var.project_name}-${var.environment}-alb"
internal = false
load_balancer_type = "application"
security_groups = [aws_security_group.alb.id]
subnets = module.vpc.public_subnets
enable_deletion_protection = true
enable_http2 = true
enable_cross_zone_load_balancing = true
access_logs {
bucket = aws_s3_bucket.alb_logs.id
enabled = true
}
tags = {
Name = "${var.project_name}-alb"
}
}
resource "aws_lb_target_group" "backend" {
name = "${var.project_name}-backend-tg"
port = 8000
protocol = "HTTP"
vpc_id = module.vpc.vpc_id
target_type = "ip"
health_check {
enabled = true
healthy_threshold = 2
interval = 30
matcher = "200"
path = "/health"
port = "traffic-port"
protocol = "HTTP"
timeout = 5
unhealthy_threshold = 3
}
deregistration_delay = 30
tags = {
Name = "${var.project_name}-backend-tg"
}
}
resource "aws_lb_listener" "https" {
load_balancer_arn = aws_lb.main.arn
port = "443"
protocol = "HTTPS"
ssl_policy = "ELBSecurityPolicy-TLS-1-2-2017-01"
certificate_arn = aws_acm_certificate.main.arn
default_action {
type = "forward"
target_group_arn = aws_lb_target_group.backend.arn
}
}
resource "aws_lb_listener" "http" {
load_balancer_arn = aws_lb.main.arn
port = "80"
protocol = "HTTP"
default_action {
type = "redirect"
redirect {
port = "443"
protocol = "HTTPS"
status_code = "HTTP_301"
}
}
}
# terraform/autoscaling.tf
resource "aws_appautoscaling_target" "ecs_target" {
max_capacity = 10
min_capacity = 3
resource_id = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.backend.name}"
scalable_dimension = "ecs:service:DesiredCount"
service_namespace = "ecs"
}
resource "aws_appautoscaling_policy" "ecs_cpu" {
name = "${var.project_name}-cpu-scaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs_target.resource_id
scalable_dimension = aws_appautoscaling_target.ecs_target.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs_target.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageCPUUtilization"
}
target_value = 70.0
scale_in_cooldown = 300
scale_out_cooldown = 60
}
}
resource "aws_appautoscaling_policy" "ecs_memory" {
name = "${var.project_name}-memory-scaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs_target.resource_id
scalable_dimension = aws_appautoscaling_target.ecs_target.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs_target.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageMemoryUtilization"
}
target_value = 80.0
scale_in_cooldown = 300
scale_out_cooldown = 60
}
}
# terraform/cloudwatch.tf
resource "aws_cloudwatch_log_group" "backend" {
name = "/ecs/${var.project_name}/backend"
retention_in_days = 30
tags = {
Name = "${var.project_name}-backend-logs"
}
}
resource "aws_cloudwatch_metric_alarm" "ecs_cpu_high" {
alarm_name = "${var.project_name}-ecs-cpu-high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/ECS"
period = "300"
statistic = "Average"
threshold = "80"
alarm_description = "This metric monitors ECS CPU utilization"
alarm_actions = [aws_sns_topic.alerts.arn]
dimensions = {
ClusterName = aws_ecs_cluster.main.name
ServiceName = aws_ecs_service.backend.name
}
}
resource "aws_cloudwatch_metric_alarm" "rds_cpu_high" {
alarm_name = "${var.project_name}-rds-cpu-high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/RDS"
period = "300"
statistic = "Average"
threshold = "80"
alarm_description = "RDS CPU utilization is too high"
alarm_actions = [aws_sns_topic.alerts.arn]
dimensions = {
DBInstanceIdentifier = aws_db_instance.main.id
}
}
# terraform/outputs.tf
output "alb_dns_name" {
description = "DNS name of the load balancer"
value = aws_lb.main.dns_name
}
output "ecs_cluster_name" {
description = "Name of the ECS cluster"
value = aws_ecs_cluster.main.name
}
output "rds_endpoint" {
description = "RDS instance endpoint"
value = aws_db_instance.main.endpoint
sensitive = true
}
output "ecr_repository_url" {
description = "URL of the ECR repository"
value = aws_ecr_repository.backend.repository_url
}
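A typical workflow for applying this configuration, assuming the S3 state bucket and DynamoDB lock table from the backend block already exist:
cd terraform
terraform init                      # configure the S3 backend and download providers
terraform fmt -check && terraform validate
terraform plan -out=tfplan          # review the changes before applying
terraform apply tfplan
terraform output alb_dns_name       # DNS name to point your domain at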
16.4. Automation Scripts
Ansible for Server Configuration
You: Create Ansible playbooks for configuring servers
AI: # ansible/inventory/production.yml
all:
children:
webservers:
hosts:
web1:
ansible_host: 10.0.1.10
ansible_user: ubuntu
web2:
ansible_host: 10.0.1.11
ansible_user: ubuntu
databases:
hosts:
db1:
ansible_host: 10.0.2.10
ansible_user: ubuntu
postgresql_role: primary
db2:
ansible_host: 10.0.2.11
ansible_user: ubuntu
postgresql_role: replica
loadbalancers:
hosts:
lb1:
ansible_host: 10.0.3.10
ansible_user: ubuntu
vars:
ansible_python_interpreter: /usr/bin/python3
app_name: myapp
app_version: "1.0.0"
environment: production
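Before running the playbook it is useful to confirm that every host in the inventory is reachable over SSH and that facts can be gathered; a sketch:
ansible all -i inventory/production.yml -m ping
ansible all -i inventory/production.yml -m setup -a 'filter=ansible_distribution*' --one-line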
# ansible/playbook.yml
---
- name: Configure All Servers
hosts: all
become: yes
roles:
- common
- security
- monitoring
- name: Configure Web Servers
hosts: webservers
become: yes
roles:
- nginx
- application
- ssl
- name: Configure Database Servers
hosts: databases
become: yes
roles:
- postgresql
- backup
- name: Configure Load Balancers
hosts: loadbalancers
become: yes
roles:
- haproxy
# ansible/roles/common/tasks/main.yml
---
- name: Update apt cache
apt:
update_cache: yes
cache_valid_time: 3600
- name: Install common packages
apt:
name:
- curl
- wget
- git
- vim
- htop
- net-tools
- ufw
- fail2ban
- unattended-upgrades
state: present
- name: Set timezone
timezone:
name: UTC
- name: Configure NTP
apt:
name: chrony
state: present
notify: restart chrony
- name: Enable NTP service
service:
name: chrony
state: started
enabled: yes
- name: Set hostname
hostname:
name: "{{ inventory_hostname }}"
- name: Configure /etc/hosts
template:
src: hosts.j2
dest: /etc/hosts
owner: root
group: root
mode: '0644'
- name: Create app user
user:
name: "{{ app_user }}"
shell: /bin/bash
create_home: yes
groups: sudo
append: yes
- name: Configure sudo for app user
template:
src: sudoers.j2
dest: /etc/sudoers.d/{{ app_user }}
validate: 'visudo -cf %s'
mode: '0440'
# ansible/roles/security/tasks/main.yml
---
- name: Configure UFW defaults
ufw:
direction: "{{ item.direction }}"
policy: "{{ item.policy }}"
loop:
- { direction: 'incoming', policy: 'deny' }
- { direction: 'outgoing', policy: 'allow' }
- name: Allow SSH
ufw:
rule: allow
port: '22'
proto: tcp
- name: Allow HTTP
ufw:
rule: allow
port: '80'
proto: tcp
when: "'webservers' in group_names"
- name: Allow HTTPS
ufw:
rule: allow
port: '443'
proto: tcp
when: "'webservers' in group_names"
- name: Enable UFW
ufw:
state: enabled
- name: Configure fail2ban
template:
src: jail.local.j2
dest: /etc/fail2ban/jail.local
owner: root
group: root
mode: '0644'
notify: restart fail2ban
- name: Disable root login
lineinfile:
path: /etc/ssh/sshd_config
regexp: '^PermitRootLogin'
line: 'PermitRootLogin no'
notify: restart sshd
- name: Disable password authentication
lineinfile:
path: /etc/ssh/sshd_config
regexp: '^PasswordAuthentication'
line: 'PasswordAuthentication no'
notify: restart sshd
- name: Configure automatic security updates
template:
src: 50unattended-upgrades.j2
dest: /etc/apt/apt.conf.d/50unattended-upgrades
owner: root
group: root
mode: '0644'
# ansible/roles/nginx/tasks/main.yml
---
- name: Install Nginx
apt:
name: nginx
state: present
- name: Create web root directory
file:
path: /var/www/{{ app_name }}
state: directory
owner: www-data
group: www-data
mode: '0755'
- name: Configure Nginx site
template:
src: nginx-site.conf.j2
dest: /etc/nginx/sites-available/{{ app_name }}
owner: root
group: root
mode: '0644'
notify: reload nginx
- name: Enable Nginx site
file:
src: /etc/nginx/sites-available/{{ app_name }}
dest: /etc/nginx/sites-enabled/{{ app_name }}
state: link
notify: reload nginx
- name: Remove default Nginx site
file:
path: /etc/nginx/sites-enabled/default
state: absent
notify: reload nginx
- name: Configure Nginx
template:
src: nginx.conf.j2
dest: /etc/nginx/nginx.conf
owner: root
group: root
mode: '0644'
notify: reload nginx
- name: Start and enable Nginx
service:
name: nginx
state: started
enabled: yes
# ansible/roles/nginx/templates/nginx-site.conf.j2
upstream backend {
least_conn;
{% for host in groups['webservers'] %}
server {{ hostvars[host]['ansible_host'] }}:8000 max_fails=3 fail_timeout=30s;
{% endfor %}
}
# Rate limiting
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;
server {
listen 80;
server_name {{ domain_name }};
# Redirect to HTTPS
return 301 https://$server_name$request_uri;
}
server {
listen 443 ssl http2;
server_name {{ domain_name }};
# SSL Configuration
ssl_certificate /etc/letsencrypt/live/{{ domain_name }}/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/{{ domain_name }}/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
# Logging
access_log /var/log/nginx/{{ app_name }}_access.log;
error_log /var/log/nginx/{{ app_name }}_error.log;
# Root directory
root /var/www/{{ app_name }}/public;
index index.html;
# Gzip compression
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/xml text/javascript application/json application/javascript application/xml+rss;
location / {
try_files $uri $uri/ /index.html;
}
location /api/ {
limit_req zone=api_limit burst=20 nodelay;
proxy_pass http://backend/;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_cache_bypass $http_upgrade;
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
location /health {
access_log off;
return 200 "healthy\n";
add_header Content-Type text/plain;
}
# Static files caching
location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
}
# ansible/roles/postgresql/tasks/main.yml
---
- name: Add PostgreSQL repository key
apt_key:
url: https://www.postgresql.org/media/keys/ACCC4CF8.asc
state: present
- name: Add PostgreSQL repository
apt_repository:
repo: deb http://apt.postgresql.org/pub/repos/apt/ {{ ansible_distribution_release }}-pgdg main
state: present
- name: Install PostgreSQL
apt:
name:
- postgresql-15
- postgresql-contrib-15
- python3-psycopg2
state: present
update_cache: yes
- name: Ensure PostgreSQL is running
service:
name: postgresql
state: started
enabled: yes
- name: Configure PostgreSQL
template:
src: postgresql.conf.j2
dest: /etc/postgresql/15/main/postgresql.conf
owner: postgres
group: postgres
mode: '0644'
notify: restart postgresql
- name: Configure pg_hba
template:
src: pg_hba.conf.j2
dest: /etc/postgresql/15/main/pg_hba.conf
owner: postgres
group: postgres
mode: '0640'
notify: restart postgresql
- name: Create application database
postgresql_db:
name: "{{ db_name }}"
encoding: UTF-8
lc_collate: en_US.UTF-8
lc_ctype: en_US.UTF-8
template: template0
become_user: postgres
- name: Create application user
postgresql_user:
name: "{{ db_user }}"
password: "{{ db_password }}"
db: "{{ db_name }}"
priv: ALL
become_user: postgres
- name: Configure replication (on replica)
include_tasks: replica.yml
when: postgresql_role == 'replica'
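Variables such as db_password should not live in plain text alongside the playbook; Ansible Vault is the usual answer. A sketch, assuming the secrets are kept in a hypothetical group_vars/databases/vault.yml:
# Encrypt the secrets file and supply the vault password at deploy time
ansible-vault encrypt group_vars/databases/vault.yml
ansible-playbook -i inventory/production.yml playbook.yml --ask-vault-pass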
# ansible/roles/monitoring/tasks/main.yml
---
- name: Install Node Exporter
get_url:
url: https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
dest: /tmp/node_exporter.tar.gz
- name: Extract Node Exporter
unarchive:
src: /tmp/node_exporter.tar.gz
dest: /tmp/
remote_src: yes
- name: Move Node Exporter binary
copy:
src: /tmp/node_exporter-1.6.1.linux-amd64/node_exporter
dest: /usr/local/bin/node_exporter
remote_src: yes
mode: '0755'
- name: Create node_exporter systemd service
template:
src: node_exporter.service.j2
dest: /etc/systemd/system/node_exporter.service
owner: root
group: root
mode: '0644'
notify: reload systemd
- name: Start and enable node_exporter
service:
name: node_exporter
state: started
enabled: yes
- name: Install Filebeat for log shipping
apt:
deb: https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.10.0-amd64.deb
- name: Configure Filebeat
template:
src: filebeat.yml.j2
dest: /etc/filebeat/filebeat.yml
owner: root
group: root
mode: '0644'
notify: restart filebeat
- name: Start and enable Filebeat
service:
name: filebeat
state: started
enabled: yes
# ansible/deploy.sh
#!/bin/bash
set -e
echo "Starting deployment..."
# Check syntax
ansible-playbook -i inventory/production.yml playbook.yml --syntax-check
# Dry run
echo "Running dry-run..."
ansible-playbook -i inventory/production.yml playbook.yml --check
# Confirm
read -p "Continue with actual deployment? (y/n) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
echo "Deployment cancelled"
exit 1
fi
# Deploy
ansible-playbook -i inventory/production.yml playbook.yml -v
echo "Deployment completed!"
16.5. Monitoring and Logging
Prometheus and Grafana Setup
You: Create a complete monitoring system with Prometheus and Grafana
AI: # prometheus/prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: 'production'
environment: 'prod'
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
# Load rules
rule_files:
- '/etc/prometheus/rules/*.yml'
# Scrape configurations
scrape_configs:
# Prometheus itself
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Node Exporter
- job_name: 'node'
static_configs:
- targets:
- 'node1:9100'
- 'node2:9100'
- 'node3:9100'
labels:
group: 'production'
# Application metrics
- job_name: 'application'
metrics_path: '/metrics'
static_configs:
- targets:
- 'backend:8000'
labels:
service: 'backend'
environment: 'production'
# PostgreSQL Exporter
- job_name: 'postgres'
static_configs:
- targets:
- 'postgres-exporter:9187'
labels:
database: 'main'
# Redis Exporter
- job_name: 'redis'
static_configs:
- targets:
- 'redis-exporter:9121'
# Nginx Exporter
- job_name: 'nginx'
static_configs:
- targets:
- 'nginx-exporter:9113'
# Kubernetes pods (if using K8s)
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
# prometheus/rules/alerts.yml
groups:
- name: instance_alerts
interval: 30s
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: critical
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: HighCPUUsage
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 10m
labels:
severity: warning
annotations:
summary: "High CPU usage on {{ $labels.instance }}"
description: "CPU usage is above 80% (current value: {{ $value }}%)"
- alert: HighMemoryUsage
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
for: 10m
labels:
severity: warning
annotations:
summary: "High memory usage on {{ $labels.instance }}"
description: "Memory usage is above 85% (current value: {{ $value }}%)"
- alert: DiskSpaceLow
expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 15
for: 10m
labels:
severity: warning
annotations:
summary: "Low disk space on {{ $labels.instance }}"
description: "Disk space is below 15% (current value: {{ $value }}%)"
- name: application_alerts
interval: 30s
rules:
- alert: HighRequestLatency
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
for: 10m
labels:
severity: warning
annotations:
summary: "High request latency"
description: "95th percentile latency is above 1s (current value: {{ $value }}s)"
- alert: HighErrorRate
expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.05
for: 5m
labels:
severity: critical
annotations:
summary: "High error rate"
description: "Error rate is above 5% (current value: {{ $value }}%)"
- alert: DatabaseConnectionPoolExhausted
expr: database_connections_active / database_connections_max > 0.9
for: 5m
labels:
severity: critical
annotations:
summary: "Database connection pool almost exhausted"
description: "Using {{ $value }}% of available connections"
# grafana/provisioning/datasources/prometheus.yml
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
editable: false
jsonData:
timeInterval: "15s"
// grafana/provisioning/dashboards/application.json
{
"dashboard": {
"title": "Application Metrics",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"targets": [
{
"expr": "rate(http_requests_total[5m])",
"legendFormat": "{{method}} {{path}}"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"targets": [
{
"expr": "rate(http_requests_total{status=~\"5..\"}[5m])",
"legendFormat": "Errors"
}
]
},
{
"title": "Response Time (95th percentile)",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"targets": [
{
"expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))",
"legendFormat": "p95"
},
{
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))",
"legendFormat": "p99"
}
]
},
{
"title": "Active Connections",
"type": "graph",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
"targets": [
{
"expr": "sum(active_connections)",
"legendFormat": "Active"
}
]
}
]
}
}
# app/metrics.py
from flask import request
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
from functools import wraps
import time
# Metrics
REQUEST_COUNT = Counter(
'http_requests_total',
'Total HTTP requests',
['method', 'endpoint', 'status']
)
REQUEST_LATENCY = Histogram(
'http_request_duration_seconds',
'HTTP request latency',
['method', 'endpoint']
)
ACTIVE_REQUESTS = Gauge(
'http_requests_active',
'Number of active HTTP requests'
)
DATABASE_CONNECTIONS = Gauge(
'database_connections_active',
'Number of active database connections'
)
DATABASE_POOL_SIZE = Gauge(
'database_connections_max',
'Maximum database connection pool size'
)
def track_metrics(f):
    """Decorator to track request metrics."""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        start_time = time.time()
        ACTIVE_REQUESTS.inc()
        try:
            response = f(*args, **kwargs)
            status = response.status_code if hasattr(response, 'status_code') else 200
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint,
                status=status
            ).inc()
            return response
        except Exception:
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint,
                status=500
            ).inc()
            raise
        finally:
            REQUEST_LATENCY.labels(
                method=request.method,
                endpoint=request.endpoint
            ).observe(time.time() - start_time)
            ACTIVE_REQUESTS.dec()
    return decorated_function

def metrics_endpoint():
    """Prometheus metrics endpoint."""
    return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}
# Usage in Flask
from flask import Flask

app = Flask(__name__)

@app.route('/metrics')
def metrics():
    return metrics_endpoint()

@app.route('/api/users')
@track_metrics
def get_users():
    # Your logic here
    return {'users': []}
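A quick way to confirm that the instrumentation works is to run the app locally and scrape the endpoint by hand; a sketch, assuming (hypothetically) that the Flask app above lives in app/main.py:
pip install flask prometheus_client gunicorn
gunicorn --bind 0.0.0.0:8000 app.main:app &
curl -s http://localhost:8000/api/users > /dev/null   # generate one request
curl -s http://localhost:8000/metrics | grep http_requests_total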