diff --git a/.github/workflows/ec2-reboot.yml b/.github/workflows/ec2-reboot.yml
index ce0067b..9b975a5 100644
--- a/.github/workflows/ec2-reboot.yml
+++ b/.github/workflows/ec2-reboot.yml
@@ -1,11 +1,11 @@
-name: EC2-RECOVERY
+name: EC2-DIAGNOSTIC
 
 on:
   workflow_dispatch:
 
 jobs:
-  recover:
-    name: Recover Server
+  diagnose:
+    name: Diagnose EC2 Instance
     runs-on: ubuntu-latest
 
     steps:
@@ -16,84 +16,85 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_KEY }}
           aws-region: ap-northeast-2
 
-      - name: Check IAM identity
+      # Read-only diagnostics. Steps without an explicit `shell:` run under
+      # `bash -e`, so every probe (including command substitutions) ends in
+      # `|| ...` to keep one denied call from aborting the rest of the step.
+      - name: Check IAM identity and permissions
         run: |
-          echo "=== IAM 정보 확인 ==="
-          aws sts get-caller-identity || echo "STS 호출 실패"
+          echo "=== IAM 정보 ==="
+          aws sts get-caller-identity 2>&1 || true
 
-      - name: Check CodeDeploy deployments
+          echo ""
+          echo "=== IAM 정책 확인 ==="
+          ACCOUNT=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || true)
+          # Last path segment of the caller ARN (the user name for an IAM user).
+          USER_NAME=$(aws sts get-caller-identity --query Arn --output text 2>/dev/null | awk -F'/' '{print $NF}' || true)
+          echo "Account: $ACCOUNT"
+          echo "User: $USER_NAME"
+
+          echo ""
+          echo "=== Attached Policies ==="
+          aws iam list-attached-user-policies --user-name "$USER_NAME" 2>&1 || echo "IAM 정책 조회 권한 없음"
+
+          echo ""
+          echo "=== Inline Policies ==="
+          aws iam list-user-policies --user-name "$USER_NAME" 2>&1 || echo "IAM 인라인 정책 조회 권한 없음"
+
+      - name: CodeDeploy deployment group info
         run: |
-          echo "=== 최근 배포 상태 확인 ==="
-          aws deploy list-deployments \
+          echo "=== 배포 그룹 상세 ==="
+          aws deploy get-deployment-group \
             --application-name runnect-prod-codedeploy \
             --deployment-group-name runnect-prod-codedeploy-group \
-            --include-only-statuses "Succeeded,Failed,InProgress" \
-            --query "deployments[:3]" \
-            --output text || echo "배포 목록 조회 실패"
+            --output json 2>&1 || echo "배포 그룹 조회 실패"
 
+      - name: Latest deployment details
+        run: |
+          echo "=== 최근 배포 목록 ==="
           LATEST=$(aws deploy list-deployments \
             --application-name runnect-prod-codedeploy \
             --deployment-group-name runnect-prod-codedeploy-group \
             --query "deployments[0]" \
-            --output text 2>/dev/null)
+            --output text 2>/dev/null || true)
+          echo "Latest deployment: $LATEST"
 
           if [ -n "$LATEST" ] && [ "$LATEST" != "None" ]; then
             echo ""
-            echo "=== 최신 배포 상세 ==="
-            aws deploy get-deployment --deployment-id "$LATEST" \
-              --query "deploymentInfo.{status:status, createTime:createTime, completeTime:completeTime, errorInfo:errorInformation}" \
-              --output json
+            echo "=== 배포 상세 ==="
+            aws deploy get-deployment --deployment-id "$LATEST" --output json 2>&1 || echo "배포 상세 조회 실패"
+
+            echo ""
+            echo "=== 배포 인스턴스 목록 ==="
+            aws deploy list-deployment-instances --deployment-id "$LATEST" --output json 2>&1 || echo "인스턴스 목록 조회 실패"
+
+            echo ""
+            echo "=== 배포 타겟 상세 ==="
+            INSTANCE_IDS=$(aws deploy list-deployment-instances --deployment-id "$LATEST" --query "instancesList" --output text 2>/dev/null || true)
+            for INST in $INSTANCE_IDS; do
+              echo "--- Instance: $INST ---"
+              aws deploy get-deployment-instance --deployment-id "$LATEST" --instance-id "$INST" --output json 2>&1 || echo "조회 실패"
+            done
           fi
 
-      - name: Trigger new CodeDeploy deployment
+      - name: Check SSM access
         run: |
-          echo "=== 새 CodeDeploy 배포 트리거 ==="
-          DEPLOYMENT_ID=$(aws deploy create-deployment \
-            --application-name runnect-prod-codedeploy \
-            --deployment-group-name runnect-prod-codedeploy-group \
-            --file-exists-behavior OVERWRITE \
-            --s3-location bucket=runnect-prod-bucket,bundleType=zip,key=runnect_prod_server.zip \
-            --region ap-northeast-2 \
-            --query "deploymentId" \
-            --output text)
-
-          echo "Deployment ID: $DEPLOYMENT_ID"
-
-          echo "배포 완료 대기 (최대 5분)..."
-          for i in $(seq 1 30); do
-            STATUS=$(aws deploy get-deployment --deployment-id "$DEPLOYMENT_ID" \
-              --query "deploymentInfo.status" --output text 2>/dev/null)
-            echo "[$i/30] Status: $STATUS"
-
-            if [ "$STATUS" = "Succeeded" ]; then
-              echo "배포 성공!"
-              break
-            elif [ "$STATUS" = "Failed" ] || [ "$STATUS" = "Stopped" ]; then
-              echo "배포 실패! 상세 정보:"
-              aws deploy get-deployment --deployment-id "$DEPLOYMENT_ID" \
-                --query "deploymentInfo.errorInformation" --output json
-              break
-            fi
-            sleep 10
-          done
-
-      - name: Health check
+          echo "=== SSM 인스턴스 목록 ==="
+          aws ssm describe-instance-information --output json 2>&1 || echo "SSM 권한 없음"
+
+      - name: Try EC2 describe (may fail)
         run: |
-          echo "서버 헬스 체크 (최대 3분 대기)..."
-          for i in $(seq 1 18); do
-            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 http://3.35.195.11/actuator/health 2>/dev/null || echo "000")
-            echo "[$i/18] HTTP: $HTTP_CODE"
-            if [ "$HTTP_CODE" = "200" ]; then
-              echo "서버 복구 완료!"
-              exit 0
-            fi
-            sleep 10
-          done
+          echo "=== EC2 인스턴스 조회 시도 ==="
+          aws ec2 describe-instances --output json 2>&1 || echo "EC2 권한 없음"
 
           echo ""
-          echo "=== 포트별 체크 ==="
-          for PORT in 80 8081 8082; do
-            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 http://3.35.195.11:$PORT/actuator/health 2>/dev/null || echo "000")
-            echo "Port $PORT: HTTP $HTTP_CODE"
-          done
-          echo "WARNING: 서버가 아직 응답하지 않습니다."
+          echo "=== Elastic IP 조회 시도 ==="
+          aws ec2 describe-addresses --public-ips 3.35.195.11 2>&1 || echo "Elastic IP 조회 실패"
+
+          echo ""
+          echo "=== 보안 그룹 조회 시도 ==="
+          aws ec2 describe-security-groups 2>&1 || echo "보안 그룹 조회 실패"
+
+      - name: Check S3 bucket
+        run: |
+          echo "=== S3 버킷 확인 ==="
+          aws s3 ls s3://runnect-prod-bucket/ 2>&1 || echo "S3 접근 실패"
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index c53ea7a..c7db67c 100644
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -96,9 +96,81 @@ echo "> 스위칭"
 sleep 10
 /home/ubuntu/app/nonstop/switch.sh
 
-echo "> 배포 완료. 최종 상태 확인"
-echo "> Nginx: $(sudo systemctl is-active nginx)"
-echo "> Java 프로세스:"
-pgrep -a java || echo "> Java 프로세스 없음"
-echo "> 포트 리스닝:"
-sudo ss -tlnp | grep -E ':(80|8081|8082) ' || echo "> 해당 포트 리스닝 없음"
+echo "> 배포 완료. 진단 정보 수집 중..."
+
+# The report contains the full nginx config, firewall rules and app logs, so
+# write it to a private mktemp file instead of a predictable, default-mode
+# /tmp path; DIAG_NAME keeps the timestamped S3 object name.
+DIAG_NAME="server-diagnostic-$(date +%Y%m%d-%H%M%S).txt"
+DIAG_FILE="$(mktemp --suffix=.txt 2>/dev/null)" || DIAG_FILE="/tmp/$DIAG_NAME"
+{
+    echo "========== SERVER DIAGNOSTIC =========="
+    echo "Date: $(date)"
+    echo ""
+
+    echo "=== Public IP (EC2 metadata) ==="
+    # Request an IMDSv2 session token first; -f makes an HTTP error (e.g. 401
+    # when tokens are required) fall through to the fallback message.
+    IMDS_TOKEN=$(curl -sf --connect-timeout 3 --max-time 5 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" http://169.254.169.254/latest/api/token 2>/dev/null || true)
+    curl -sf --connect-timeout 3 --max-time 5 -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" http://169.254.169.254/latest/meta-data/public-ipv4 2>/dev/null || echo "메타데이터 접근 불가"
+    echo ""
+
+    echo "=== Network Interfaces ==="
+    ip addr show 2>/dev/null || ifconfig 2>/dev/null
+    echo ""
+
+    echo "=== Nginx Status ==="
+    sudo systemctl status nginx 2>&1
+    echo ""
+
+    echo "=== Nginx Config ==="
+    sudo nginx -T 2>&1
+    echo ""
+
+    echo "=== Listening Ports ==="
+    sudo ss -tlnp 2>/dev/null || sudo netstat -tlnp 2>/dev/null
+    echo ""
+
+    echo "=== Java Processes ==="
+    pgrep -a java 2>/dev/null || echo "Java 프로세스 없음"
+    echo ""
+
+    echo "=== iptables Rules ==="
+    sudo iptables -L -n 2>/dev/null || echo "iptables 조회 실패"
+    echo ""
+
+    echo "=== Localhost Health Check ==="
+    # Bounded timeouts so a hung app cannot stall the deploy script.
+    curl -s --connect-timeout 3 --max-time 10 http://localhost:8081/actuator/health 2>/dev/null
+    echo ""
+    curl -s --connect-timeout 3 --max-time 10 http://localhost:8082/actuator/health 2>/dev/null
+    echo ""
+    curl -s --connect-timeout 3 --max-time 10 http://localhost/actuator/health 2>/dev/null
+    echo ""
+    curl -s --connect-timeout 3 --max-time 10 http://localhost/profile 2>/dev/null
+    echo ""
+
+    echo "=== Disk Usage ==="
+    df -h 2>/dev/null
+    echo ""
+
+    echo "=== Memory Usage ==="
+    free -h 2>/dev/null
+    echo ""
+
+    echo "=== nohup.out (last 50 lines) ==="
+    tail -50 /home/ubuntu/app/nohup.out 2>/dev/null || echo "nohup.out 없음"
+    echo ""
+
+    echo "========== END DIAGNOSTIC =========="
+} > "$DIAG_FILE" 2>&1
+
+echo "> 진단 결과를 S3에 업로드..."
+if aws s3 cp "$DIAG_FILE" "s3://runnect-prod-bucket/diagnostics/$DIAG_NAME" 2>&1; then
+    # Uploaded; do not leave the report behind in /tmp. Kept on failure.
+    rm -f "$DIAG_FILE"
+else
+    echo "> S3 업로드 실패"
+fi
+
+echo "> 진단 완료"