#!/bin/bash
#
#   docker-deploy-elb - Deploy a docker image to containers behind an ELB
#
#   usage: docker-deploy --aws-profile profile --deploy cmd --group group --proxy proxy --region region \
#       --validate page --version 1.2.3 IMAGE
#
#   For every instance registered in the target group(s) named by --group, the script:
#   pulls the image on the instance's docker daemon, deregisters the instance from the
#   target group, stops and removes the old container, starts the new one with the --deploy
#   command, re-registers the instance, validates it over HTTP and waits for the load
#   balancer to report it healthy.
#
#   Exit status: 0 if every target was upgraded, 1 if some were not, 255 on usage or AWS errors.
#

REMOTE_SGROUP="remote-prod"
DPORT=2375

#
#   Source configuration and define as env vars
#
#files=""
#for f in ../CONFIG/keys.json CONFIG/keys.json product.json ../pak.json ; do
#    [ -f ${f} ] && files="${files} ${f}"
#done
#eval $(../paks/assist/json2env ${files})

#
#   Default version comes from pak.json in this directory or the parent. --version overrides it.
#
if [ -f pak.json ] ; then
    VERSION=$(pak --dir . edit version)
else
    VERSION=$(pak --dir .. edit version)
fi

while [[ $# -gt 0 ]] ; do
    arg="$1"
    case "${arg}" in
    --aws-profile)
        AWS_PROFILE=${2}
        shift ; shift
        ;;
    --deploy)
        AWS_DEPLOY=${2}
        shift ; shift
        ;;
    --group)
        AWS_GROUP=${2}
        shift ; shift
        ;;
    --proxy)
        AWS_PROXY=${2}
        shift ; shift
        ;;
    --region)
        AWS_DEFAULT_REGION=${2}
        shift ; shift
        ;;
    --validate)
        AWS_VALIDATE=${2}
        shift ; shift
        ;;
    --version)
        VERSION=${2}
        shift ; shift
        ;;
    *)
        break
        ;;
    esac
done

IMAGE="${1}"
NAME="${IMAGE}"

if [[ "${IMAGE}" = "" || "${AWS_DEFAULT_REGION}" = "" ]] ; then
    echo "usage: docker-deploy [--aws-profile profile] [--region region] [--proxy proxy] [--group group] [--deploy cmd] [--validate cmd] [--version version] image"
    exit 255
fi

export AWS_PROFILE AWS_DEFAULT_REGION

#
#   Source functions
#   These files are expected to provide dockerLogin, dockerLogout, grantAccess and revokeAccess,
#   which are called below but not defined in this script.
#
. "$(dirname "${BASH_SOURCE[0]}")/common"
. "$(dirname "${BASH_SOURCE[0]}")/docker-login"
. "$(dirname "${BASH_SOURCE[0]}")/remote-access"

echo -e "\nDeploy container \"${IMAGE}:${VERSION}\" to Load Balancer \"${AWS_PROXY}\" Group \"${AWS_GROUP}\"\n"

#
#   Print the ARNs of the target groups named ${AWS_GROUP}, space separated.
#   ARNs that do not contain "/${AWS_GROUP}/" are skipped.
#
getGroups() {
    local groups arn
    groups=""
    for arn in $(aws elbv2 describe-target-groups --names "${AWS_GROUP}" --output text \
            --query 'TargetGroups[].TargetGroupArn')
    do
        if [ "${arn}" = "${arn/\/${AWS_GROUP}\//}" ] ; then
            continue
        fi
        groups="${groups} ${arn}"
    done
    # Unquoted on purpose: trims the leading separator
    echo ${groups}
}

#
#   Print the instance ids registered in a target group.
#   $1 - target group ARN
#   Returns non-zero if the AWS call fails. This function is always called inside $(...),
#   so the error goes to stderr (stdout would be mistaken for target ids) and the function
#   returns rather than exits (an exit would only leave the subshell).
#
getTargets() {
    local arn
    arn=$1
    if ! aws elbv2 describe-target-health --target-group-arn "${arn}" --output text \
            --query 'TargetHealthDescriptions[].Target.Id' ; then
        echo "Cannot get target health" >&2
        return 255
    fi
}

#
#   Print the public IP address of an instance.
#   $1 - EC2 instance id
#
getHost() {
    local target
    target=$1
    aws ec2 describe-instances --instance-ids "${target}" --output text \
        --query 'Reservations[0].Instances[0].PublicIpAddress'
}

#
#   Ensure that every instance tagged as a member of the autoscale group ${AWS_PROXY}
#   is registered with each target group, registering any that are missing.
#   Exits 255 if the instances or the registered targets cannot be listed.
#
checkTargets() {
    local arn targets tcount target allTargets
    tcount=0
    if ! allTargets=$(aws ec2 describe-instances \
            --filter "Name=tag:aws:autoscaling:groupName,Values=${AWS_PROXY}" --output text \
            --query 'Reservations[].Instances[].InstanceId') ; then
        echo "Cannot describe instances"
        exit 255
    fi
    if [ "${allTargets}" = "" ] ; then
        echo "No targets found in autoscale group ${AWS_PROXY}"
        exit 255
    fi
    echo "Check targets are registered with proxy"
    for arn in $(getGroups)
    do
        targets=$(getTargets "${arn}") || exit 255
        tcount=0
        for target in ${allTargets}
        do
            # The target is missing if removing its id from the registered list changes nothing
            if [ "${targets/${target}/}" = "${targets}" ] ; then
                echo "Repair target ${target} missing from proxy"
                if ! aws elbv2 register-targets --target-group-arn "${arn}" --targets "Id=${target}" ; then
                    echo "Cannot register ${target} with load balancer ${AWS_PROXY}"
                    continue
                fi
            else
                echo "Target ${target} is registered in proxy"
                tcount=$((tcount+1))
            fi
        done
    done
    echo "${NAME} has ${tcount} targets in target group"
}

#
#   Pull ${IMAGE_PATH}:${VERSION} via the docker daemon selected by DOCKER_HOST.
#   Tries up to 5 times, 5 seconds apart. Returns 0 on success, 1 if every attempt failed.
#
pullImage() {
    local i output
    i=0
    while [ $i -lt 5 ]
    do
        echo Pull image ${IMAGE_PATH}:${VERSION}
        if output=$(docker pull "${IMAGE_PATH}:${VERSION}") ; then
            echo "Image pulled"
            # Quoted so the filter is applied per line, hiding only the progress chatter
            echo "${output}" | grep -E -v 'Already exists|Pulling|Waiting|Verifying|Download complete|Pull complete|Digest:'
            return 0
        fi
        echo "${output}"
        echo "Cannot pull image, retry in 5 seconds. (${i})"
        sleep 5
        i=$((i+1))
    done
    return 1
}

#
#   Check that the application answers a HEAD request with HTTP 200.
#   $1 - host name or address; the URL is http://<host>${AWS_VALIDATE}
#   Tries up to 5 times, 5 seconds apart. Returns 0 on success, 1 otherwise.
#
validate() {
    local host i code
    host=$1
    echo "Validate application at http://${host}${AWS_VALIDATE}"
    i=0
    while [ $i -lt 5 ]
    do
        code=$(curl -s -o /dev/null --retry 10 --retry-delay 1 --retry-max-time 15 \
            -I -w "%{http_code}" "http://${host}${AWS_VALIDATE}")
        if [ "${code}" = 200 ] ; then
            echo "PASSED: Health check successful"
            return 0
        fi
        echo "Continue to wait for application, retry in 5 seconds. (${i})"
        sleep 5
        i=$((i+1))
    done
    echo "FAILED: Cannot validate application, status ${code}"
    return 1
}

#
#   Wait up to about 30 seconds for a target to disappear from the target group.
#   $1 - EC2 instance id
#   Reads the global ${arn} set by the deployment loop below.
#   Returns 0 once the target has no health state, 1 on timeout. Exits 255 if AWS cannot be queried.
#
waitForDrain() {
    local i state target
    target=$1
    echo "Wait for elb to drain target ${target}"
    i=0
    while [ $i -lt 30 ]
    do
        if ! state=$(aws elbv2 describe-target-health --target-group-arn "${arn}" --output text \
                --query "TargetHealthDescriptions[?Target.Id=='${target}'].TargetHealth.State") ; then
            echo "Cannot get target health for ${target}"
            exit 255
        fi
        if [ "${state}" = "" ] ; then
            echo Target ${target} now removed from elb.
            return 0
        fi
        echo Waiting for ${target} to drain. State: ${state}.
        sleep 1
        i=$((i+1))
    done
    echo "FAILED: Cannot drain instance ${target} state ${state}"
    return 1
}

#
#   Wait up to about 30 seconds for a target to be reported as "healthy".
#   $1 - EC2 instance id
#   Reads the global ${arn} set by the deployment loop below.
#   Returns 0 once healthy, 1 on timeout. Exits 255 if AWS cannot be queried.
#
waitForReady() {
    local i state target
    target=$1
    echo "Wait for elb to enable target ${target}"
    i=0
    while [ $i -lt 30 ]
    do
        if ! state=$(aws elbv2 describe-target-health --target-group-arn "${arn}" --output text \
                --query "TargetHealthDescriptions[?Target.Id=='${target}'].TargetHealth.State") ; then
            echo "Cannot get target health for ${target}"
            exit 255
        fi
        if [ "${state}" = "healthy" ] ; then
            echo Target ${target} now ${state}.
            return 0
        fi
        echo Waiting for ${target} to become healthy. State: ${state}.
        sleep 1
        i=$((i+1))
    done
    echo "FAILED: Instance not ready ${target} state ${state}"
    return 1
}

#
#   Undo the per-target dockerLogin and grantAccess. Called on every path that finishes
#   with a target, so a failed deployment does not leave the docker port reachable.
#   NOTE(review): assumes dockerLogout and revokeAccess are safe to call after a failure -
#   confirm against the sourced helper files.
#
releaseTarget() {
    dockerLogout
    revokeAccess "${REMOTE_SGROUP}" "${DPORT}"
}

#
#   Currently 1-1 correspondence between target group and application
#
count=0
passed=0

checkTargets

for arn in $(getGroups)
do
    targets=$(getTargets "${arn}") || exit 255
    for target in ${targets}
    do
        fail=
        count=$((count+1))

        URI=$(dockerLogin)
        IMAGE_PATH=${URI}/${IMAGE}

        grantAccess "${REMOTE_SGROUP}" "${DPORT}"

        host=$(getHost "${target}")
        export DOCKER_HOST=tcp://${host}:${DPORT}

        if ! pullImage ; then
            echo "Cannot pull ${IMAGE}:${VERSION} on ${target}"
            releaseTarget
            continue
        fi

        echo
        echo "----------------------------------------------------------------------------------"
        echo "Deploy to instance ${target} at ${host}"
        echo "----------------------------------------------------------------------------------"

        # NOTE(review): the container is started from ${URI}/${IMAGE}:${VERSION}, so this
        # short-name ancestor filter may not match it - confirm against docker behaviour.
        if ! current=$(docker ps --filter "ancestor=${IMAGE}:${VERSION}" --format '{{.ID}}') ; then
            echo "Cannot talk to docker on ${target}"
            fail=1
            releaseTarget
            continue
        fi
        if [ "${current}" != "" ] ; then
            echo "Target ${target} already running version ${IMAGE}:${VERSION}"
            # FORCE is read from the environment; there is no command line switch for it
            if [ "${FORCE}" = "" ] ; then
                passed=$((passed+1))
                releaseTarget
                continue
            fi
        fi

        #
        #   Deregister. After deregister, we must ALWAYS reregister below.
        #
        echo "Deregister instance ${target} from load balancer"
        if ! aws elbv2 deregister-targets --target-group-arn "${arn}" --targets "Id=${target}" ; then
            echo "Cannot deregister ${target} from target group ${AWS_GROUP}"
            fail=1
            # Keep going
        fi

        #
        #   AWS seems to require at least a 10-15 second deregistration delay. It seems to deregister targets, but
        #   continues to route requests to them for up to 15 seconds. Ugh!
        #
        delay=$(aws elbv2 describe-target-group-attributes --target-group-arn "${arn}" \
            --output text --query "Attributes[?Key=='deregistration_delay.timeout_seconds'].Value")
        echo Waiting for the deregistration delay ${delay}

        # This seems to need to be >= the deregistration delay for the target group
        sleep "${delay}"

        #
        #   Gracefully stop containers
        #
        containers=$(docker ps --filter "name=${NAME}" --format '{{.ID}}')
        if [ "${containers}" != "" ] ; then
            echo "Gracefully stop traffic on ${NAME}"
            #
            #   The ELB should have stopped sending requests by now.
            #   The quit instructs the container to do what it can to gracefully clean up current requests.
            #
            startQuit=$(date +%s)
            echo "docker kill -s SIGQUIT ${NAME}"
            docker kill -s SIGQUIT "${NAME}"

            #
            #   Wait for instance to be fully removed from the elb
            #
            if ! waitForDrain "${target}" ; then
                echo "Cannot drain ${target}, force kill"
                # keep going - should not happen - best to upgrade
            fi

            #
            #   Wait for the app drain timeout: whatever remains of 10 seconds since the SIGQUIT
            #
            period=$((10 - $(date +%s) + startQuit))
            if [ "${period}" -gt 0 ] ; then
                echo sleep ${period}
                sleep "${period}"
            fi

            echo "Stopping container ${NAME} ${containers}"
            if ! docker stop "${NAME}" ; then
                echo "Cannot stop container ${containers} on ${target}, continuing ..."
                # May not be running, continue
            fi
        fi

        #
        #   Remove existing containers
        #
        echo "Remove container ${NAME} ${containers}"
        docker rm "${NAME}" >/dev/null 2>&1

        #
        #   Start new container
        #   The deploy command is rewritten to pass HOST to the container (after the first "-d")
        #   and to use the registry-qualified image name. It is deliberately left unquoted when
        #   run so that it is word-split into a command and its arguments.
        #
        COMMAND=$(echo ${AWS_DEPLOY} | sed "s/-d/-d -e HOST=${host}/" | sed "s^${IMAGE}:${VERSION}^${URI}/${IMAGE}:${VERSION}^")
        echo "${COMMAND}"
        if ! ${COMMAND} ; then
            echo "Cannot start container ${IMAGE}:${VERSION} on ${target}"
            echo "WARNING: target ${target} is not registered with load balancer, skip further deployments."
            fail=1
        fi

        #
        #   Register with load balancer
        #
        echo "Register instance ${target} with load balancer"
        if ! aws elbv2 register-targets --target-group-arn "${arn}" --targets "Id=${target}" ; then
            echo "Cannot register ${target} with target group ${AWS_GROUP}"
            releaseTarget
            continue
        fi

        #
        #   Validate
        #
        if [ "${fail}" = "" ] ; then
            echo -n "Started: "
            docker ps --filter "ancestor=${IMAGE}:${VERSION}" --format '{{.ID}}, {{.Image}}, {{.Status}}'
            if ! validate "${host}" ; then
                echo "Could not validate target ${target}"
                releaseTarget
                break
            fi
        fi

        #
        #   Wait for instance to be recognized by elb
        #
        if ! waitForReady "${target}" ; then
            echo "Target ${target} did not become ready. Halting deploy."
            releaseTarget
            break
        fi

        releaseTarget
        # NOTE(review): counted as passed even when fail=1 was set above - confirm this is intended
        passed=$((passed+1))
    done
done

if [ "${passed}" != "${count}" ] ; then
    echo "FAILED, upgraded ${passed} instances of ${count} with ${IMAGE}:${VERSION}"
    exit 1
fi
echo -e "\nPASSED, all ${count} instances running ${IMAGE}:${VERSION}"

#
#   Prune on the last remote docker host (DOCKER_HOST is still exported), then on the local daemon
#
echo -e "\nRunning docker garbage collection"
docker system prune -f >/dev/null
DOCKER_HOST= docker system prune -f >/dev/null
exit 0