#!/bin/bash
#
#   docker-deploy-elb - Deploy a docker image to containers behind an ELB
#
#   usage: docker-deploy-elb --aws-profile profile --deploy cmd --group group --proxy proxy --region region \
#                            --validate page --version 1.2.3 IMAGE
#
#   Options:
#       --aws-profile profile   Exported as AWS_PROFILE.
#       --deploy cmd            Command line used to start the new container (AWS_DEPLOY).
#       --group group           Target group name (AWS_GROUP).
#       --proxy proxy           Autoscaling group name used to find instances (AWS_PROXY).
#       --region region         Exported as AWS_DEFAULT_REGION. May also come from the environment.
#       --validate page         Path appended to http://HOST when validating (AWS_VALIDATE).
#       --version version       Image version. Defaults to the version reported by pak.
#
#   Environment:
#       FORCE                   If non-empty, redeploy targets already running the requested version.
#
REMOTE_SGROUP="remote-prod"
DPORT=2375

#
#   Source configuration and define as env vars
#
#files=""
#for f in ../CONFIG/keys.json CONFIG/keys.json product.json ../pak.json ; do
#    [ -f ${f} ] && files="${files} ${f}"
#done
#eval $(../paks/assist/json2env ${files})

#
#   Default version comes from pak.json in this directory, else from the parent directory
#
if [ -f pak.json ] ; then
    VERSION=$(pak --dir . edit version)
else
    VERSION=$(pak --dir .. edit version)
fi

#
#   Parse options. The first word that is not a known option ends parsing and is taken as the image.
#
while [[ $# -gt 0 ]] ; do
    arg="$1"
    case "${arg}" in
    --aws-profile)
        AWS_PROFILE="${2}"
        shift ; shift
        ;;
    --deploy)
        AWS_DEPLOY="${2}"
        shift ; shift
        ;;
    --group)
        AWS_GROUP="${2}"
        shift ; shift
        ;;
    --proxy)
        AWS_PROXY="${2}"
        shift ; shift
        ;;
    --region)
        AWS_DEFAULT_REGION="${2}"
        shift ; shift
        ;;
    --validate)
        AWS_VALIDATE="${2}"
        shift ; shift
        ;;
    --version)
        VERSION="${2}"
        shift ; shift
        ;;
    *)
        break
        ;;
    esac
done

IMAGE="${1}"
NAME="${IMAGE}"

if [[ -z "${IMAGE}" || -z "${AWS_DEFAULT_REGION}" ]] ; then
    echo "usage: docker-deploy-elb [--aws-profile profile] [--region region] [--proxy proxy] [--group group] [--deploy cmd] [--validate page] [--version version] image"
    exit 255
fi
export AWS_PROFILE AWS_DEFAULT_REGION

#
#   Source functions. Quoted so an install path containing whitespace still works.
#
. "$(dirname "${BASH_SOURCE[0]}")/common"
. "$(dirname "${BASH_SOURCE[0]}")/docker-login"
. "$(dirname "${BASH_SOURCE[0]}")/remote-access"

echo -e "\nDeploy container \"${IMAGE}:${VERSION}\" to Load Balancer \"${AWS_PROXY}\" Group \"${AWS_GROUP}\"\n"
  72. getGroups() {
  73. local groups arn
  74. groups=""
  75. for arn in $(aws elbv2 describe-target-groups --names ${AWS_GROUP} --output text --query 'TargetGroups[].TargetGroupArn')
  76. do
  77. if [ "${arn}" = "${arn/\/${AWS_GROUP}\//}" ] ; then
  78. continue
  79. fi
  80. groups="${groups} ${arn}"
  81. done
  82. echo ${groups}
  83. }
  84. getTargets() {
  85. local arn
  86. arn=$1
  87. aws elbv2 describe-target-health --target-group-arn ${arn} --output text --query 'TargetHealthDescriptions[].Target.Id'
  88. if [ $? != 0 ] ; then
  89. echo "Cannot get target health"
  90. exit 255
  91. fi
  92. }
  93. getHost() {
  94. local target
  95. target=$1
  96. aws ec2 describe-instances --instance-ids ${target} --output text --query Reservations[0].Instances[0].PublicIpAddress
  97. }
  98. checkTargets() {
  99. local arn targets tcount
  100. allTargets=$(aws ec2 describe-instances \
  101. --filter "Name=tag:aws:autoscaling:groupName,Values=${AWS_PROXY}" --output text \
  102. --query 'Reservations[].Instances[].InstanceId')
  103. if [ $? != 0 ] ; then
  104. echo "Cannot describe instances"
  105. exit 255
  106. fi
  107. if [ "${allTargets}" = "" ] ; then
  108. echo "No targets found in autoscale group ${AWS_PROXY}"
  109. exit 255
  110. fi
  111. echo "Check targets are registered with proxy"
  112. for arn in $(getGroups)
  113. do
  114. targets=$(getTargets ${arn})
  115. tcount=0
  116. for target in ${allTargets}
  117. do
  118. if [ "${allTargets/${target}/}" = "${allTargets}" ] ; then
  119. echo "Repair target ${target} missing from proxy"
  120. aws elbv2 register-targets --target-group-arn ${arn} --targets Id=${target}
  121. if [ $? != 0 ] ; then
  122. echo "Cannot register ${target} with load balancer ${AWS_PROXY}"
  123. continue
  124. fi
  125. else
  126. echo "Target ${target} is registered in proxy"
  127. tcount=$((tcount+1))
  128. fi
  129. done
  130. done
  131. echo "${NAME} has ${tcount} targets in target group"
  132. }
  133. pullImage() {
  134. local i
  135. i=0
  136. while [ $i -lt 5 ]
  137. do
  138. echo Pull image ${IMAGE_PATH}:${VERSION}
  139. output=$(docker pull ${IMAGE_PATH}:${VERSION})
  140. if [ $? = 0 ] ; then
  141. echo "Image pulled"
  142. echo ${output} | egrep -v 'Already exists|Pulling|Waiting|Verifying|Download complete|Pull complete|Digest:'
  143. return 0
  144. fi
  145. echo ${output}
  146. echo "Cannot pull image, retry in 5 seconds. (${i})"
  147. sleep 5
  148. i=$((i+1))
  149. done
  150. return 1
  151. }
  152. validate() {
  153. local host i
  154. host=$1
  155. echo "Validate application at http://${host}${AWS_VALIDATE}"
  156. i=0
  157. while [ $i -lt 5 ]
  158. do
  159. code=$(curl -s -o /dev/null --retry 10 --retry-delay 1 --retry-max-time 15 \
  160. -I -w "%{http_code}" http://${host}${AWS_VALIDATE})
  161. if [ "${code}" = 200 ] ; then
  162. echo "PASSED: Health check successful"
  163. return 0
  164. fi
  165. echo "Continue to wait for application, retry in 5 seconds. (${i})"
  166. sleep 5
  167. i=$((i+1))
  168. done
  169. echo "FAILED: Cannot validate application, status ${code}"
  170. return 1
  171. }
  172. waitForDrain() {
  173. local i state target
  174. target=$1
  175. echo "Wait for elb to drain target ${target}"
  176. i=0
  177. while [ $i -lt 30 ]
  178. do
  179. state=$(aws elbv2 describe-target-health --target-group-arn "${arn}" --output text --query "TargetHealthDescriptions[?Target.Id=='${target}'].TargetHealth.State")
  180. if [ $? != 0 ] ; then
  181. echo "Cannot get target health for ${target}"
  182. exit 255
  183. fi
  184. if [ "${state}" = "" ] ; then
  185. echo Target ${target} now removed from elb.
  186. return 0
  187. fi
  188. echo Waiting for ${target} to drain. State: ${state}.
  189. sleep 1
  190. i=$((i+1))
  191. done
  192. echo "FAILED: Cannot drain instance ${target} state ${state}"
  193. return 1
  194. }
  195. waitForReady() {
  196. local i state target
  197. target=$1
  198. echo "Wait for elb to enable target ${target}"
  199. i=0
  200. while [ $i -lt 30 ]
  201. do
  202. state=$(aws elbv2 describe-target-health --target-group-arn "${arn}" --output text --query "TargetHealthDescriptions[?Target.Id=='${target}'].TargetHealth.State")
  203. if [ $? != 0 ] ; then
  204. echo "Cannot get target health for ${target}"
  205. exit 255
  206. fi
  207. if [ "${state}" = "healthy" ] ; then
  208. echo Target ${target} now ${state}.
  209. return 0
  210. fi
  211. echo Waiting for ${target} to become healthy. State: ${state}.
  212. sleep 1
  213. i=$((i+1))
  214. done
  215. echo "FAILED: Instance not ready ${target} state ${state}"
  216. return 1
  217. }
  218. #
  219. # Currently 1-1 correspondence between target group and application
  220. #
  221. count=0
  222. passed=0
  223. checkTargets
  224. for arn in $(getGroups)
  225. do
  226. for target in $(getTargets ${arn})
  227. do
  228. fail=
  229. count=$((count+1))
  230. URI=$(dockerLogin)
  231. IMAGE_PATH=${URI}/${IMAGE}
  232. grantAccess ${REMOTE_SGROUP} ${DPORT}
  233. host=$(getHost ${target})
  234. export DOCKER_HOST=tcp://${host}:${DPORT}
  235. if ! pullImage ; then
  236. echo "Cannot pull ${IMAGE}:${VERSION} on ${target}"
  237. continue
  238. fi
  239. echo
  240. echo "----------------------------------------------------------------------------------"
  241. echo "Deploy to instance ${target} at ${host}"
  242. echo "----------------------------------------------------------------------------------"
  243. curret=$(docker ps --filter ancestor=${IMAGE}:${VERSION} --format '{{.ID}}')
  244. if [ $? != 0 ] ; then
  245. echo "Cannot talk to docker on ${target}"
  246. fail=1
  247. continue
  248. fi
  249. if [ "${current}" != "" ] ; then
  250. echo "Target ${target} already running version ${IMAGE}:${VERSION}"
  251. if [ "${FORCE}" = "" ] ; then
  252. passed=$((passed+1))
  253. continue
  254. fi
  255. fi
  256. #
  257. # Deregister. After deregister, we must ALWAYS reregister below.
  258. #
  259. echo "Deregister instance ${target} from load balancer"
  260. aws elbv2 deregister-targets --target-group-arn ${arn} --targets Id=${target}
  261. if [ $? != 0 ] ; then
  262. echo "Cannot deregister ${target} from target group ${AWS_GROUP}"
  263. fail=1
  264. # Keep going
  265. fi
  266. #
  267. # AWS seems to require at least a 10-15 second deregistration delay. It seems to deregister targets, but
  268. # continues to route requests to them for up to 15 seconds. Ugh!
  269. #
  270. delay=$(aws elbv2 describe-target-group-attributes --target-group-arn ${arn} \
  271. --output text --query "Attributes[?Key=='deregistration_delay.timeout_seconds'].Value")
  272. echo Waiting for the deregistration delay ${delay}
  273. # This seems to need to be >= the deregistration delay for the target group
  274. sleep ${delay}
  275. #
  276. # Gracefully stop containers
  277. #
  278. containers=$(docker ps --filter "name=${NAME}" --format '{{.ID}}')
  279. if [ "${containers}" != "" ] ; then
  280. echo "Gracefully stop traffic on ${NAME}"
  281. #
  282. # The ELB should have stopped sending requests by now.
  283. # The quit instructs the container to do what it can to gracefully clean up current requests.
  284. #
  285. startQuit=$(date +%s)
  286. echo "docker kill -s SIGQUIT ${NAME}"
  287. docker kill -s SIGQUIT ${NAME}
  288. #
  289. # Wait for instance to be fully removed from the elb
  290. #
  291. if ! waitForDrain "${target}" ; then
  292. echo "Cannot drain ${target}, force kill"
  293. # keep going - should not happen - best to upgrade
  294. fi
  295. #
  296. # Wait for the app drain timeout
  297. #
  298. period=$((10 - $(date +%s) + ${startQuit}))
  299. if [ "${period}" -gt 0 ] ; then
  300. echo sleep ${period}
  301. sleep ${period}
  302. fi
  303. echo "Stopping container ${NAME} ${containers}"
  304. docker stop ${NAME}
  305. if [ $? != 0 ] ; then
  306. echo "Cannot stop container ${container} on ${target}, continuing ..."
  307. # May not be running, continue
  308. fi
  309. fi
  310. #
  311. # Remove existing containers
  312. #
  313. echo "Remove container ${NAME} ${containers}"
  314. docker rm ${NAME} >/dev/null 2>&1
  315. #
  316. # Start new container
  317. #
  318. COMMAND=$(echo ${AWS_DEPLOY} | sed "s/-d/-d -e HOST=${host}/" | sed "s^${IMAGE}:${VERSION}^${URI}/${IMAGE}:${VERSION}^")
  319. echo "${COMMAND}"
  320. ${COMMAND}
  321. if [ $? != 0 ] ; then
  322. echo "Cannot start container ${IMAGE}:${VERSION} on ${target}"
  323. echo "WARNING: target ${target} is not registered with load balancer, skip further deployments."
  324. fail=1
  325. fi
  326. #
  327. # Register with load balancer
  328. #
  329. echo "Register instance ${target} with load balancer"
  330. aws elbv2 register-targets --target-group-arn ${arn} --targets Id=${target}
  331. if [ $? != 0 ] ; then
  332. echo "Cannot register ${target} with target group ${AWS_GROUP}"
  333. continue
  334. fi
  335. #
  336. # Validate
  337. #
  338. if [ "${fail}" = "" ] ; then
  339. echo -n "Started: "
  340. docker ps --filter "ancestor=${IMAGE}:${VERSION}" --format '{{.ID}}, {{.Image}}, {{.Status}}'
  341. if ! validate ${host} ; then
  342. echo "Could not validate target ${target}"
  343. break
  344. fi
  345. fi
  346. #
  347. # Wait for instance to be recognized by elb
  348. #
  349. if ! waitForReady "${target}" ; then
  350. echo "Target ${target} did not become ready. Halting deploy."
  351. break
  352. fi
  353. dockerLogout
  354. revokeAccess ${REMOTE_SGROUP} ${DPORT}
  355. passed=$((passed+1))
  356. done
  357. done
  358. if [ "${passed}" != "${count}" ] ; then
  359. echo "FAILED, upgraded ${passed} instances of ${count} with ${IMAGE}:${VERSION}"
  360. exit 1
  361. fi
  362. echo -e "\nPASSED, all ${count} instances running ${IMAGE}:${VERSION}"
  363. echo -e "\nRunning docker garbage collection"
  364. docker system prune -f >/dev/null
  365. DOCKER_HOST= docker system prune -f >/dev/null
  366. exit 0