Docker CI: 解决 Tomcat 服务宕机方案 - 健康检查
一、概述
基于 Docker 集成 CI 环境。涉及技术:Linux(Ubuntu 14.04), Docker, Jenkins, Git/Gitlab, Web/Httpbin, Python/Pytest, UI/Selenium, Robotframework, Grid Server, Appium 等。
架构图如下:
二、Docker 平台(Centos7):基于 Dockerfile 构建 Tomcat 新镜像
问题:Tomcat 服务宕机
方案:要保证服务器持续正常提供服务,需要同时满足两个条件:服务能够自动重启,并且数据不丢失。由于容器重启之后 /etc/hosts 文件会被还原,所以使用 Shell 脚本来控制(重启后重新拷贝 hosts 文件)。
# cd /vm/docker/tomcat
# docker build -t healthcheck/tomcat .
dockerfile
HEALTHCHECK:健康检测。参数说明:--interval:检查间隔(default 30s);--timeout:超时(default 30s);--retries:连续失败次数(default 3)
# Tomcat image with a built-in container health check.
FROM tomcat
# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="Allan <[email protected]>"
ENV DEBIAN_FRONTEND=noninteractive
# Health check: probe the Tomcat HTTP port every 5s, giving each probe 3s.
# The probe must exit with 0 (healthy) or 1 (unhealthy); a bare `exit` would
# propagate curl's own exit code (e.g. 22), which Docker does not define.
HEALTHCHECK --interval=5s --timeout=3s CMD curl --fail http://localhost:8080/ || exit 1
EXPOSE 8080
三、查看并运行 Tomcat 镜像
确定 STATUS 是 healthy 状态。参数说明:--name:容器名;--restart:自动启动;--shm-size:共享内存(/dev/shm)大小;-d:daemon 守护进程(后台运行);-p:publish 端口,8080 是 tomcat 服务端口
# docker images
# docker run --name website --shm-size=2g -d -p 8080:8080 healthcheck/tomcat
# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
8110cd610347 hc/tomcat "catalina.sh run" 21 minutes ago Up About a minute (healthy) 0.0.0.0:8080->8080/tcp rd
四、Shell 监测 tomcat 服务
- 安装 jq(处理 Linux 平台下 json 数据)
# wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
# rpm -ivh epel-release-latest-7.noarch.rpm
# yum repolist
# yum install jq
# 服务是否运行
# docker inspect ${CONTAINER} | jq ".[].State.Running"
true
# 服务是否健康
# docker inspect website | jq ".[].State.Health.Status"
"healthy"
- Shell 脚本
#!/bin/bash
#
# health_check.sh - watchdog for a Docker container with a HEALTHCHECK.
#
# Usage: health_check.sh CONTAINER [IMAGE]
#   CONTAINER  name of the container to watch (required)
#   IMAGE      image used to re-create the container (default: hc/tomcat)
#
# Environment:
#   HOSTS_FILE  hosts file copied into the container after a restart
#               (default: /usr/setup/hosts)
#
# Behaviour:
#   - If the container is not running, it is removed and re-created at once.
#   - If it is running but not "healthy", the check is repeated every 10s;
#     after 3 consecutive failures the container is removed and re-created.
#   - As soon as the container reports "healthy", the script finishes.
#
# Requires: docker, jq.

HOSTS_FILE="${HOSTS_FILE:-/usr/setup/hosts}"

# Print the current time in the format used for log lines.
timestamp() {
  date +"%Y-%m-%d %H:%M:%S"
}

# Stop, remove and re-create the container, then restore its hosts file.
# Arguments: $1 - container name, $2 - image name
# Returns:   non-zero if the new container could not be started.
restart_container() {
  local container=$1
  local image=$2

  # stop/rm may fail when the container is already gone; that is expected.
  docker stop "${container}"
  docker rm "${container}"
  if ! docker run -d -p 8080:8080 --name "${container}" "${image}"; then
    echo "$(timestamp) Failed to start ${container} from ${image}" >&2
    return 1
  fi
  # Give the service time to come up before touching its filesystem.
  sleep 10
  # /etc/hosts is reset whenever the container is re-created, so restore it.
  docker cp "${HOSTS_FILE}" "${container}:/etc/hosts"
  echo "$(timestamp) Container is running..."
}

# Entry point: watch the container until it is healthy or has been restarted.
# Arguments: $1 - container name, $2 - optional image name
# Returns:   0 on healthy/restarted, 2 on usage error, non-zero if the
#            restart failed.
main() {
  local container=${1:-}
  local image=${2:-hc/tomcat}
  local failures=0
  local running health

  if [[ -z "${container}" ]]; then
    echo "Usage: ${0##*/} CONTAINER [IMAGE]" >&2
    return 2
  fi

  while true; do
    echo "Check container if it's running"
    running=$(docker inspect "${container}" | jq ".[].State.Running")
    # An empty result (e.g. the container does not exist) also counts as down.
    if [[ "${running}" != 'true' ]]; then
      echo "$(timestamp) The ${container} is down"
      restart_container "${container}" "${image}"
      return
    fi

    echo "Check container if it's healthy"
    health=$(docker inspect "${container}" | jq ".[].State.Health.Status")
    # jq prints the JSON string including its double quotes.
    if [[ "${health}" == '"healthy"' ]]; then
      echo "$(timestamp) Container is ${health}"
      return 0
    fi

    failures=$((failures + 1))
    echo "$(timestamp) The ${failures} time can not access"

    # Restart only after the third consecutive failed health probe.
    if ((failures > 2)); then
      echo "Have retried 3 times, it's still unhealthy. then stop, rm & start it agin."
      echo "Can not access ${container}, service will reboot!"
      restart_container "${container}" "${image}"
      return
    fi
    sleep 10
  done
}

main "$@"
五、调试
# docker stop website
# ./health_check.sh website
# docker exec -it website /bin/bash -c "rm -rf /usr/local/tomcat/webapps/"
# ./health_check.sh website
六、crontab 定时执行任务
> crontab -e
*/1 * * * * /root/tomcat/health_check.sh website >> /tmp/logs/website/health_check.log