爬虫 Chrome 驱动镜像制作

爬虫 Chrome 驱动镜像制作,支持前端动态数据渲染的页面解析。

Tomcat

$ docker pull tomcat:9.0.37-jdk8-openjdk
$ docker run --name tomcat -p 8080:8080 -e TZ=Asia/Shanghai -d tomcat:9.0.37-jdk8-openjdk
$ docker exec -it tomcat bash

安装Chrome

# 中科大镜像源(如遇失败,请重启容器)
# https://mirrors.ustc.edu.cn/help/index.html
root@d5209675ae2b:/usr/local/tomcat# cat /etc/os-release
cp -a /etc/apt/sources.list /etc/apt/sources.list.bak
sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list
sed -i 's|security.debian.org/debian-security|mirrors.ustc.edu.cn/debian-security|g' /etc/apt/sources.list
apt-get update

root@d5209675ae2b:/usr/local/tomcat# wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
root@d5209675ae2b:/usr/local/tomcat# apt install ./google-chrome-stable_current_amd64.deb
root@d5209675ae2b:/usr/local/tomcat# google-chrome --version

安装ChromeDriver

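# 注意:chromedriver 的版本必须与上一步 google-chrome --version 输出的 Chrome 版本一致,
#       下面的 105.0.5195.52 仅为示例,请替换为实际版本号
#       (Chrome 115 及以上版本的驱动改由 Chrome for Testing 发布,请到对应目录下载)
# 原 https://npm.taobao.org/mirrors/chromedriver/ 镜像域名已停用,请改用 npmmirror:
# https://registry.npmmirror.com/binary.html?path=chromedriver/
# https://repo.huaweicloud.com/chromedriver/
root@d5209675ae2b:/usr/local/tomcat# mkdir -p /opt/chrome
root@d5209675ae2b:/usr/local/tomcat# wget https://cdn.npmmirror.com/binaries/chromedriver/105.0.5195.52/chromedriver_linux64.zip -P /opt/chrome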
root@d5209675ae2b:/usr/local/tomcat# cd /opt/chrome
root@d5209675ae2b:/opt/chrome# unzip chromedriver_linux64.zip

安装中文字体

root@d5209675ae2b:/opt/chrome# apt-get install fontconfig
root@d5209675ae2b:/opt/chrome# fc-list :lang=zh
root@d5209675ae2b:/opt/chrome# mkdir /usr/share/fonts/myfonts

# 在宿主机上找到微软雅黑字体文件(msyh.ttf、msyhbd.ttf),先退出容器,再用 docker cp 把字体拷贝进容器
# (下面的 /home/renlm/ 仅为示例路径,请替换为字体文件在宿主机上的实际位置)
root@d5209675ae2b:/opt/chrome# exit
	$ docker cp /home/renlm/msyh.ttf tomcat:/usr/share/fonts/myfonts/
	$ docker cp /home/renlm/msyhbd.ttf tomcat:/usr/share/fonts/myfonts/
	$ docker exec -it tomcat bash
root@d5209675ae2b:/usr/local/tomcat# fc-cache -fv
root@d5209675ae2b:/usr/local/tomcat# fc-list :lang=zh

保存镜像

$ docker login --username=renlm@21cn.com registry.cn-hangzhou.aliyuncs.com
	# https://cr.console.aliyun.com/cn-hangzhou/instances/mirrors
	# docker commit -a "作者" -m "描述信息" 容器id 目标镜像名:[镜像版本号]
	# docker tag [ImageId] registry.cn-hangzhou.aliyuncs.com/rlm/tomcat:[镜像版本号]
	# docker push registry.cn-hangzhou.aliyuncs.com/rlm/tomcat:[镜像版本号]
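	# 容器可直接用名称 tomcat 指定(也可用 docker ps 查到的容器 ID);
	# 下面 docker tag 中的镜像 ID 仅为示例,请替换为 docker commit 输出(或 docker images 查到)的实际镜像 ID
$ docker commit -a "renlm.cn" -m "tomcat" tomcat rlm/tomcat
$ docker tag 132986775bb9 registry.cn-hangzhou.aliyuncs.com/rlm/tomcat:latest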
$ docker push registry.cn-hangzhou.aliyuncs.com/rlm/tomcat:latest