下面是 Scrapy 中调用 Chrome 浏览器的下载中间件(middleware),可以设置是否更换 User-Agent 请求头、是否使用无界面(headless)模式,以及是否加载 CSS 与图片。
import atexit

from selenium import webdriver
class HandlessMiddleware(object):
    """Scrapy middleware that loads every request in a Selenium-driven Chrome.

    One Chrome instance is started when the middleware is constructed and is
    reused for all requests. ``process_request`` navigates that browser to the
    request URL and hands the rendered page back as the response.

    NOTE(review): ``HtmlResponse`` is not imported in the visible part of this
    file; it presumably comes from ``scrapy.http`` -- confirm the import exists.
    """

    def __init__(self, headless=False, load_images=False, user_agent=None):
        """Start the shared Chrome browser.

        The defaults reproduce the previously hard-coded configuration
        (visible window, images blocked, default User-Agent), so constructing
        the middleware without arguments behaves as before.

        Args:
            headless: When true, start Chrome without a visible window.
            load_images: When false (the default), image loading is blocked
                through the Chrome profile preferences.
            user_agent: Optional User-Agent string to send instead of
                Chrome's default one.
        """
        super(HandlessMiddleware, self).__init__()

        option = webdriver.ChromeOptions()
        option.add_argument('--disable-gpu')
        option.add_argument('lang=zh_CN.UTF-8')
        if user_agent:
            option.add_argument('--user-agent=' + user_agent)
        if headless:
            option.add_argument('--headless')
        if not load_images:
            # Content-setting value 2 tells Chrome to block the resource type.
            prefs = {
                "profile.managed_default_content_settings.images": 2,
            }
            option.add_experimental_option("prefs", prefs)

        # ``options=`` is the supported keyword; the older ``chrome_options=``
        # spelling is deprecated and rejected by recent Selenium releases.
        self.browser = webdriver.Chrome(options=option)
        # Nothing else in this class shuts the browser down, so make sure the
        # Chrome/chromedriver processes are terminated when Python exits.
        atexit.register(self.browser.quit)

        self.browser.implicitly_wait(10)
        # Opens one extra blank tab. The reason is not evident from this file;
        # kept so the browser state matches the previous behaviour.
        self.browser.execute_script('window.open("","_blank");')

    def process_request(self, request, spider):
        """Load ``request.url`` in the shared browser and return the page.

        Args:
            request: The request being processed; only ``request.url`` is read
                here, and the object is attached to the returned response.
            spider: The spider issuing the request (unused).

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and page
            source after navigation, encoded as UTF-8.
        """
        self.browser.get(request.url)
        return HtmlResponse(
            url=self.browser.current_url,
            body=self.browser.page_source,
            encoding="utf-8",
            request=request,
        )