之前写过一篇文章《Python Selenium 网页长截图/HTML 转 PDF》,介绍了把网页截图后转成 PDF 的方法,但以图片形式保存的 PDF 在阅读时不太友好。后来在网上搜索资料时,发现可以直接调用浏览器的打印功能,将网页另存为 PDF。下面是核心代码,测试浏览器为 Chrome。
import time,json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# Change this to the directory where the generated PDF files should be saved.
path = r'C:\Users\XXXX\Desktop'

#%% Chrome options: drive the browser's print dialog and "Save as PDF"
# Print-preview state handed to Chrome (serialized to JSON below).
# Optional keys left unset here: "customMargins", "marginsType",
# "scaling", "scalingType", "scalingTypePdf".
settings = {
    "recentDestinations": [
        {"id": "Save as PDF", "origin": "local", "account": ""},
    ],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
    "isHeaderFooterEnabled": True,
    # True = landscape, False = portrait; portrait is the default when omitted.
    "isLandscapeEnabled": False,
    "isCssBackgroundEnabled": True,
    "mediaSize": {
        "height_microns": 297000,
        "name": "ISO_A4",
        "width_microns": 210000,
        "custom_display_name": "A4 210 x 297 mm",
    },
}

prefs = {
    'printing.print_preview_sticky_settings.appState': json.dumps(settings),
    # Directory the saved PDF is written to.
    'savefile.default_directory': path,
}

chrome_options = webdriver.ChromeOptions()
# '--headless' could be added here to hide the browser window (left disabled).
chrome_options.add_argument('--enable-print-browser')
# Silent printing: no need to click the confirm button in the print dialog.
chrome_options.add_argument('--kiosk-printing')
chrome_options.add_experimental_option('prefs', prefs)
#%% Print: define the print function
def web_print_save_pdf(url, filename):
    """Open *url* in Chrome and save the page as a PDF through window.print().

    The browser is started with the module-level ``chrome_options``, which
    select the "Save as PDF" destination and the output directory.

    Args:
        url: Address of the page to print.
        filename: Value assigned to ``document.title`` before printing; the
            page title is what ends up as the PDF file name.
    """
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    try:
        driver.get(url)
        driver.maximize_window()
        # Fixed pause to give the page time to finish rendering.
        time.sleep(3)
        # Hand the file name over as a script argument instead of formatting
        # it into the JavaScript source, so quotes or backslashes in the name
        # cannot break (or inject into) the script. window.print() opens the
        # print flow without having to send a Ctrl+P hotkey.
        driver.execute_script(
            'document.title = arguments[0]; window.print();', filename
        )
    finally:
        # quit() ends the whole session (browser and driver process), even
        # when navigation or printing raised; close() only closes the window.
        driver.quit()
#%% Usage: print one page to PDF
url = "https://python3-cookbook.readthedocs.io/zh-cn/latest/"
filename = 'Python Cookbook 3rd Edition Documentation.pdf'
web_print_save_pdf(url=url, filename=filename)
参考文章:
python之批量打印网页为pdf文件
TypeError: WebDriver.__init__() got multiple values for argument 'options'
TypeError: WebDriver.__init__() got multiple values for argument 'options'
版权归原作者 夏日麦香 所有, 如有侵权,请联系我们删除。