# Web scraping
# python + selenium + WebDriver + BeautifulSoup
#
# python 3.8+
#
# conda install -c conda-forge selenium==4.15.1
#   -> Edge WebDriver: https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/
# conda install -c anaconda beautifulsoup4==4.9.1
import sys
import time
import datetime
import traceback
import ssl

from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import requests as rq
import urllib3


##################################################
class CustomHttpAdapter(rq.adapters.HTTPAdapter):
    """requests transport adapter that builds its pool with a given SSLContext.

    Mount an instance on a ``requests.Session`` so that connections made
    through it use ``ssl_context`` instead of the default TLS settings.
    """

    def __init__(self, ssl_context=None, **kwargs):
        # Must be assigned before super().__init__(): the base constructor
        # calls init_poolmanager(), which reads self.ssl_context.
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        """Create the urllib3 pool manager, injecting the custom SSLContext.

        ``**pool_kwargs`` mirrors the base-class signature and is forwarded
        unchanged to ``PoolManager``.
        """
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            ssl_context=self.ssl_context,
            **pool_kwargs,
        )


##################################################
def login_post(data=None):
    """Log in with a headless Edge browser, then POST with requests.

    The function drives the login form through Selenium, copies the browser's
    cookies into a ``requests.Session`` and sends a POST request with that
    session, printing the response body.

    The URLs and credentials ('url', 'id', 'pass') are the placeholder values
    from the original snippet and must be replaced with real ones.

    Args:
        data: Payload forwarded to ``Session.post(..., data=data)``.
            Defaults to ``None`` (no body). The original code referenced an
            undefined name ``data`` here, which raised ``NameError``.

    Returns:
        None. Progress messages and the response text are printed.

    Raises:
        Any Selenium or requests exception is propagated to the caller; the
        browser is shut down first in every case.
    """
    # open web browser
    print('open web browser')
    options = webdriver.EdgeOptions()
    options.add_argument("headless")
    options.add_argument('log-level=3')  # INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3.
    service = webdriver.EdgeService(
        executable_path='./msedgedriver.exe',
        service_args=['--log-level=SEVERE'],
    )
    driver = webdriver.Edge(service=service, options=options)

    # Everything after the browser starts runs under try/finally so that the
    # browser and driver processes are not leaked when a step fails.
    try:
        driver.set_window_size(1200, 1000)

        # login
        print('login')
        driver.get('url')
        time.sleep(1)  # fixed wait for the login page to load
        print(driver.current_url)

        driver.find_element(By.ID, 'user-name').send_keys('id')
        driver.find_element(By.ID, 'password').send_keys('pass')
        driver.find_element(By.ID, 'login').click()
        time.sleep(2)  # fixed wait for the login request to complete

        with rq.Session() as s:
            ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
            # 0x4 is OpenSSL's SSL_OP_LEGACY_SERVER_CONNECT bit; the named
            # constant only exists on Python 3.12+, so fall back to the
            # literal on older interpreters.
            ctx.options |= getattr(ssl, 'OP_LEGACY_SERVER_CONNECT', 0x4)
            s.mount('https://', CustomHttpAdapter(ctx))

            # Copy the authenticated browser cookies into the requests
            # cookie jar so the POST below reuses the logged-in session.
            for cookie in driver.get_cookies():
                s.cookies.set(
                    cookie["name"],
                    cookie["value"],
                    domain=cookie["domain"],
                    path=cookie["path"],
                )

            st = s.post('url', data=data)
            print(st.text)
    finally:
        # close web browser
        print('close web browser')
        # quit() closes every window and ends the driver process, so a
        # separate close() call is not needed.
        driver.quit()
2023年11月21日火曜日
ウェブスクレイピング (selenium - requests)
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿