Voici une question similaire : « Pourquoi le script de multiprocessing Python ralentit-il après un certain temps ? »
Exemple de code utilisant Pool:
from multiprocessing import Pool
# Illustrative snippet only: some_func and array are placeholders that are not defined here.
Pool(processes=6).map(some_func, array)
Après quelques itérations, le programme ralentit et finit par devenir encore plus lent que sans le multiprocessing. Peut-être que le problème vient de la fonction liée à Selenium ? Voici le code complet :
# bibliothèques
import os
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from multiprocessing import Pool
#
# Base address of the shop; hrefs scraped from listing pages are appended to it.
url = "https://eldorado.ua/"
# Folder that contains this script; the data paths below are built from it.
directory = os.path.dirname(os.path.realpath(__file__))
# Raw strings keep the Windows separators literal: "\c" is not a valid escape
# sequence and newer Python versions emit a warning for it.
env_path = directory + r"\chromedriver"
chromedriver_path = env_path + r"\chromedriver.exe"
# Maps a name (used as a sub-folder name under brand_pages/ and goods_pages/)
# to a page URL.
# NOTE(review): several keys below are identical as written, so later entries
# overwrite earlier ones when the dict is built -- confirm that the original
# names were not lost when this code was copied.
dict1 = {
    " ": "https://eldorado.ua/node/c1038944/",
    " ": "https://eldorado.ua/node/c1038957/",
    ", ": "https://eldorado.ua/node/c1038958/",
    " ": "https://eldorado.ua/node/c1088594/",
    " ": "https://eldorado.ua/node/c1088603/",
    " ": "https://eldorado.ua/node/c1285101/",
    " ": "https://eldorado.ua/node/c1215257/",
    "": "https://eldorado.ua/node/c1039055/",
    " ": "https://eldorado.ua/node/c1038960/",
    " ": "https://eldorado.ua/node/c1178596/",
    " ": "https://eldorado.ua/node/c1284654/",
    " ": "https://eldorado.ua/node/c1218544/",
    " ": "https://eldorado.ua/node/c1285161/",
    " ": "https://eldorado.ua/node/c1085100/",
}
def openChrome_headless(url1, name):
    """Load a page in headless Chrome and save its HTML source to disk.

    Args:
        url1: Address of the page to load.
        name: Output path without extension; ".html" is appended.

    A failure while writing the file is printed instead of raised, as before.
    Errors raised while loading the page still propagate to the caller, but
    the browser is now shut down in every case so no Chrome/chromedriver
    process is left running.
    """
    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    options.add_argument(
        '--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36"')
    driver = webdriver.Chrome(executable_path=chromedriver_path, options=options)
    try:
        driver.get(url=url1)
        # Fixed one-second pause before the page source is read.
        sleep(1)
        try:
            with open(name + ".html", "w", encoding="utf-8") as file:
                file.write(driver.page_source)
        except Exception as ex:
            # Best effort: report the problem and carry on with the next page.
            print(ex)
    finally:
        # quit() alone ends the session and closes its windows; a preceding
        # close() is redundant and, if it raised, would skip quit() entirely.
        driver.quit()
def processing_goods_pages(name):
    """Download every product page linked from the saved listing pages of ``name``.

    Each file in ``<directory>/brand_pages/<name>`` is parsed, and for every
    ``<header class="good-description">`` that contains a link, the linked
    page is saved under ``<directory>/goods_pages/<name>/<listing file name
    minus its last five characters>/``.

    Args:
        name: Sub-folder name under ``brand_pages`` (a key of ``dict1``).
    """
    # Build every path from the script folder so the function does not depend
    # on the current working directory of the worker process.
    brand_dir = os.path.join(directory, "brand_pages", name)
    goods_dir = os.path.join(directory, "goods_pages", name)
    for n in os.listdir(brand_dir):
        with open(os.path.join(brand_dir, n), encoding="utf-8") as file:
            soup = BeautifulSoup(file.read(), "lxml")
        # n[:-5] drops the last five characters, i.e. a ".html" suffix.
        out_dir = os.path.join(goods_dir, n[:-5])
        # Creates missing parents too and tolerates an existing folder, which
        # also avoids a check-then-create race between worker processes.
        os.makedirs(out_dir, exist_ok=True)
        for li in soup.find_all("header", class_="good-description"):
            anchor = li.find('a')
            href = anchor.get('href') if anchor is not None else None
            if not href:
                # Header without a usable link: nothing to download.
                continue
            ref = url + href
            print(li.text)
            # Plain concatenation (not os.path.join) so that the header text is
            # always treated as a file name inside out_dir.
            openChrome_headless(ref, f"{out_dir}{os.sep}{li.text}")
if __name__ == "__main__":
    # Iterating a dict yields its keys, which are the folder names to process.
    ar2 = list(dict1)
    # The context manager shuts the worker processes down once map() has
    # returned instead of leaving the pool open until interpreter exit.
    with Pool(processes=6) as pool:
        pool.map(processing_goods_pages, ar2)