from blog_spider import craw, parse, urls from concurrent.futures import ThreadPoolExecutor import concurrent import time
# Download every page concurrently on a 5-thread pool and time the whole run.
start = time.time()
with ThreadPoolExecutor(max_workers=5) as executer:
    # Executor.map returns the results in the same order as ``urls``.
    htmls = executer.map(craw, urls)
    # Pair each URL with its page; building the list waits for every download.
    url_html_maps = list(zip(urls, htmls))
    for url, html in url_html_maps:
        print(url, len(html), sep="\n")
end = time.time()
print("多线程爬虫耗时:%s " % (end - start))
# Parse every downloaded page on a 5-thread pool and print each result as
# soon as it is ready.
with ThreadPoolExecutor(max_workers=5) as executer:
    # Map each pending future back to the URL whose page it is parsing.
    fs = {executer.submit(parse, html): url for url, html in url_html_maps}
    # as_completed yields a future as soon as it finishes instead of waiting
    # for the futures in submission order.
    # https://blog.csdn.net/panguangyuu/article/details/105335900
    for future in concurrent.futures.as_completed(fs):
        print(fs[future], future.result())
这里做一个简单的示例 ```python from blog_spider import craw, parse, urls from concurrent.futures import ThreadPoolExecutor import concurrent import time
def notify(future=None):
    """Simulate a message-notification hook (placeholder that does nothing).

    ``Future.add_done_callback`` invokes its callback with the finished
    future as the only argument, so the function must accept one; with a
    zero-argument signature every callback invocation raises ``TypeError``
    (which the executor merely logs). The default keeps a bare ``notify()``
    call valid.

    Args:
        future: The completed future handed over by ``add_done_callback``.
            Currently ignored.

    Returns:
        None.
    """
    pass
# Parse every page on a 5-thread pool and attach the notification callback to
# each future once it has completed.
with ThreadPoolExecutor(max_workers=5) as executer:
    # Map each pending future back to the URL whose page it is parsing.
    fs = {executer.submit(parse, html): url for url, html in url_html_maps}
    for future in concurrent.futures.as_completed(fs):
        # The future is already done here, so the callback fires immediately.
        future.add_done_callback(notify)
""" 计算一个大数是不是一个素数, 这是一个CPU消耗型的代码,更适合多进程, 这段代码会对比单线程、多线程和多进程的性能区别 """ """ 计算一个大数是不是一个素数, 这是一个CPU消耗型的代码,更适合多进程, 这段代码会对比单线程、多线程和多进程的性能区别 """ import math from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ProcessPoolExecutor from utils.function_timer import func_timer
def is_prime(n):
    """Return True when ``n`` is a prime number, False otherwise.

    Plain trial division: even numbers are rejected up front, then every odd
    candidate from 3 up to ``floor(sqrt(n))`` is tried as a divisor.

    The function is meant as a CPU-heavy workload. For the benchmark to be
    meaningful, ``n`` should run through the whole loop (i.e. be a large
    prime); an input rejected part-way finishes early and the comparison of
    the three execution strategies may not come out as expected.

    Args:
        n: Integer to test.

    Returns:
        bool: whether ``n`` is prime.
    """
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    # int() truncates, which equals floor() for the non-negative root.
    sqrt_n = int(math.sqrt(n))
    for i in range(3, sqrt_n + 1, 2):
        if n % i == 0:
            return False
    return True
@func_timer
def single_thread(numbers):
    """Check every number with ``is_prime`` sequentially in the calling thread.

    Serves as the baseline of the comparison. The results of ``is_prime``
    are discarded; only the work itself matters.
    NOTE(review): ``func_timer`` presumably reports the elapsed time of the
    call - confirm against ``utils.function_timer``.

    Args:
        numbers: Iterable of integers to test.
    """
    for number in numbers:
        is_prime(number)
@func_timer
def multi_thread(numbers):
    """Check every number with ``is_prime`` on a pool of 10 worker threads.

    The results of ``is_prime`` are discarded; only the work itself matters.
    NOTE(review): ``func_timer`` presumably reports the elapsed time of the
    call - confirm against ``utils.function_timer``.

    Args:
        numbers: Iterable of integers to test.
    """
    with ThreadPoolExecutor(max_workers=10) as executer:
        # The iterator returned by map() is never consumed; leaving the
        # ``with`` block is what waits for all submitted checks to finish.
        executer.map(is_prime, numbers)
@func_timer
def multi_process(numbers):
    """Check every number with ``is_prime`` on a pool of 10 worker processes.

    The results of ``is_prime`` are discarded; only the work itself matters.
    NOTE(review): ``func_timer`` presumably reports the elapsed time of the
    call - confirm against ``utils.function_timer``.
    NOTE(review): on platforms that spawn worker processes the caller needs
    an ``if __name__ == "__main__":`` guard - confirm at the call site.

    Args:
        numbers: Iterable of integers to test.
    """
    with ProcessPoolExecutor(max_workers=10) as executer:
        # The iterator returned by map() is never consumed; leaving the
        # ``with`` block is what waits for all submitted checks to finish.
        executer.map(is_prime, numbers)