Preface
The text and pictures in this article are from the Internet. They are for study and communication only. They do not have any commercial use. The copyright is owned by the original author. If you have any questions, please contact us in time for processing.
Author: Chiang Gou
When using multiple processes, put the code that starts them under `if __name__ == '__main__':`; otherwise an exception or warning will be raised.
Process() Basic use: similar to Thread().
Pool() Basic use:
The map method works like the built-in map function, but spreads the calls across multiple processes.
from multiprocessing import Pool
# Use the pool as a context manager so its two worker processes are
# cleaned up as soon as the blocking map() call has returned.
with Pool(2) as pool:
    pool.map(fib, [35] * 2)
multiprocessing.dummy module:
multiprocessing.dummy replicates the API of multiprocessing but is no more than a wrapper around the threading module.
I have not actually used some of the points above in real work; I only read about them and wrote demos for practice, so my understanding is not very thorough.
# -*- coding: utf-8 -*-
from multiprocessing import Process, Pool
from multiprocessing.dummy import Pool as DummyPool
import time
import datetime
def log_time(methond_name):
    """Build a decorator that prints how long each call of a function takes.

    Args:
        methond_name: Label printed in front of the elapsed time. The
            (misspelled) parameter name is kept so existing keyword
            callers keep working.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns and prints ``'<label> cost <seconds>s'`` after
        each successful call.
    """
    # Local import so the block only relies on names it brings in itself.
    from functools import wraps

    def decorator(f):
        @wraps(f)  # keep f's __name__ / __doc__ on the wrapper
        def wrapper(*args, **kwargs):
            start_time = time.time()
            res = f(*args, **kwargs)
            end_time = time.time()
            print('%s cost %ss' % (methond_name, (end_time - start_time)))
            return res
        return wrapper
    return decorator
def fib(n):
    """Return the n-th Fibonacci number, computed by plain recursion.

    Any ``n`` of 2 or less yields 1; larger values are the sum of the two
    preceding numbers. The recursion is not memoised, so the cost grows
    quickly with ``n``.
    """
    return 1 if n <= 2 else fib(n - 1) + fib(n - 2)
@log_time('single_process')
def single_process():
    """Compute fib(33) twice, one call after the other, in this process."""
    for _ in range(2):
        fib(33)
@log_time('multi_process')
def multi_process():
    """Compute fib(33) in two child processes and wait for both to finish."""
    workers = [Process(target=fib, args=(33,)) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
@log_time('pool_process')
def pool_process():
    """Compute fib(33) twice on a pool of two worker processes.

    The pool is used as a context manager so its workers are torn down
    once the blocking ``map()`` call has returned, instead of being left
    running after the function exits.
    """
    with Pool(2) as pool:
        pool.map(fib, [33] * 2)
@log_time('dummy_pool')
def dummy_pool():
    """Compute fib(33) twice on a two-worker ``multiprocessing.dummy`` pool.

    ``DummyPool`` is the thread-backed pool imported from
    ``multiprocessing.dummy``. It is used as a context manager so the
    worker threads are shut down once ``map()`` has returned.
    """
    with DummyPool(2) as pool:
        pool.map(fib, [33] * 2)
if __name__ == '__main__':
    # Run the four variants in turn; each one prints its own timing line.
    for demo in (single_process, multi_process, pool_process, dummy_pool):
        demo()
Pipe-based parmap
This one is slightly harder to understand. Note: if your Python foundation is not yet solid, you can join the author's study group to get the latest tutorials, from introductory material through to hands-on practice.
queue
Implement a producer-consumer model: one queue stores the tasks and another queue stores the results.
Queue is also available in the multiprocessing module, but it does not provide the task_done() and join() methods. So use Queue to store the results and JoinableQueue() to store the tasks.
Imitate Demo, a consumer process and a producer process:
# -*- coding: utf-8 -*-
from multiprocessing import Process, Queue, JoinableQueue
import time
import random
def double(n):
    """Return ``n`` multiplied by two."""
    doubled = n * 2
    return doubled
def producer(name, task_q):
    """Queue ``(double, n)`` tasks until a random draw exceeds 0.8.

    Args:
        name: Label used in the printed messages.
        task_q: Queue-like object; only its ``put`` method is called.

    Each draw ``n`` of at most 0.8 is queued as ``(double, n)``. The first
    draw above 0.8 queues a single ``None`` instead and ends the function.
    """
    while True:
        n = random.random()
        if n <= 0.8:
            print('%s produce %s.' % (name, n))
            task_q.put((double, n))
            continue
        # Draw above 0.8: queue the stop marker and finish.
        task_q.put(None)
        print('%s break.' % name)
        return
def consumer(name, task_q, result_q):
    """Run tasks taken from ``task_q`` until the ``None`` stop marker arrives.

    Args:
        name: Label used in the printed messages.
        task_q: Queue providing ``get()`` and ``task_done()``. Items are
            either ``(func, arg)`` pairs or ``None``.
        result_q: Queue that receives ``func(arg)`` for every task.
    """
    while True:
        task = task_q.get()
        if task is None:
            # The stop marker was fetched with get() as well, so it has to
            # be acknowledged; otherwise task_q.join() could never return.
            task_q.task_done()
            print('%s break.' % name)
            break
        func, arg = task
        res = func(arg)
        time.sleep(0.5)  # block for half a second per task
        task_q.task_done()
        result_q.put(res)
        print('%s consume %s, result %s' % (name, arg, res))
def run():
    """Start one producer and one consumer process, then print the results.

    The results are collected from ``result_q`` while the consumer is
    still running, and the processes are joined only afterwards. Joining
    first is the documented "joining processes that use queues" hazard: a
    child that has put items on a queue does not exit until they have been
    flushed to the pipe, so join() can deadlock if the parent is not
    reading. The results are still printed after both children ended.
    """
    # Exception raised by Queue.get() when the timeout expires.
    from queue import Empty

    task_q = JoinableQueue()
    result_q = Queue()
    p1 = Process(name='p1', target=producer, args=('p1', task_q))
    c1 = Process(name='c1', target=consumer, args=('c1', task_q, result_q))
    processes = [p1, c1]
    for p in processes:
        p.start()

    # Drain while the consumer is alive, plus whatever it left behind.
    results = []
    while c1.is_alive() or not result_q.empty():
        try:
            results.append(result_q.get(timeout=0.1))
        except Empty:
            continue

    # join() blocks the main process
    for p in processes:
        p.join()

    # After the child processes have ended, print the collected results.
    for result in results:
        print('result is: %s' % result)
if __name__ == "__main__":
    # Only start the demo when this file is executed as a script.
    run()
If there are multiple consumer() processes, only one of them can take out the None and break; the others will hang forever in task_q.get(). The version below tries to solve this by adding a timeout exit to the consumer() method.
import queue
def consumer(name, task_q, result_q):
    """Run tasks from ``task_q`` until a stop marker arrives or it runs dry.

    Args:
        name: Label used in the printed messages.
        task_q: Queue providing ``get()`` and ``task_done()``. Items are
            either ``(func, arg)`` pairs or ``None``.
        result_q: Queue that receives ``func(arg)`` for every task.

    The loop ends when ``None`` is received or when no item shows up
    within one second.
    """
    while True:
        try:
            # The timeout has to be passed by keyword: the first positional
            # parameter of get() is ``block``, so get(1) would wait forever.
            task = task_q.get(timeout=1)  # 1s
        except queue.Empty:
            print('%s time out, break.' % name)
            # Leave the loop; there is no task to process on this path.
            break
        if task is None:
            # Acknowledge the stop marker so task_q.join() can return.
            task_q.task_done()
            print('%s break.' % name)
            break
        func, arg = task
        res = func(arg)
        time.sleep(0.5)  # block for half a second per task
        task_q.task_done()
        result_q.put(res)
        print('%s consume %s, result %s' % (name, arg, res))
Shared memory
Share memory using Array and Value from sharedctypes.
The following is an example.
# -*- coding: utf-8 -*-
from pprint import pprint
# Shared memory
from multiprocessing import sharedctypes, Process, Lock
from ctypes import Structure, c_bool, c_double
# Pretty-print sharedctypes.typecode_to_type for reference.
pprint(sharedctypes.typecode_to_type)
# Lock object passed as ``lock=`` to two of the shared arrays created below.
lock = Lock()
class Point(Structure):
    """ctypes structure with two double fields, ``x`` and ``y``."""

    # _fields_
    _fields_ = [
        ('x', c_double),
        ('y', c_double),
    ]
def modify(n, b, s, arr, A):
    """Change every object passed in, in place.

    Args:
        n: Object with a numeric ``value``; the value is squared.
        b: Object whose ``value`` is set to ``True``.
        s: Object whose ``value`` is replaced by its upper-cased form.
        arr: Indexable sequence; element 0 is set to 10.
        A: Iterable of objects with ``x`` and ``y``; both are squared.
    """
    n.value = n.value ** 2
    b.value = True
    s.value = s.value.upper()
    arr[0] = 10
    for point in A:
        point.x, point.y = point.x ** 2, point.y ** 2
if __name__ == '__main__':
    # Shared objects handed to the child process.
    n = sharedctypes.Value('i', 7)
    b = sharedctypes.Value(c_bool, False, lock=False)
    s = sharedctypes.Array('c', b'hello world', lock=lock)  # bytes
    arr = sharedctypes.Array('i', range(5), lock=True)
    A = sharedctypes.Array(Point, [(1.875, -6.25), (-5.75, 2.0)], lock=lock)

    # Let the child change them, then wait for it to finish.
    child = Process(target=modify, args=(n, b, s, arr, A))
    child.start()
    child.join()

    # Print what the parent sees after the child has ended.
    for shared in (n, b, s):
        print(shared.value)
    print(arr[:])
    points = [(a.x, a.y) for a in A]
    print(points)
In an actual project, Value was used to monitor the task status of child processes, and memcached was used to store, update and delete that status.
# -*- coding: utf-8 -*-
from multiprocessing import Process, Value
import time
import datetime
import random
# Status codes written to / read from the shared ``status`` value.
FINISHED = 3
FAILED = 4  # not referenced by the code shown in this example
INPROCESS = 2
WAITING = 1
def execute_method(status, process):
    """Stand-in task: set ``status`` to INPROCESS, then to FINISHED.

    Args:
        status: Object with a writable ``value``; updated twice, with a
            one-second pause before each update.
        process: Accepted but not used by this function.
    """
    for pause, new_state in ((1, INPROCESS), (1, FINISHED)):
        time.sleep(pause)
        status.value = new_state  # test
    time.sleep(0.5)
def run(execute_code):
    """Start ``execute_method`` in a child process and poll its status.

    Args:
        execute_code: Identifier of the task. In the code shown it only
            appears in the commented-out memcached calls.

    The shared status is printed every half second. Polling stops when the
    status reaches ``FINISHED``, or after more than 30 seconds, in which
    case the child process is terminated. The child is joined at the end.
    """
    status = Value('i', WAITING)
    process = Value('f', 0.0)
    # mem_cache.set('%s_status' % execute_code, status.value, 0)
    # mem_cache.set('%s_process' % execute_code, process .value, 0)
    p = Process(target=execute_method, args=(status, process))
    p.start()
    start_time = datetime.datetime.now()
    while True:
        # Read the status once so the printed and the tested value agree.
        current = status.value
        print(current)
        elapsed = datetime.datetime.now() - start_time
        # total_seconds() covers the whole interval; ``.seconds`` is only
        # the seconds component of the timedelta and ignores whole days.
        if elapsed.total_seconds() > 30:  # Over 30s: give up
            # mem_cache.delete('%s_status' % execute_code)
            # mem_cache.delete('%s_process' % execute_code)
            print('execute failed')
            p.terminate()
            break
        # Compare against the named constant rather than the literal 3.
        if current == FINISHED:
            # mem_cache.delete('%s_status' % execute_code)
            # mem_cache.delete('%s_process' % execute_code)
            print('end execute')
            break
        # mem_cache.set('%s_status' % execute_code, status.value, 0)
        # mem_cache.set('%s_process' % execute_code, process .value, 0)
        print('waiting or executing')
        time.sleep(0.5)
    p.join()
Service Process
Following is a simple example of how Manager is commonly shared, following the example of the service process in the blog.
A multiprocessing.Manager object controls a server process, which other processes can access through proxies. There are several common ways to share data:
1. Namespace. Create a shared namespace.
2. Value/Array. Share ctypes objects in the same way as above.
3. dict/list. Create a shareable dict/list, which supports the methods of the corresponding data structure.
4. Condition/Event/Lock/Queue/Semaphore. Create a shared object for the corresponding synchronization primitive.
# -*- coding: utf-8 -*-
from multiprocessing import Manager, Process
def modify(ns, lproxy, dproxy):
    """Write new entries into the three shared objects.

    Args:
        ns: Object whose ``name`` attribute is set to ``'new_name'``.
        lproxy: List-like object; ``'new_value'`` is appended.
        dproxy: Mapping; key ``'new'`` is set to ``'new_value'``.
    """
    fresh = 'new_value'
    ns.name = 'new_name'
    lproxy.append(fresh)
    dproxy['new'] = fresh
def run():
    """Share a namespace, a list and a dict with a child via a Manager.

    The manager is used as a context manager so its server process is
    shut down when the function finishes instead of being left running.
    """
    with Manager() as manager:
        # Data preparation
        ns = manager.Namespace()
        ns.name = 'origin_name'
        lproxy = manager.list()
        lproxy.append('origin_value')
        dproxy = manager.dict()
        dproxy['origin'] = 'origin_value'

        # Subprocess
        p = Process(target=modify, args=(ns, lproxy, dproxy))
        p.start()
        print(p.pid)
        p.join()

        # The proxies talk to the manager's server process, so they have to
        # be read before the with-block shuts the manager down.
        print('ns.name: %s' % ns.name)
        print('lproxy: %s' % lproxy)
        print('dproxy: %s' % dproxy)
if __name__ == "__main__":
    # Only start the demo when this file is executed as a script.
    run()
The example above mainly shows the shared object types and proxies offered by Manager; the full set of registered types can be seen in the register() calls in the source code.
multiprocessing/managers.py:
#
# Definition of SyncManager
#
class SyncManager(BaseManager):
    '''
    Subclass of `BaseManager` which supports a number of shared object types.

    The types registered are those intended for the synchronization
    of threads, plus `dict`, `list` and `Namespace`.

    The `multiprocessing.Manager()` function creates started instances of
    this class.
    '''
    # The class body is only this docstring; the shared types are attached
    # by the SyncManager.register(...) calls that follow the class.
# Both 'Queue' and 'JoinableQueue' are registered with queue.Queue.
SyncManager.register('Queue', queue.Queue)
SyncManager.register('JoinableQueue', queue.Queue)
# Primitives from the threading module, each registered with a proxy class.
SyncManager.register('Event', threading.Event, EventProxy)
SyncManager.register('Lock', threading.Lock, AcquirerProxy)
SyncManager.register('RLock', threading.RLock, AcquirerProxy)
SyncManager.register('Semaphore', threading.Semaphore, AcquirerProxy)
SyncManager.register('BoundedSemaphore', threading.BoundedSemaphore,
                     AcquirerProxy)
SyncManager.register('Condition', threading.Condition, ConditionProxy)
SyncManager.register('Barrier', threading.Barrier, BarrierProxy)
# Pool, containers, Value/Array and Namespace, each with its own proxy class.
SyncManager.register('Pool', pool.Pool, PoolProxy)
SyncManager.register('list', list, ListProxy)
SyncManager.register('dict', dict, DictProxy)
SyncManager.register('Value', Value, ValueProxy)
SyncManager.register('Array', Array, ArrayProxy)
SyncManager.register('Namespace', Namespace, NamespaceProxy)
# types returned by methods of PoolProxy
# (both are registered with create_method=False)
SyncManager.register('Iterator', proxytype=IteratorProxy, create_method=False)
SyncManager.register('AsyncResult', create_method=False)
In addition to sub-processes, Manager() can also be used to communicate between different processes, such as the following distributed processes.
Distribution process
The main difference from the previous example is that communication occurs between non-subprocesses.
manager_server.py:
# -*- coding: utf-8 -*-
from multiprocessing.managers import BaseManager
# Address and key the manager server listens with.
host = '127.0.0.1'
port = 8080
authkey = b'python'

# The list handed out through 'get_list'.
shared_list = []


class ServerManager(BaseManager):
    """Manager subclass used by the server; adds nothing to BaseManager."""


def _get_shared_list():
    """Return the module-level ``shared_list``."""
    return shared_list


ServerManager.register('get_list', callable=_get_shared_list)
server_manager = ServerManager(address=(host, port), authkey=authkey)
server = server_manager.get_server()
server.serve_forever()
manager_client.py
# -*- coding: utf-8 -*-
from multiprocessing.managers import BaseManager
# Address and key of the manager server to connect to.
host = '127.0.0.1'
port = 8080
authkey = b'python'


class ClientManager(BaseManager):
    """Manager subclass used by the client; adds nothing to BaseManager."""


# Only the name is registered on the client side.
ClientManager.register('get_list')
client_manager = ClientManager(address=(host, port), authkey=authkey)
client_manager.connect()

# Print the list before and after appending one value through the proxy.
remote_list = client_manager.get_list()
print(remote_list)
remote_list.append('new_value')
print(remote_list)
After running several times, new_value is added to the shared_list.
Modify it appropriately following the distributed process in Liao Xuefeng's tutorial.
manager_server.py:
# -*- coding: utf-8 -*-
from multiprocessing.managers import BaseManager
from multiprocessing import Condition, Value
import queue
# Address and key the manager server listens with.
host = '127.0.0.1'
port = 8080
authkey = b'python'

# Objects handed out to the clients through the registered getters.
task_q = queue.Queue(10)    # holds at most 10 tasks
result_q = queue.Queue(20)  # holds at most 20 results
cond = Condition()
done = Value('i', 0)


def double(n):
    """Return ``n`` multiplied by two."""
    return n * 2


class ServerManager(BaseManager):
    """Manager subclass used by the server; adds nothing to BaseManager."""
    pass


ServerManager.register('get_task_queue', callable=lambda: task_q)
ServerManager.register('get_result_queue', callable=lambda: result_q)
ServerManager.register('get_cond', callable=lambda: cond)
ServerManager.register('get_done', callable=lambda: done)
ServerManager.register('get_double', callable=double)

server_manager = ServerManager(address=(host, port), authkey=authkey)
server = server_manager.get_server()
print('start server')
# The call needs its closing parenthesis; without it the script is a
# SyntaxError and never starts.
server.serve_forever()
manager_producer.py:
# -*- coding: utf-8 -*-
from multiprocessing.managers import BaseManager
import random
import time
# Address and key of the manager server to connect to.
host = '127.0.0.1'
port = 8080
authkey = b'python'


class ProducerManager(BaseManager):
    """Manager subclass used by the producer; adds nothing to BaseManager."""
    pass


ProducerManager.register('get_task_queue')
ProducerManager.register('get_cond')
ProducerManager.register('get_done')
producer_manager = ProducerManager(address=(host, port), authkey=authkey)
producer_manager.connect()
task_q = producer_manager.get_task_queue()
cond = producer_manager.get_cond()
# done = producer_manager.get_done()

count = 20  # Up to 20 tasks
while count > 0:
    if not cond.acquire():
        continue
    try:
        if task_q.full():
            print("Producer:already full, stop deliver, now tasks:%s" % task_q.qsize())
            cond.wait()
            continue  # the finally clause still releases the lock
        n = random.randint(0, 10)
        task_q.put(n)
        print("Producer:deliver one, now tasks:%s" % task_q.qsize())
        cond.notify()
        count -= 1
    finally:
        # Always give the lock back, even on an error or Ctrl-C; the lock
        # lives in the server, so a leaked lock would block the consumer.
        cond.release()
    # Pause after releasing, so the consumer can take the lock meanwhile.
    time.sleep(0.5)
# done.value = 1
print('Producer break')
manager_consumer.py:
# -*- coding: utf-8 -*-
from multiprocessing.managers import BaseManager
# Address and key of the manager server to connect to.
host = '127.0.0.1'
port = 8080
authkey = b'python'


class ConsumerManager(BaseManager):
    """Manager subclass used by the consumer; adds nothing to BaseManager."""
    pass


ConsumerManager.register('get_task_queue')
ConsumerManager.register('get_result_queue')
ConsumerManager.register('get_cond')
# ConsumerManager.register('get_done')
ConsumerManager.register('get_double')
consumer_manager = ConsumerManager(address=(host, port), authkey=authkey)
consumer_manager.connect()
task_q = consumer_manager.get_task_queue()
result_q = consumer_manager.get_result_queue()
cond = consumer_manager.get_cond()
# done = consumer_manager.get_done()

# Consume tasks until the result queue is full.
while True:
    if result_q.full():
        print('result queue is full')
        break
    if not cond.acquire():
        continue
    try:
        if task_q.empty():
            print("Consumer:only 0, stop consume, products")
            cond.wait()
        else:
            arg = task_q.get()
            res = consumer_manager.get_double(arg)
            print("Consumer:consume one, now tasks:%s" % task_q.qsize())
            result_q.put(res)
            cond.notify()
    finally:
        # Always give the lock back, even on an error or Ctrl-C; the lock
        # lives in the server, so a leaked lock would block the producer.
        cond.release()

# Print every result that was collected.
while not result_q.empty():
    result = result_q.get()
    print('result is: %s' % result)