60% of people don't know Python multiprocessing. Do you?

Keywords: Python multiprocessing

 

Preface
The text and images in this article come from the Internet and are for study and communication only, not for any commercial use. Copyright belongs to the original author. If you have any concerns, please contact us promptly.
Author: Chiang Gou 

 

Basic Use

When using multiple processes, put the startup code under if __name__ == '__main__':, otherwise an exception or warning will occur (the guard is required wherever the spawn start method is used, such as on Windows).

Process() Basic use: similar to Thread().
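A minimal sketch of that basic usage (the worker function say_hello is made up for illustration):

# -*- coding: utf-8 -*-
from multiprocessing import Process

def say_hello(name):
    print('hello, %s' % name)

if __name__ == '__main__':  # keep the startup code under the main guard
    p = Process(target=say_hello, args=('world',))
    p.start()
    p.join()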

Pool() Basic use:

The map method works like the built-in map function, but spreads the work across multiple processes.

from multiprocessing import Pool

if __name__ == '__main__':
    pool = Pool(2)
    pool.map(fib, [35] * 2)  # fib is the Fibonacci function defined in the demo below

multiprocessing.dummy module:

multiprocessing.dummy replicates the API of multiprocessing but is no more than a wrapper around the threading module.

I have not actually used some of these features in real work; the demos below were written just for practice, so my understanding of them may not be thorough.

# -*- coding: utf-8 -*-
from multiprocessing import Process, Pool
from multiprocessing.dummy import Pool as DummyPool
import time

def log_time(method_name):
    def decorator(f):
        def wrapper(*args, **kwargs):
            start_time = time.time()
            res = f(*args, **kwargs)
            end_time = time.time()
            print('%s cost %ss' % (method_name, (end_time - start_time)))
            return res
        return wrapper
    return decorator

def fib(n):
    if n <= 2:
        return 1
    return fib(n-1) + fib(n-2)

@log_time('single_process')
def single_process():
    fib(33)
    fib(33)

@log_time('multi_process')
def multi_process():
    jobs = []
    for _ in range(2):
        p = Process(target=fib, args=(33, ))
        p.start()
        jobs.append(p)
    for j in jobs:
        j.join()


@log_time('pool_process')
def pool_process():
    pool = Pool(2)
    pool.map(fib, [33]*2)


@log_time('dummy_pool')
def dummy_pool():
    pool = DummyPool(2)
    pool.map(fib, [33]*2)


if __name__ == '__main__':
    single_process()
    multi_process()
    pool_process()
    dummy_pool()

 

Pipe-based parmap

This one is slightly harder to understand.
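A minimal sketch of the classic Pipe-based parmap recipe (the helper names spawn and parmap come from that well-known recipe; it assumes the fork start method, the Unix default, because the nested worker function cannot be pickled under spawn):

# -*- coding: utf-8 -*-
from multiprocessing import Process, Pipe

def spawn(f):
    # Wrap f so the child sends its result back through its end of the pipe.
    def worker(conn, x):
        conn.send(f(x))
        conn.close()
    return worker

def parmap(f, X):
    # One Process and one Pipe pair per input item.
    pipes = [Pipe() for _ in X]
    procs = [Process(target=spawn(f), args=(child, x))
             for x, (parent, child) in zip(X, pipes)]
    for p in procs:
        p.start()
    # recv() before join(): a child blocked on a full pipe buffer
    # would otherwise deadlock against join().
    results = [parent.recv() for parent, child in pipes]
    for p in procs:
        p.join()
    return results

if __name__ == '__main__':
    print(parmap(lambda x: x * 2, range(5)))  # [0, 2, 4, 6, 8]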

Queue

Implement a producer-consumer model: one queue stores tasks, another stores results.
Queue is also available in the multiprocessing module, but it does not provide the task_done() and join() methods, so use Queue to store results and JoinableQueue() to store tasks.

Imitating the demo: one producer process and one consumer process:

# -*- coding: utf-8 -*-
from multiprocessing import Process, Queue, JoinableQueue
import time
import random

def double(n):
    return n * 2 

def producer(name, task_q):
    while 1:
        n = random.random()
        if n > 0.8:  # Jump out when greater than 0.8
            task_q.put(None)
            print('%s break.' % name)
            break
        print('%s produce %s.' % (name, n))
        task_q.put((double, n))


def consumer(name, task_q, result_q):
    while 1:
        task = task_q.get()
        if task is None:
            print('%s break.' % name)
            break
        func, arg = task
        res = func(arg)
        time.sleep(0.5)  # block
        task_q.task_done()
        result_q.put(res)
        print('%s consume %s, result %s' % (name, arg, res))

def run():
    task_q = JoinableQueue()
    result_q = Queue()
    processes = []
    p1 = Process(name='p1', target=producer, args=('p1', task_q))
    c1 = Process(name='c1', target=consumer, args=('c1', task_q, result_q))
    p1.start()
    c1.start()
    processes.append(p1)
    processes.append(c1)

    # join() blocks the main process
    for p in processes:
        p.join()

    # After the child processes end, print the values in the result queue
    while 1:
        if result_q.empty():
            break
        result = result_q.get()
        print('result is: %s' % result)

if __name__ == '__main__':
    run()

If there are multiple consumer() processes, only one of them can take out the None and break; the others will hang forever in task_q.get(). To fix this, add a timeout so consumer() can exit:

import queue

def consumer(name, task_q, result_q):
    while 1:
        try:
            task = task_q.get(timeout=1)  # wait at most 1s
        except queue.Empty:
            print('%s time out, break.' % name)
            break
        if task is None:
            print('%s break.' % name)
            break
        func, arg = task
        res = func(arg)
        time.sleep(0.5)  # block
        task_q.task_done()
        result_q.put(res)
        print('%s consume %s, result %s' % (name, arg, res))

Shared memory

Share memory using Value and Array from multiprocessing.sharedctypes.
The following example demonstrates both, along with a ctypes Structure:

# -*- coding: utf-8 -*-

from pprint import pprint

# Shared memory
from multiprocessing import sharedctypes, Process, Lock
from ctypes import Structure, c_bool, c_double

pprint(sharedctypes.typecode_to_type)

lock = Lock()


class Point(Structure):
    _fields_ = [('x', c_double), ('y', c_double)]  # two shared double fields


def modify(n, b, s, arr, A):
    n.value **= 2
    b.value = True
    s.value = s.value.upper()
    arr[0] = 10
    for a in A:
        a.x **= 2
        a.y **= 2

if __name__ == '__main__':

    n = sharedctypes.Value('i', 7)
    b = sharedctypes.Value(c_bool, False, lock=False)
    s = sharedctypes.Array('c', b'hello world', lock=lock)  # bytes
    arr = sharedctypes.Array('i', range(5), lock=True)
    A = sharedctypes.Array(Point, [(1.875, -6.25), (-5.75, 2.0)], lock=lock)
    p = Process(target=modify, args=(n, b, s, arr, A))
    p.start()
    p.join()
    print(n.value)
    print(b.value)
    print(s.value)
    print(arr[:])
    print([(a.x, a.y) for a in A])

In a real project, Value is used to monitor the task status of child processes, with memcached storing the status updates and deletions.

# -*- coding: utf-8 -*-

from multiprocessing import Process, Value
import time
import datetime
import random


FINISHED = 3
FAILED = 4
INPROCESS = 2
WAITING = 1

def execute_method(status, process):
    time.sleep(1)
    status.value = INPROCESS  # simulate: task starts executing
    time.sleep(1)
    status.value = FINISHED  # simulate: task finishes
    time.sleep(0.5)

def run(execute_code):
    status = Value('i', WAITING)
    process = Value('f', 0.0)
    # mem_cache.set('%s_status' % execute_code, status.value, 0)
    # mem_cache.set('%s_process' % execute_code, process.value, 0)
    p = Process(target=execute_method, args=(status, process))
    p.start()
    start_time = datetime.datetime.now()
    while True:
        print(status.value)
        now_time = datetime.datetime.now()
        if (now_time - start_time).seconds > 30:  # break after 30s
            # mem_cache.delete('%s_status' % execute_code)
            # mem_cache.delete('%s_process' % execute_code)
            print('execute failed')
            p.terminate()
            break
        if status.value == FINISHED:
            # mem_cache.delete('%s_status' % execute_code)
            # mem_cache.delete('%s_process' % execute_code)
            print('end execute')
            break
        else:
            # mem_cache.set('%s_status' % execute_code, status.value, 0)
            # mem_cache.set('%s_process' % execute_code, process.value, 0)
            print('waiting or executing')
        time.sleep(0.5)
    p.join()

Service Process

The following is a simple example of the common ways Manager shares objects, modeled on the service-process example in the blog.

A multiprocessing.Manager object controls a server process that other processes can access through proxies. There are several common ways to share:
1. Namespace: creates a shared namespace.
2. Value/Array: shares ctypes objects, in the same way as above.
3. dict/list: creates a shared dict/list that supports the corresponding data-structure methods.
4. Condition/Event/Lock/Queue/Semaphore: creates a shared object for the corresponding synchronization primitive.

# -*- coding: utf-8 -*-
from multiprocessing import Manager, Process

def modify(ns, lproxy, dproxy):
    ns.name = 'new_name'
    lproxy.append('new_value')
    dproxy['new'] = 'new_value'

def run():
    # Data preparation
    manager = Manager()
    ns = manager.Namespace()
    ns.name = 'origin_name'
    lproxy = manager.list()
    lproxy.append('origin_value')
    dproxy = manager.dict()
    dproxy['origin'] = 'origin_value'

    # Subprocess
    p = Process(target=modify, args=(ns, lproxy, dproxy))
    p.start()
    print(p.pid)
    p.join()

    print('ns.name: %s' % ns.name)
    print('lproxy: %s' % lproxy)
    print('dproxy: %s' % dproxy)

if __name__ == '__main__':
    run()

The example above mainly shows the shared object types and proxies that Manager provides; the full list can be seen in the source code through the register() calls.

multiprocessing/managers.py:

#
# Definition of SyncManager
#

class SyncManager(BaseManager):
    '''
    Subclass of `BaseManager` which supports a number of shared object types.

    The types registered are those intended for the synchronization
    of threads, plus `dict`, `list` and `Namespace`.

    The `multiprocessing.Manager()` function creates started instances of
    this class.
    '''

SyncManager.register('Queue', queue.Queue)
SyncManager.register('JoinableQueue', queue.Queue)
SyncManager.register('Event', threading.Event, EventProxy)
SyncManager.register('Lock', threading.Lock, AcquirerProxy)
SyncManager.register('RLock', threading.RLock, AcquirerProxy)
SyncManager.register('Semaphore', threading.Semaphore, AcquirerProxy)
SyncManager.register('BoundedSemaphore', threading.BoundedSemaphore,
                     AcquirerProxy)
SyncManager.register('Condition', threading.Condition, ConditionProxy)
SyncManager.register('Barrier', threading.Barrier, BarrierProxy)
SyncManager.register('Pool', pool.Pool, PoolProxy)
SyncManager.register('list', list, ListProxy)
SyncManager.register('dict', dict, DictProxy)
SyncManager.register('Value', Value, ValueProxy)
SyncManager.register('Array', Array, ArrayProxy)
SyncManager.register('Namespace', Namespace, NamespaceProxy)

# types returned by methods of PoolProxy
SyncManager.register('Iterator', proxytype=IteratorProxy, create_method=False)
SyncManager.register('AsyncResult', create_method=False)

Besides parent-child communication, Manager() can also be used for communication between unrelated processes, as in the distributed-process example below.

Distributed Process

The main difference from the previous example is that the communicating processes are no longer parent and child.

manager_server.py:

# -*- coding: utf-8 -*-

from multiprocessing.managers import BaseManager

host = '127.0.0.1'
port = 8080
authkey = b'python'

shared_list = []

class ServerManager(BaseManager):
    pass

ServerManager.register('get_list', callable=lambda: shared_list)
server_manager = ServerManager(address=(host, port), authkey=authkey)
server = server_manager.get_server()
server.serve_forever()

manager_client.py

# -*- coding: utf-8 -*-

from multiprocessing.managers import BaseManager

host = '127.0.0.1'
port = 8080
authkey = b'python'

class ClientManager(BaseManager):
    pass

ClientManager.register('get_list')
client_manager = ClientManager(address=(host, port), authkey=authkey)
client_manager.connect()

l = client_manager.get_list()
print(l)

l.append('new_value')
print(l)

After running the client several times, a new_value is appended to shared_list on each run.

The following adapts the distributed-process example from Liao Xuefeng's tutorial.

manager_server.py:

# -*- coding: utf-8 -*-

from multiprocessing.managers import BaseManager
from multiprocessing import Condition, Value
import queue

host = '127.0.0.1'
port = 8080
authkey = b'python'


task_q = queue.Queue(10)
result_q = queue.Queue(20)
cond = Condition()
done = Value('i', 0)

def double(n):
    return n * 2

class ServerManager(BaseManager):
    pass

ServerManager.register('get_task_queue', callable=lambda: task_q)
ServerManager.register('get_result_queue', callable=lambda: result_q)
ServerManager.register('get_cond', callable=lambda: cond)
ServerManager.register('get_done', callable=lambda: done)
ServerManager.register('get_double', callable=double)

server_manager = ServerManager(address=(host, port), authkey=authkey)
server = server_manager.get_server()

print('start server')
server.serve_forever()

manager_producer.py:

# -*- coding: utf-8 -*-

from multiprocessing.managers import BaseManager
import random
import time

host = '127.0.0.1'
port = 8080
authkey = b'python'

class ProducerManager(BaseManager):
    pass

ProducerManager.register('get_task_queue')
ProducerManager.register('get_cond')
ProducerManager.register('get_done')
producer_manager = ProducerManager(address=(host, port), authkey=authkey)

producer_manager.connect()
task_q  = producer_manager.get_task_queue()
cond = producer_manager.get_cond()
# done = producer_manager.get_done()
count = 20  # Up to 20 tasks

while count > 0:
    if cond.acquire():
        if not task_q.full():
            n = random.randint(0, 10)
            task_q.put(n)
            print("Producer:deliver one, now tasks:%s" % task_q.qsize())
            cond.notify()
            count -= 1
            time.sleep(0.5)
        else:
            print("Producer:already full, stop deliver, now tasks:%s" % task_q.qsize())
            cond.wait() 
        cond.release()
# done.value = 1
print('Producer break')

manager_consumer.py:

# -*- coding: utf-8 -*-

from multiprocessing.managers import BaseManager

host = '127.0.0.1'
port = 8080
authkey = b'python'

class ConsumerManager(BaseManager):
    pass

ConsumerManager.register('get_task_queue')
ConsumerManager.register('get_result_queue')
ConsumerManager.register('get_cond')
# ConsumerManager.register('get_done')
ConsumerManager.register('get_double')

consumer_manager = ConsumerManager(address=(host, port), authkey=authkey)
consumer_manager.connect()

task_q = consumer_manager.get_task_queue()
result_q = consumer_manager.get_result_queue()
cond = consumer_manager.get_cond()
# done = consumer_manager.get_done()

while 1:
    if result_q.full():
        print('result queue is full')
        break
    if cond.acquire():
        if not task_q.empty():
            arg = task_q.get()
            res = consumer_manager.get_double(arg)
            print("Consumer:consume one, now tasks:%s" % task_q.qsize())
            result_q.put(res)
            cond.notify()
        else:
            print("Consumer:only 0, stop consume, products")
            cond.wait()
        cond.release()

while 1:
    if result_q.empty():
        break
    result = result_q.get()
    print('result is: %s' % result)
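To try this example, start manager_server.py first, then run manager_producer.py and manager_consumer.py in separate terminals. With the queue sizes above, the consumer's first loop exits once the result queue is full (the producer delivers 20 tasks and result_q holds 20), after which the collected results are printed.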
