I. Knowledge of Processes in Operating Systems
The Unix/Linux operating system provides a fork() system call, which is special: an ordinary function is called once and returns once, but fork() is called once and returns twice. This is because the operating system automatically copies the current process (the parent process) into a duplicate (the child process), and then returns in both the parent and the child. In the child process, fork() always returns 0, while in the parent it returns the child's ID. The reason is that a parent process can fork many children and therefore needs to record each child's ID, whereas a child only needs to call getppid() to get its parent's ID. Python's os module wraps these common system calls, including fork, which makes it easy to create child processes in Python programs.
The following example is given:
```python
import os

pid = os.fork()
if pid == 0:
    print('I am child process %s and my parent is %s' % (os.getpid(), os.getppid()))
else:
    print('I (%s) just created a child process (%s).' % (os.getpid(), pid))
```
The output is as follows
```
I (64225) just created a child process (64226).
I am child process 64226 and my parent is 64225
```
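Since fork() gives the parent each child's ID, the parent can wait for every child explicitly. Below is a minimal sketch using os.waitpid (POSIX only, like fork() itself); the three-child loop is invented for illustration:

```python
import os

pids = []
for _ in range(3):
    pid = os.fork()
    if pid == 0:
        os._exit(0)  # child: would do its work here, then exit
    pids.append(pid)  # parent: record each child's ID

for pid in pids:
    finished, status = os.waitpid(pid, 0)  # block until that child exits
    print('child %d exited with status %d' % (finished, status))
```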
II. The Cross-Platform multiprocessing Module
The multiprocessing module provides a Process class to represent a process object.
Sample 1
```python
from multiprocessing import Process
import os

# Code to be executed by the child process
def run_proc(name):
    print('Run child process %s (%s)...' % (name, os.getpid()))

if __name__ == '__main__':
    print('Parent process %s.' % os.getpid())
    p = Process(target=run_proc, args=('test',))
    print('Child process will start.')
    p.start()
    p.join()  # join() waits for the child process to finish before continuing; commonly used for inter-process synchronization
    print('Child process end.')
```
Sample 2
```python
from multiprocessing import Process
import time
import os

class P(Process):
    def run(self):
        # Process objects carry a name attribute by default; here it is 'P-1'
        print('Run child process %s (%s)...' % (self.name, os.getpid()))
        time.sleep(3)
        print('%s is done' % self.name)

if __name__ == '__main__':
    print('Parent process %s.' % os.getpid())
    p = P()
    p.start()
    p.join()
```
III. Process Data Isolation
Data is isolated between processes; that is, each process has its own copy of global variables, so modifications in one process do not affect the others. Verification example:
```python
from multiprocessing import Process
import time

x = 100

def task():
    global x
    print('Child process starts; the current value of x is %d' % x)
    time.sleep(3)
    x = 10
    print('Child process ends; the current value of x is %d' % x)

if __name__ == '__main__':
    print('In the parent process, about to start the child process; the value of x is %d' % x)
    p1 = Process(target=task)
    p1.start()
    p1.join()
    print('In the parent process, about to exit; the value of x is %d' % x)
```
Output
```
In the parent process, about to start the child process; the value of x is 100
Child process starts; the current value of x is 100
Child process ends; the current value of x is 10
In the parent process, about to exit; the value of x is 100
```
==Note: some cases still require locking, for example when multiple processes read and write the same file.==
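As an illustration, here is a minimal sketch of guarding file writes with multiprocessing.Lock; the file name work.log is made up for the example:

```python
from multiprocessing import Process, Lock

def write_log(lock, n):
    # Acquire the lock so only one process writes to the file at a time
    with lock:
        with open('work.log', 'a') as f:  # 'work.log' is a hypothetical file name
            f.write('line from process %d\n' % n)

if __name__ == '__main__':
    lock = Lock()
    ps = [Process(target=write_log, args=(lock, n)) for n in range(5)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()
```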
IV. Multi-process Parallel Execution
The following example is given:
```python
import time
from multiprocessing import Process

def task(name, n):
    print('%s is running' % name)
    time.sleep(n)
    print('%s is done' % name)

if __name__ == '__main__':
    p1 = Process(target=task, args=('Process 1', 1))  # takes 1s
    p2 = Process(target=task, args=('Process 2', 2))  # takes 2s
    p3 = Process(target=task, args=('Process 3', 3))  # takes 3s
    start_time = time.time()
    p1.start()
    p2.start()
    p3.start()
    # While p1 runs during its first second, p2 and p3 are already running too,
    # so the total time is close to the longest task (3s), not the sum (6s).
    p1.join()
    p2.join()
    p3.join()
    stop_time = time.time()
    print(stop_time - start_time)  # 3.2848567962646484
```
V. Process Pool
1. Linear execution (pool.apply())
```python
from multiprocessing import Pool  # import the process pool class
import time, os

def foo(i):
    time.sleep(2)
    print("in process", os.getpid())  # print the worker's process number

if __name__ == "__main__":
    pool = Pool(processes=5)  # allow at most five processes in the pool at once
    for i in range(10):
        pool.apply(func=foo, args=(i,))  # synchronous: each call blocks until foo returns
    print('end')
    pool.close()  # close the pool; no new tasks are accepted
    pool.join()   # wait for the pool's processes to finish; if commented out, the program exits immediately
```
2. Concurrent execution (pool.apply_async())
```python
from multiprocessing import Pool  # import the process pool class
import time, os

def foo(i):
    time.sleep(2)
    print("in process", os.getpid())  # print the worker's process number

if __name__ == "__main__":
    pool = Pool(processes=5)  # at most five processes in the pool run at the same time
    for i in range(10):
        pool.apply_async(func=foo, args=(i,))  # asynchronous: submit foo and return immediately
    print('end')
    pool.close()
    pool.join()  # wait for the pool's processes to finish; if commented out, the program exits before the tasks run
```
3. Setting callbacks
```python
from multiprocessing import Pool
import time, os

def foo(i):
    time.sleep(2)
    print("in process", os.getpid())  # print the worker's process number

def bar(arg):  # the callback must take one argument: foo's return value
    print('-->exec done:', arg, os.getpid())  # print the caller's process number

if __name__ == "__main__":
    pool = Pool(processes=2)
    print("Main process", os.getpid())  # process number of the main process
    for i in range(3):
        pool.apply_async(func=foo, args=(i,), callback=bar)  # run the callback bar after each foo finishes
    print('end')
    pool.close()
    pool.join()  # wait for the pool's processes to finish; if commented out, the program exits immediately
```
Execution result:
```
Main process 752
end
in process 2348
-->exec done: None 752
in process 8364
-->exec done: None 752
in process 2348
-->exec done: None 752
```

The callback bar does not run until its foo has finished. Note from the process numbers that the callback is executed by the main process (752), not by the worker processes.
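The None in the output is simply foo's return value: apply_async hands whatever the task returns to the callback. A minimal sketch of that behavior (the squaring task is invented for illustration):

```python
from multiprocessing import Pool

def foo(i):
    return i * i  # the return value is passed to the callback

def bar(result):
    print('-->exec done:', result)  # prints 0, 1, 4 (in completion order)

if __name__ == '__main__':
    pool = Pool(processes=2)
    for i in range(3):
        pool.apply_async(func=foo, args=(i,), callback=bar)
    pool.close()
    pool.join()
```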
VI. Subprocesses
1. Often the subprocess is an external program, such as a command to execute, with the same effect as running it on the command line. The following example is given:
```python
import subprocess

print('$ nslookup https://www.baidu.com')
r = subprocess.call(['nslookup', 'https://www.baidu.com'])
print('Exit code:', r)
```
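For what it's worth, the same one-shot call can also be written with subprocess.run (available since Python 3.5; capture_output since 3.7), which returns a CompletedProcess object; a minimal sketch:

```python
import subprocess

# subprocess.run returns a CompletedProcess with returncode and, if requested, captured output
r = subprocess.run(['nslookup', 'https://www.baidu.com'], capture_output=True, text=True)
print(r.stdout)
print('Exit code:', r.returncode)
```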
2. Sometimes the subprocess needs input, which can be supplied through the communicate() method. The following example is given:
```python
import subprocess

print('$ nslookup https://www.baidu.com')
p = subprocess.Popen(['nslookup'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, err = p.communicate(b'set q=mx\nbaidu.com\nexit\n')
print(output.decode('gbk'))
print('Exit code:', p.returncode)
```
The output is as follows
```
$ nslookup https://www.baidu.com
Default server: bogon
Address: 192.168.111.1
> Server: bogon
Address: 192.168.111.1
baidu.com    MX preference = 10, mail exchanger = mx.maillb.baidu.com
baidu.com    MX preference = 20, mail exchanger = jpmx.baidu.com
baidu.com    MX preference = 15, mail exchanger = mx.n.shifen.com
baidu.com    MX preference = 20, mail exchanger = mx50.baidu.com
baidu.com    MX preference = 20, mail exchanger = mx1.baidu.com
>
Exit code: 0
```
VII. Daemon Processes
A daemon process is killed as soon as the main process's code finishes executing. The main process then waits for the non-daemon child processes to finish and reclaims their resources (to avoid zombie processes) before the whole program exits.
Example
```python
from multiprocessing import Process
import os
import time

def task(x):
    print('%s is running' % x)
    time.sleep(3)
    print('%s is done' % x)

if __name__ == '__main__':
    p1 = Process(target=task, args=('Daemon',))
    p2 = Process(target=task, args=('Child process',))
    p2.start()
    p1.daemon = True  # set p1 as a daemon; must be set before p1.start()
    p1.start()
    print('The main process code has been executed')
```

Output:

```
The main process code has been executed
Child process is running
Child process is done
```
==As the result shows, once the main process's code has finished, the daemon is killed immediately, while the main process still waits for the non-daemon child process to finish before exiting.==
VIII. Interprocess Communication
If you want processes to communicate, you can use a Queue or a Pipe. Queue example:
```python
from multiprocessing import Queue, Process

def put_id(q):
    q.put([1, 2, 3, 4])

if __name__ == '__main__':
    q = Queue()
    p = Process(target=put_id, args=(q,))
    p.start()
    print(q.get())  # output: [1, 2, 3, 4]
    p.join()
```
==Note: here Queue must be imported from multiprocessing, not from the standard queue module.==
Pipe example:
```python
from multiprocessing import Process, Pipe

def put_id(conn):
    conn.send([1, 2, 3])
    conn.send([4, 5, 6])
    conn.close()

if __name__ == '__main__':
    # Pipe() returns two connection objects, like the telephones at the two
    # ends of a line connected by the pipe; assign them to two variables.
    parent_conn, child_conn = Pipe()
    p = Process(target=put_id, args=(child_conn,))  # child_conn is passed to the other end, which sends data back to parent_conn
    p.start()
    print(parent_conn.recv())  # parent_conn receives data at this end: [1, 2, 3]
    print(parent_conn.recv())  # [4, 5, 6]
    p.join()
```
==Note that the number of sends on one end and receives on the other should match; otherwise a recv() with no matching send() blocks until data arrives.==
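If the receiving end cannot know the message count in advance, one alternative is to read until the sending end closes its connection, at which point recv() raises EOFError. A minimal sketch, reusing a put_id sender like the one above:

```python
from multiprocessing import Process, Pipe

def put_id(conn):
    for chunk in ([1, 2, 3], [4, 5, 6]):
        conn.send(chunk)
    conn.close()  # closing the write end lets recv() raise EOFError once drained

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    p = Process(target=put_id, args=(child_conn,))
    p.start()
    child_conn.close()  # close the parent's copy of the child end so EOF can be detected
    while True:
        try:
            print(parent_conn.recv())
        except EOFError:  # no senders left: all data has been received
            break
    p.join()
```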
IX. Interprocess Data Sharing (Dictionary and List)
As mentioned earlier, data between processes is isolated; if you want to share data between processes, you can use a Manager. The following example is given:
```python
from multiprocessing import Manager, Process
from random import randint
import os

def run(d, l):
    d[randint(1, 50)] = randint(51, 100)  # write to the shared dictionary
    l.append(os.getpid())  # append this process's id to the shared list
    print(l)

if __name__ == '__main__':
    with Manager() as manage:  # 'manage' is an alias for the Manager() instance
        d = manage.dict()  # a dictionary that can be passed to and shared between processes
        l = manage.list(range(5))  # a list that can be passed to and shared between processes
        p_list = []
        for i in range(10):  # start 10 processes
            p = Process(target=run, args=(d, l))
            p_list.append(p)  # keep each process in a list
            p.start()
        for i in p_list:
            i.join()
        print(d)  # print the dictionary after all processes have finished
        print(l)  # print the list after all processes have finished
```
X. Distributed Processes
In distributed computing, processes are better suited than threads: first, processes are more stable; second, threads can at best run on multiple CPUs of the same machine. The managers sub-module of multiprocessing supports distributing processes across multiple machines, with one service process acting as the scheduler and relying on the network to distribute tasks to the other processes. Suppose we need two machines, one to send tasks and one to process them. The following example is given:
```python
# task_master.py
from multiprocessing.managers import BaseManager
from queue import Queue
import random

task_queue = Queue()
result_queue = Queue()

class QueueManager(BaseManager):
    pass

def get_task_queue():
    global task_queue
    return task_queue

def get_result_queue():
    global result_queue
    return result_queue

if __name__ == '__main__':
    # Register the two queues on the network; the callable parameter associates each with a Queue object
    QueueManager.register('get_task_queue', callable=get_task_queue)
    QueueManager.register('get_result_queue', callable=get_result_queue)
    # Create the queue manager, bind port 5000, and set the authkey to b'abc'
    manager = QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
    manager.start()
    # Obtain the Queue objects through the network
    task = manager.get_task_queue()
    result = manager.get_result_queue()
    # Put the tasks in
    for i in range(10):
        n = random.randint(0, 1000)
        print('Put Task %d' % n)
        task.put(n)
    # Read the results from the result queue
    print('Try get results')
    for i in range(10):
        r = result.get()
        print('Result: %s' % r)
    manager.shutdown()
    print('master exit')
```
==Note: make sure to use the Queue objects obtained through the registered interfaces. Also, on Linux/Unix/macOS and similar systems you can register with a lambda directly, e.g. QueueManager.register('get_result_queue', callable=lambda: result_queue); on Windows the registered callable must be picklable, which a lambda is not.==
```python
# task_worker.py
from multiprocessing.managers import BaseManager
from queue import Empty
import time

class QueueManager(BaseManager):
    pass

if __name__ == '__main__':
    # These queues are fetched from the server, so registration only needs the name, i.e. the interface name
    QueueManager.register('get_task_queue')
    QueueManager.register('get_result_queue')
    # Connect to the server, i.e. the machine running task_master.py
    server_addr = '127.0.0.1'
    manager = QueueManager(address=(server_addr, 5000), authkey=b'abc')
    manager.connect()
    # Obtain the Queue objects
    task = manager.get_task_queue()
    result = manager.get_result_queue()
    # Take tasks from the task queue and put the processing results into the result queue
    for i in range(10):
        try:
            n = task.get(timeout=1)
            print('run task %d*%d' % (n, n))
            r = '%d * %d = %d' % (n, n, n * n)
            time.sleep(1)
            result.put(r)
        except Empty:
            print('task queue is empty')
    print('worker exit')
```
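To try this out, start task_master.py first; after putting the ten tasks in, it blocks on result.get(). Then run task_worker.py in another terminal, or on another machine once server_addr is changed from 127.0.0.1 to the master's address: the worker fetches each task over the network, computes n*n, and puts the result string back, after which the master prints the ten results and shuts down.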