
Commit e56cd36

Browse files
committed
update process and thread tutorials
1 parent 4ffcd35 commit e56cd36

File tree

4 files changed

+248
-0
lines changed


13_thread/4.threadlocal.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
########################################ThreadLocal###################################

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import threading

# Create a global ThreadLocal object:
local_school = threading.local()

def process_student():
    # Get the student associated with the current thread:
    std = local_school.student
    print('Hello, %s (in %s)' % (std, threading.current_thread().name))

def process_thread(name):
    # Bind the current thread's student to the ThreadLocal object:
    print('main thread...')
    local_school.student = name
    process_student()

t1 = threading.Thread(target=process_thread, args=('Alice',), name='Thread-A')
t2 = threading.Thread(target=process_thread, args=('Bob',), name='Thread-B')
t1.start()
t2.start()
t1.join()
t2.join()

# The output is as follows:
#
# main thread...
# Hello, Alice (in Thread-A)
# main thread...
# Hello, Bob (in Thread-B)

# The global variable local_school is a ThreadLocal object. Each thread can read
# and write the student attribute on it without affecting other threads. You can
# think of local_school as a global variable, while each attribute such as
# local_school.student is a thread-local variable: it can be read and written
# freely without interference between threads, and without any locking, because
# ThreadLocal handles that internally.

# You can also think of local_school as a global dict: besides
# local_school.student, you can bind other variables to it, such as
# local_school.teacher, and so on.

# The most common use of ThreadLocal is binding a database connection, HTTP
# request, user identity, and the like to each thread, so that every function
# the thread calls can conveniently access these resources.

############################################ Summary #############################
# Although a ThreadLocal variable is a global variable, each thread reads and
# writes its own independent copy without interfering with other threads.
# ThreadLocal solves the problem of passing parameters between the functions of
# a single thread.
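To make the "bind a database connection per thread" use case concrete, here is a minimal sketch. The names (`local_ctx`, `Connection`, `handle_request`) and the fake connection class are illustrative assumptions, not part of the tutorial code above:

```python
import threading

# Hypothetical per-thread resource holder, mirroring local_school above:
local_ctx = threading.local()

class Connection:
    """A stand-in for a real database connection (illustrative only)."""
    def __init__(self, name):
        self.name = name
    def query(self, sql):
        return '%s handled by %s' % (sql, self.name)

def handle_request(sql):
    # Any function running in this thread can reach the connection without
    # it being passed as an argument:
    return local_ctx.conn.query(sql)

results = {}

def worker(name):
    local_ctx.conn = Connection(name)   # bound once per thread
    results[name] = handle_request('SELECT 1')

threads = [threading.Thread(target=worker, args=('conn-%d' % i,)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(results)
# Each thread saw only the connection it bound itself, e.g.
# {'conn-0': 'SELECT 1 handled by conn-0', ...}
```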

13_thread/5.prothread.py

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
# ######################################## Process vs Thread ################################

# We have introduced multiprocessing and multithreading, the two most common ways
# to implement multitasking. Now let's discuss the pros and cons of each approach.

# First of all, to implement multitasking we usually use the Master-Worker
# pattern: the Master assigns tasks and the Workers execute them. In a
# multitasking environment there is therefore usually one Master and multiple
# Workers.

# If Master-Worker is implemented with multiple processes, the main process is
# the Master and the other processes are the Workers.

# If Master-Worker is implemented with multiple threads, the main thread is the
# Master and the other threads are the Workers.

# The biggest advantage of the multi-process model is stability: if a child
# process crashes, it does not affect the main process or the other child
# processes. (Of course, if the main process dies, all processes die, but the
# Master process only assigns tasks, so its probability of crashing is low.)
# The famous Apache server was the first to adopt the multi-process model.

# The disadvantage of the multi-process model is that creating a process is
# expensive. On Unix/Linux a fork() call makes it tolerable, but creating
# processes on Windows is costly. In addition, the number of processes the
# operating system can run at the same time is limited: under memory and CPU
# constraints, with thousands of processes running at once the operating system
# will even run into scheduling problems.

# The multi-threaded model is usually slightly faster than the multi-process
# model, but not by much, and its fatal flaw is that any thread crashing may
# bring down the entire process, because all threads share the process's memory.
# On Windows, when the code executed by a thread has a problem you often see the
# prompt: "The program has performed an illegal operation and is about to close."
# Often only one thread had a problem, but the operating system forcibly
# terminates the entire process.

# On Windows, multithreading is more efficient than multiprocessing, so
# Microsoft's IIS server uses the multi-threaded model by default. Because of
# the stability problems of multithreading, IIS is less stable than Apache.
# To mitigate this, both IIS and Apache now offer a mixed multi-process +
# multi-threaded mode, which really complicates matters.
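The Master-Worker pattern described above can be sketched in a few lines. This is a thread-based version (a multi-process version would substitute multiprocessing, as the master/worker scripts later in this commit do); the task of squaring a number and the sentinel protocol are illustrative assumptions:

```python
import queue
import threading

# The main thread is the Master: it puts tasks on a queue.
# Worker threads take tasks off and push results back.
tasks = queue.Queue()
results = queue.Queue()

def worker():
    while True:
        n = tasks.get()
        if n is None:          # sentinel: no more work for this Worker
            break
        results.put(n * n)     # the "task" here is just squaring a number

workers = [threading.Thread(target=worker) for _ in range(4)]
for w in workers:
    w.start()

for n in range(10):            # Master assigns tasks
    tasks.put(n)
for _ in workers:              # one sentinel per Worker to shut it down
    tasks.put(None)
for w in workers:
    w.join()

squares = sorted(results.get() for _ in range(10))
print(squares)
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
```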
# ######################################## Thread Switching ################################

# Whether you use multiple processes or multiple threads, once their number is
# large enough, efficiency stops improving. Why?

# Let's use an analogy. Suppose you are, unfortunately, preparing for the senior
# high school entrance examination, and every night you must do homework in 5
# subjects: Chinese, mathematics, English, physics, and chemistry. Each
# assignment takes 1 hour.

# If you first spend 1 hour on the Chinese homework, then 1 hour on the math
# homework, and so on through each subject in turn, the total is 5 hours. This
# approach is called the single-task model, or batch task model.

# Now suppose you switch to a multitasking model: do Chinese for 1 minute, switch
# to math for 1 minute, then switch to English, and so on. As long as the
# switching is fast enough, this is exactly how a single-core CPU executes
# multiple tasks. From an outside observer's point of view, you appear to be
# doing all 5 assignments at the same time.

# However, switching assignments has a cost. When switching from Chinese to
# math, you first have to clear the Chinese books and pens off the desk (this is
# called saving the context), then open the math textbook and find your compass
# and ruler (this is called preparing the new environment) before you can start
# on the math homework. The operating system does the same when switching
# processes or threads: it saves the current execution context (CPU register
# state, memory pages, etc.), then prepares the new task's execution environment
# (restores the last register state, switches memory pages, etc.) before
# execution can resume. Although each switch is fast, it still takes time. With
# thousands of tasks running at once, the operating system may spend most of its
# time switching tasks, leaving little time to actually run them.

# Therefore, once multitasking passes a certain limit, it consumes all of the
# system's resources, efficiency drops sharply, and no task gets done well.
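The switching cost can be glimpsed with a rough sketch (not a rigorous benchmark; the numbers vary by machine, and the workload and thread count are arbitrary assumptions): the same total amount of CPU-bound work is done once in a single "batch" task, then chopped across 100 threads that the interpreter and OS must keep switching between.

```python
import threading
import time

COUNT = 100_000

def spin(n):
    # A trivial CPU-bound task: sum the first n integers in a loop.
    s = 0
    for i in range(n):
        s += i
    return s

# Single batch task:
t0 = time.perf_counter()
spin(COUNT)
single = time.perf_counter() - t0

# The same work split across 100 threads; thread creation plus switching add
# overhead without adding parallelism for CPU-bound work:
t0 = time.perf_counter()
threads = [threading.Thread(target=spin, args=(COUNT // 100,)) for _ in range(100)]
for t in threads:
    t.start()
for t in threads:
    t.join()
many = time.perf_counter() - t0

print('1 thread: %.4fs, 100 threads: %.4fs' % (single, many))
```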
# ################################## Compute-intensive vs. IO-intensive #######################

# A second consideration for multitasking is the type of task. We can divide
# tasks into compute-intensive and IO-intensive.

# Compute-intensive tasks are characterized by heavy computation that consumes
# CPU resources, such as computing pi or decoding high-definition video;
# everything depends on the CPU's computing power. Such tasks can be done with
# multitasking, but the more tasks there are, the more time is spent on task
# switching and the lower the CPU's efficiency. To use the CPU efficiently, the
# number of simultaneous compute-intensive tasks should equal the number of CPU
# cores.

# Since compute-intensive tasks mainly consume CPU, the efficiency of the code
# matters greatly. Scripting languages like Python run inefficiently and are
# quite unsuitable for compute-intensive tasks; such tasks are best written in C.

# The second type of task is IO-intensive: tasks involving network or disk IO.
# These tasks consume little CPU and spend most of their time waiting for IO
# operations to complete (because IO is far slower than the CPU and memory).
# For IO-intensive tasks, more tasks mean higher CPU utilization, up to a limit.
# Most everyday tasks, such as web applications, are IO-intensive.

# During an IO-intensive task, 99% of the time goes to IO and very little to the
# CPU, so replacing a very slow scripting language like Python with very fast C
# barely improves throughput. For IO-intensive tasks, the most suitable language
# is the one with the highest development efficiency (the least code): a
# scripting language is the first choice, and C is the worst.
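A short sketch of why more tasks help IO-bound work: while one thread waits for IO, others proceed. Here `time.sleep` stands in for a network or disk operation (an illustrative assumption), and the worker count of 10 is arbitrary:

```python
import time
from concurrent.futures import ThreadPoolExecutor

def fake_io(i):
    time.sleep(0.2)   # "waiting for IO" -- the CPU is idle here
    return i

t0 = time.perf_counter()
with ThreadPoolExecutor(max_workers=10) as ex:
    # Ten 0.2s waits overlap instead of running back to back:
    results = list(ex.map(fake_io, range(10)))
elapsed = time.perf_counter() - t0

# Total time is close to 0.2s rather than the sequential 2s:
print('got %d results in %.2fs' % (len(results), elapsed))
```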
# ################################## Asynchronous IO #######################
# Given the huge speed gap between the CPU and IO, a task spends most of its
# execution time waiting for IO operations. A single-process, single-threaded
# model would prevent other tasks from running concurrently, so we need a
# multi-process or multi-threaded model to support concurrent execution of
# multiple tasks.

# Modern operating systems have greatly improved IO operations, the biggest
# feature being support for asynchronous IO. By fully exploiting the operating
# system's asynchronous IO support, a single-process, single-threaded model can
# perform multitasking. This new model is called the event-driven model. Nginx
# is a web server that supports asynchronous IO: on a single-core CPU, its
# single-process model can efficiently support multitasking; on a multi-core
# CPU, it can run multiple processes (as many as there are CPU cores) to take
# full advantage of the cores. Because the total number of processes in the
# system stays very small, operating-system scheduling remains very efficient.
# Multitasking with the asynchronous IO programming model is a major trend.

# In Python, the single-threaded asynchronous programming model is called
# coroutines. With coroutine support, efficient multitasking programs can be
# written on top of event-driven execution. We'll discuss how to write
# coroutines later.
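As a small preview of the coroutines discussed later, here is a minimal asyncio sketch of the event-driven model: one process, one thread, yet two "IO waits" overlap because the event loop switches between coroutines at each await. The names `fetch` and the 0.1s delays are illustrative assumptions:

```python
import asyncio

async def fetch(name, delay):
    await asyncio.sleep(delay)   # stands in for an asynchronous IO operation
    return '%s done' % name

async def main():
    # Both coroutines run concurrently on a single thread; gather preserves
    # argument order in its result list:
    return await asyncio.gather(fetch('a', 0.1), fetch('b', 0.1))

results = asyncio.run(main())
print(results)
# ['a done', 'b done'] -- total time ~0.1s, not 0.2s
```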

13_thread/6.master.py

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
######################################## Master ###################################
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import random, time, queue
from multiprocessing.managers import BaseManager

# Queue for sending tasks:
task_queue = queue.Queue()
# Queue for receiving results:
result_queue = queue.Queue()

# QueueManager inherits from BaseManager:
class QueueManager(BaseManager):
    pass

# Register both Queues on the network; the callable parameter associates each
# registered name with a Queue object:
QueueManager.register('get_task_queue', callable=lambda: task_queue)
QueueManager.register('get_result_queue', callable=lambda: result_queue)
# Bind port 5000 and set the authkey 'abc':
manager = QueueManager(address=('', 5000), authkey=b'abc')
# Start the manager:
manager.start()
# Get the Queue objects as exposed over the network:
task = manager.get_task_queue()
result = manager.get_result_queue()
# Put a few tasks in:
for i in range(10):
    n = random.randint(0, 10000)
    print('Put task %d...' % n)
    task.put(n)
# Read the results from the result queue:
print('Try get results...')
for i in range(10):
    r = result.get(timeout=10)
    print('Result: %s' % r)
# Close:
manager.shutdown()
print('master exit.')

13_thread/7.worker.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
######################################## Worker ###################################
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time, sys, queue
from multiprocessing.managers import BaseManager

# Create a similar QueueManager:
class QueueManager(BaseManager):
    pass

# Since this QueueManager only gets the Queues from the network, only the names
# are provided when registering:
QueueManager.register('get_task_queue')
QueueManager.register('get_result_queue')

# Connect to the server, i.e. the machine running the master script (6.master.py),
# which must be started first:
server_addr = '127.0.0.1'
print('Connect to server %s...' % server_addr)
# Note that the port and authkey must exactly match those set in the master:
m = QueueManager(address=(server_addr, 5000), authkey=b'abc')
# Connect over the network:
m.connect()
# Get the Queue objects:
task = m.get_task_queue()
result = m.get_result_queue()
# Get tasks from the task queue and write results to the result queue:
for i in range(10):
    try:
        n = task.get(timeout=1)
        print('run task %d * %d...' % (n, n))
        r = '%d * %d = %d' % (n, n, n*n)
        time.sleep(1)
        result.put(r)
    except queue.Empty:   # note: queue.Empty, not Queue.Empty
        print('task queue is empty.')
# End of processing:
print('worker exit.')
