多线程/协程操作

ShakeCode · ShakeCode · commit b8283330a4eb · 2022-10-05T15:34:41.000+08:00
diff --git a/venv/com/python/thread/Coprogram.py b/venv/com/python/thread/Coprogram.py
@@ -0,0 +1,23 @@
+import asyncio
+
+
+async def hello():
+    """Print a greeting, wait one second without blocking the event loop, then print again."""
+    print("Hello world!")
+    # Suspend here; the event loop is free to run other coroutines in the meantime.
+    await asyncio.sleep(1)
+    print("Hello again!")
+
+
+# Create an event loop (get_event_loop() without a running loop is deprecated):
+loop = asyncio.new_event_loop()
+# Run the coroutine until it completes
+loop.run_until_complete(hello())
+loop.close()
+
+'''
+`async def` declares hello() as a coroutine, which we then hand to the event loop to run. hello() first prints "Hello world!";
+then `await` lets us conveniently call another coroutine. Because asyncio.sleep() is itself a coroutine, the thread does not block on it;
+control goes back to the event loop instead. When asyncio.sleep() finishes, hello() resumes after the `await` (whose value here is None) and runs the next statement.
+Think of asyncio.sleep(1) as an I/O operation that takes one second: meanwhile the thread is not waiting but runs other ready coroutines in the loop, which is how concurrency is achieved.
+'''
diff --git a/venv/com/python/thread/ThreadPool.py b/venv/com/python/thread/ThreadPool.py
@@ -0,0 +1,110 @@
+'''
+4.1 传统多线程问题？
+​ 传统多线程方案会使用“即时创建， 即时销毁”的策略。尽管与创建进程相比，创建线程的时间已经大大的缩短，但是如果提交给线程的任务是执行时间较短，而且执行次数极其频繁，那么服务器将处于不停的创建线程，销毁线程的状态。
+
+一个线程的运行时间可以分为3部分：线程的启动时间、线程体的运行时间和线程的销毁时间。在多线程处理的情景中，如果线程不能被重用，就意味着每次创建都需要经过启动、销毁和运行3个过程。这必然会增加系统响应的时间，降低了效率。
+
+有没有一种高效的解决方案呢？ —— 线程池
+
+4.2 线程池基本原理：
+​ 我们把任务放进队列中去，然后开N个线程，每个线程都去队列中取一个任务，执行完了之后告诉系统说我执行完了，然后接着去队列中取下一个任务，直至队列中所有任务取空，退出线程。
+
+使用线程池：
+​ 由于线程预先被创建并放入线程池中，同时处理完当前任务之后并不销毁而是被安排处理下一个任务，因此能够避免多次创建线程，从而节省线程创建和销毁的开销，能带来更好的性能和系统稳定性。
+
+线程池要设置为多少？
+
+服务器CPU核数有限，能够同时并发的线程数有限，并不是开得越多越好，以及线程切换是有开销的，如果线程切换过于频繁，反而会使性能降低
+
+线程执行过程中，计算时间分为两部分：
+
+CPU计算，占用CPU
+不需要CPU计算，不占用CPU，等待IO返回，比如recv(), accept(), sleep()等操作，具体操作就是比如
+访问cache、RPC调用下游service、访问DB，等需要网络调用的操作
+那么如果计算时间占50%， 等待时间50%，那么为了利用率达到最高，可以开2个线程：
+假如工作时间是2秒， CPU计算完1秒后，线程等待IO的时候需要1秒，此时CPU空闲了，这时就可以切换到另外一个线程，让CPU工作1秒后，线程等待IO需要1秒，此时CPU又可以切回去，第一个线程这时刚好完成了1秒的IO等待，可以让CPU继续工作，就这样循环地在两个线程之间切换操作。
+
+那么如果计算时间占20%， 等待时间80%，那么为了利用率达到最高，可以开5个线程：
+可以想象成完成任务需要5秒，CPU占用1秒，等待时间4秒，CPU在线程等待时，可以同时再激活4个线程，这样就把CPU和IO等待时间，最大化的重叠起来
+
+抽象一下，计算线程数设置的公式就是：
+N核服务器，通过执行业务的单线程分析出本地计算时间为x，等待时间为y，则工作线程数（线程池线程数）设置为 N*(x+y)/x，能让CPU的利用率最大化。
+由于有GIL的影响，python只能使用到1个核，所以这里设置N=1
+
+'''
+
+import queue
+import threading
+import time
+
+
+# 声明线程池管理类
+class WorkManager(object):
+    """Fill a queue with jobs and start worker threads that drain it."""
+    def __init__(self, work_num=1000, thread_num=2):
+        self.work_queue = queue.Queue()  # pending (func, args) jobs
+        self.threads = []  # worker threads
+        self.__init_work_queue(work_num)  # enqueue the jobs first ...
+        self.__init_thread_pool(thread_num)  # ... so workers find a non-empty queue
+
+    def __init_thread_pool(self, thread_num):
+        """Create thread_num Work threads, all sharing the job queue."""
+        for _ in range(thread_num):
+            self.threads.append(Work(self.work_queue))
+
+    def __init_work_queue(self, jobs_num):
+        """Enqueue jobs_num calls to do_job, numbered from 0."""
+        for i in range(jobs_num):
+            self.add_job(do_job, i)
+
+    def add_job(self, func, *args):
+        """Queue one job; the positional args are stored as a list.
+
+        Queue.put() is thread-safe, so no extra locking is needed.
+        """
+        self.work_queue.put((func, list(args)))
+
+    def wait_allcomplete(self):
+        """Block until every worker thread has finished.
+
+        Uses Thread.is_alive(); the camelCase isAlive() alias was
+        removed in Python 3.9 and raises AttributeError there.
+        """
+        for worker in self.threads:
+            if worker.is_alive():
+                worker.join()
+
+
+class Work(threading.Thread):
+    """Worker thread: runs queued jobs until the queue is empty, then exits."""
+    def __init__(self, work_queue):
+        threading.Thread.__init__(self)
+        self.work_queue = work_queue
+        self.start()  # begin consuming jobs as soon as the worker exists
+
+    def run(self):
+        while True:
+            try:
+                do, args = self.work_queue.get(block=False)
+            except queue.Empty:
+                break  # queue drained: let the thread finish
+            try:
+                do(args)  # the whole args list is passed as one argument
+            finally:
+                self.work_queue.task_done()  # balance the get() even if the job raised
+
+
+def do_job(args):
+    """Sample job: pause briefly, then print the current thread and the arguments."""
+    time.sleep(0.1)  # simulated processing time
+    print(threading.current_thread())
+    print([*args])
+
+
+if __name__ == '__main__':
+    # Time 100 jobs on 10 workers (try WorkManager(10000, 20) for a larger run).
+    began = time.time()
+    manager = WorkManager(100, 10)
+    manager.wait_allcomplete()
+    elapsed = time.time() - began
+    print("cost all time: %s" % elapsed)
diff --git a/venv/com/python/thread/ThreadQueue.py b/venv/com/python/thread/ThreadQueue.py
@@ -0,0 +1,84 @@
+import queue
+import threading
+import time
+
+'''
+线程同步队列queue
+
+python2.x中提供的Queue， Python3.x中提供的是queue
+
+见import queue.
+
+Python的queue模块中提供了同步的、线程安全的队列类，包括FIFO（先入先出)队列Queue，LIFO（后入先出）队列LifoQueue，和优先级队列PriorityQueue。这些队列都实现了锁原语，能够在多线程中直接使用。可以使用队列来实现线程间的同步。
+
+queue模块中的常用方法:
+
+queue.qsize() 返回队列的大小
+queue.empty() 如果队列为空，返回True,反之False
+queue.full() 如果队列满了，返回True,反之False
+queue.full 与 maxsize 大小对应
+queue.get([block[, timeout]])获取队列，timeout等待时间
+queue.get_nowait() 相当Queue.get(False)
+queue.put(item[, block[, timeout]]) 写入队列，timeout等待时间
+queue.put_nowait(item) 相当Queue.put(item, False)
+queue.task_done() 在完成一项工作之后，Queue.task_done()函数向任务已经完成的队列发送一个信号
+queue.join() 阻塞调用线程，直到队列中所有任务都已被取出并调用过task_done()，再执行别的操作
+'''
+
+
+exitFlag = 0  # process_data() keeps looping while this is falsy
+
+class myThread(threading.Thread):
+   """Thread that hands its name and queue to process_data()."""
+   def __init__(self, threadID, name, queue):
+      super().__init__()
+      self.threadID, self.name, self.queue = threadID, name, queue
+
+   def run(self):
+      # Announce the start, process queue items, then announce the exit.
+      print("Starting " + self.name)
+      process_data(self.name, self.queue)
+      print("Exiting " + self.name)
+
+def process_data(threadName, queue):
+   """Print items taken from `queue`, at most one per second, until exitFlag is set."""
+   while not exitFlag:
+      # Hold the lock across empty()+get() so no other worker takes the item in between.
+      with queueLock:
+         has_item = not queue.empty()  # check the queue that was passed in
+         data = queue.get() if has_item else None
+      if has_item:
+         print("%s processing %s" % (threadName, data))
+      time.sleep(1)
+
+threadList = ["Thread-1", "Thread-2", "Thread-3"]
+nameList = ["One", "Two", "Three", "Four", "Five"]
+queueLock = threading.Lock()
+workQueue = queue.Queue(10)
+threads = []
+threadID = 1
+
+# Create and start one worker thread per name.
+for tName in threadList:
+   thread = myThread(threadID, tName, workQueue)
+   thread.start()
+   threads.append(thread)
+   threadID += 1
+
+# Fill the queue while holding the lock;
+# `with` releases the lock even if put() raises.
+with queueLock:
+   for word in nameList:
+      workQueue.put(word)
+
+# Wait for the queue to drain; sleep between checks instead of spinning at full CPU.
+while not workQueue.empty():
+   time.sleep(0.1)
+
+# Tell the workers it is time to exit.
+exitFlag = 1
+
+# Wait for every worker to finish.
+for t in threads:
+   t.join()
+print("Exiting Main Thread")
diff --git a/venv/com/python/thread/ThreadTest1.py b/venv/com/python/thread/ThreadTest1.py
@@ -0,0 +1,24 @@
+import threading
+import time
+
+'''
+​ python主要是通过thread和threading这两个模块来实现多线程支持。python的thread模块是比较底层的模块，python的threading模块是对thread做了一些封装，可以更加方便的被使用。但是python（cpython）由于GIL的存在无法使用threading充分利用CPU资源，如果想充分发挥多核CPU的计算能力需要使用multiprocessing模块(Windows下使用会有诸多问题)。
+
+​ python3.x中已经摒弃了Python2.x中采用函数式thread模块中的start_new_thread()函数来产生新线程方式。
+
+python3.x中通过threading模块创建新的线程有两种方法：一种是通过threading.Thread(target=可执行方法)，即传递给Thread对象一个可执行方法（或对象）；第二种是继承threading.Thread定义子类并重写run()方法。第二种方法中，唯一必须重写的方法是run()
+'''
+
+def target():
+    """Report the current thread's name, sleep one second, then report that it ended."""
+    print("the current threading %s is runing" % threading.current_thread().name)
+    time.sleep(1)
+    print("the current threading %s is ended" % threading.current_thread().name)
+
+print("the current threading %s is runing" % threading.current_thread().name)
+# Run target() on a separate worker thread.
+t = threading.Thread(target=target)
+t.start()
+# join() blocks the current (main) thread until the worker thread has finished.
+t.join()
+print("the current threading %s is ended" % threading.current_thread().name)
diff --git a/venv/com/python/thread/ThreadTest2.py b/venv/com/python/thread/ThreadTest2.py
@@ -0,0 +1,55 @@
+import threading
+import time
+
+'''
+线程间同步
+​ 如果多个线程共同对某个数据修改，则可能出现不可预料的结果，为了保证数据的正确性，需要对多个线程进行同步。
+
+​ 使用Thread对象的Lock和Rlock可以实现简单的线程同步，这两个对象都有acquire方法和release方法，对于那些需要每次只允许一个线程操作的数据，可以将其操作放到acquire和release方法之间。
+
+需要注意的是，Python有一个GIL（Global Interpreter Lock）机制，任何线程在运行之前必须获取这个全局锁才能执行。在Python 2中每执行完100条字节码、在Python 3.2及以后默认每隔5毫秒（见sys.getswitchinterval()），全局锁才会释放，切换到其他线程执行。
+
+多线程实现同步有四种方式：
+
+锁机制，信号量，条件判断和同步队列。
+
+下面我主要关注两种同步机制：锁机制和同步队列。
+
+（1）锁机制
+
+threading的Lock类，用该类的acquire函数进行加锁，用release函数进行解锁
+'''
+
+class myThread(threading.Thread):
+   """Thread subclass whose run() prints the time five times via print_time()."""
+   def __init__(self, threadID, name, counter):
+      super().__init__()
+      self.threadID, self.name, self.counter = threadID, name, counter
+
+   def run(self):
+      # self.counter is passed as the delay; the repeat count is fixed at 5.
+      print("Starting " + self.name)
+      print_time(self.name, self.counter, 5)
+      print("Exiting " + self.name)
+
+
+def print_time(threadName, delay, counter):
+   """Sleep `delay` seconds, then print the current time; repeat until `counter` reaches 0."""
+   while counter:
+      time.sleep(delay)
+      counter -= 1
+      print("%s process at: %s" % (threadName, time.ctime()))
+
+# Create the two threads (the third argument is the delay between prints).
+thread1 = myThread(1, "Thread-1", 1)
+thread2 = myThread(2, "Thread-2", 2)
+
+# Start both threads.
+for worker in (thread1, thread2):
+   worker.start()
+
+# Wait for both threads to finish.
+for worker in (thread1, thread2):
+   worker.join()
+
+print("Exiting Main Thread")
diff --git a/venv/com/python/thread/ThreadWorker.py b/venv/com/python/thread/ThreadWorker.py
@@ -0,0 +1,38 @@
+import time
+import threading
+import queue
+
+
+class Worker(threading.Thread):
+    """Thread that processes items from a shared queue until the queue is empty."""
+    def __init__(self, name, queue):
+        threading.Thread.__init__(self, name=name)  # apply the caller-supplied thread name
+        self.queue = queue
+        self.start()  # runs run() in the new thread
+
+    def run(self):
+        from queue import Empty  # local import: independent of what the module-level name `queue` is bound to
+        while True:
+            try:
+                # Non-blocking get: an empty() check followed by a blocking get() can race and hang.
+                foo = self.queue.get_nowait()
+            except Empty:
+                break  # queue drained: exit the thread
+            try:
+                time.sleep(1)  # simulate one second of work
+                print('{0} process data: {1} \r\n'.format(self.name, str(foo)))
+            finally:
+                self.queue.task_done()  # always mark the item as finished
+
+
+# Shared work queue (named so that it does not shadow the imported queue module).
+task_queue = queue.Queue()
+# Enqueue 100 tasks.
+for i in range(100):
+    task_queue.put(i)
+# Create 10 workers; each Worker starts itself on construction.
+for i in range(10):
+    threadName = 'Thread' + str(i)
+    Worker(threadName, task_queue)
+# Block until every queued task has been marked done.
+task_queue.join()
diff --git a/venv/com/python/thread/async_await.py b/venv/com/python/thread/async_await.py
@@ -0,0 +1,35 @@
+import asyncio
+import re
+
+async def browser(host, port=80):
+    """Fetch "/" from host over plain HTTP; print the response headers and body."""
+    reader, writer = await asyncio.open_connection(host, port)
+    print(host, port, '连接成功!')
+    try:
+        # Ask the server to close the connection after responding; otherwise an
+        # HTTP/1.1 keep-alive connection leaves reader.read() below waiting for EOF.
+        index_get = 'GET {} HTTP/1.1\r\nHost:{}\r\nConnection: close\r\n\r\n'.format('/', host)
+        writer.write(index_get.encode())
+        await writer.drain()  # wait until the request has been flushed
+
+        # Read header lines up to the blank line that ends the header section.
+        while True:
+            line = await reader.readline()
+            if line in (b'\r\n', b''):  # b'' is EOF: stop rather than loop forever
+                break
+            print(host, '<header>', line)
+
+        # Read the body until the server closes the connection.
+        body = await reader.read()
+        print(host, '<content>', body)
+    finally:
+        writer.close()  # release the socket even if the exchange fails
+
+
+if __name__ == '__main__':
+    # asyncio.wait() rejects bare coroutines on Python 3.11+, so wrap each one in a Task.
+    loop = asyncio.new_event_loop()
+    tasks = [loop.create_task(browser(host)) for host in ['www.dushu.com', 'www.sina.com.cn', 'www.baidu.com']]
+    loop.run_until_complete(asyncio.wait(tasks))
+    loop.close()
+    print('---over---')
diff --git a/venv/com/python/thread/async_wget.py b/venv/com/python/thread/async_wget.py
@@ -0,0 +1,22 @@
+import asyncio
+
+async def wget(host):
+    """Request "/" from host on port 80 over HTTP/1.0 and print the response header lines."""
+    print('wget %s...' % host)
+    reader, writer = await asyncio.open_connection(host, 80)
+    try:
+        header = 'GET / HTTP/1.0\r\nHost: %s\r\n\r\n' % host
+        writer.write(header.encode('utf-8'))
+        await writer.drain()
+        while True:
+            line = await reader.readline()
+            if line in (b'\r\n', b''):  # blank line ends the headers; b'' is EOF
+                break
+            print('%s header > %s' % (host, line.decode('utf-8').rstrip()))
+    finally:
+        writer.close()  # ignore the body; close the socket even on error
+
+loop = asyncio.new_event_loop()  # explicit loop: get_event_loop() without a running loop is deprecated
+tasks = [loop.create_task(wget(host)) for host in ['www.sina.com.cn', 'www.sohu.com', 'www.163.com']]  # wait() needs Tasks
+loop.run_until_complete(asyncio.wait(tasks))
+loop.close()
diff --git a/venv/com/python/thread/coroutine.py b/venv/com/python/thread/coroutine.py