class A:
    """Class whose instances may only carry the attributes named in ``__slots__``."""

    __slots__ = ["a", "b"]


a = A()
a.a = 2  # allowed: "a" is listed in __slots__
try:
    a.c = 3  # rejected: "c" is not listed in __slots__
except AttributeError as err:
    print(f"AttributeError: {err}")
注意:假设类 B 继承自定义了 __slots__ 的类 A,且 B 自身没有定义 __slots__,那么 B 的实例仍然拥有 __dict__,因此不会受到父类 __slots__ 的限制。
2.5 内置函数 dir 与 __dir__ 方法
从设计理念上说:不同于 vars 与 __dict__,dir 方法倾向于给出全部信息:包括特殊方法名
dir 函数返回的是一个标识符名列表,逻辑是:首先寻找 __dir__ 方法的定义(object 类中有着默认的实现),若存在 __dir__ 方法,则返回 sorted(x.__dir__())。备注:__dir__ 方法的返回值必须是一个可迭代对象。
若该类没有自定义 __dir__ 函数,则使用 object 类的实现逻辑,大略如下:
If the object is a module object, the list contains the names of the module’s attributes.
If the object is a type or class object, the list contains the names of its attributes, and recursively of the attributes of its bases.
Otherwise, the list contains the object’s attributes’ names, the names of its class’s attributes, and recursively of the attributes of its class’s base classes.
class A:
    """Shows that assigning an attribute inside ``__setattr__`` re-enters it."""

    def __setattr__(self, name, value):
        print(f"enter __setattr__({name}, {value})")
        if name == "a":
            # Re-enters __setattr__ with name == "b", which stores nothing.
            self.b = value
        if name == "c":
            # Re-enters __setattr__ with the same name: unbounded recursion.
            self.c = value


a = A()
a.a = 3  # net effect: nothing is stored
# enter __setattr__(a, 3)
# enter __setattr__(b, 3)
a.b = 3  # net effect: nothing is stored
# enter __setattr__(b, 3)
try:
    a.c = 3  # recurses until the interpreter's recursion limit is reached
    # enter __setattr__(c, 3)
    # enter __setattr__(c, 3)
    # enter __setattr__(c, 3)
    # ...
except RecursionError as err:
    print(f"RecursionError: {err}")
总结如下: 以下几种方式给属性赋值:
obj.name=value: 直接触发 __setattr__ 方法, 但这里的 name 得是一个合法的标识符
setattr(obj, name, value): 同上, name 可以不是合法的标识符
这种方式仅做说明, 平时不会使用到
obj.__setattr__(name, value): 同上, 但会多触发一次 getattr("__setattr__") 的调用, name 可以不是合法的标识符
class OneDigitNumericValue:
    """Data descriptor that keeps its value in the owning instance's ``__dict__``."""

    def __set_name__(self, owner, name):
        # Invoked when the owning class is created; for the class below,
        # owner is Foo and name is "number".
        self.name = name

    def __get__(self, obj, type=None) -> object:
        if obj is None:
            # Accessed on the class itself (Foo.number): there is no instance
            # __dict__ to read from, so hand back the descriptor.
            return self
        # A missing (or falsy) stored value reads as 0.
        return obj.__dict__.get(self.name) or 0

    def __set__(self, obj, value) -> None:
        obj.__dict__[self.name] = value


class Foo:
    number = OneDigitNumericValue()


my_foo_object = Foo()
my_second_foo_object = Foo()
my_foo_object.number = 3
print(my_foo_object.number)
print(my_second_foo_object.number)
my_third_foo_object = Foo()
print(my_third_foo_object.number)
实用例子
避免重复使用 property
class Values:
    """Three attributes that each accept even numbers only (odd values become 0).

    Every attribute repeats the same property/setter pair by hand.
    """

    def __init__(self):
        self._value1 = 0
        self._value2 = 0
        self._value3 = 0

    @property
    def value1(self):
        return self._value1

    @value1.setter
    def value1(self, value):
        self._value1 = value if value % 2 == 0 else 0

    @property
    def value2(self):
        return self._value2

    @value2.setter
    def value2(self, value):
        self._value2 = value if value % 2 == 0 else 0

    @property
    def value3(self):
        return self._value3

    @value3.setter
    def value3(self, value):
        self._value3 = value if value % 2 == 0 else 0


my_values = Values()
my_values.value1 = 1
my_values.value2 = 4
print(my_values.value1)
print(my_values.value2)
可以使用如下方法实现
class EvenNumber:
    """Data descriptor that stores even values as given and odd values as 0."""

    def __set_name__(self, owner, name):
        # Remember the attribute name this descriptor was assigned to.
        self.name = name

    def __get__(self, obj, type=None) -> object:
        if obj is None:
            # Accessed on the class itself (Values.value1): there is no
            # instance __dict__ to read from, so hand back the descriptor.
            return self
        # A missing (or falsy) stored value reads as 0.
        return obj.__dict__.get(self.name) or 0

    def __set__(self, obj, value) -> None:
        obj.__dict__[self.name] = value if value % 2 == 0 else 0


class Values:
    value1 = EvenNumber()
    value2 = EvenNumber()
    value3 = EvenNumber()


my_values = Values()
my_values.value1 = 1
my_values.value2 = 4
print(my_values.value1)
print(my_values.value2)
2.10 pickle 与 __setstate__、__getstate__ 方法
一个说明功能的例子:
import pickle


class A:
    """Shows how ``__getstate__``/``__setstate__`` customise pickling."""

    def __init__(self, a):
        self.a = a

    def __getstate__(self):
        # Whatever is returned here is what gets pickled in place of __dict__.
        return (self.a, self.a + 1)

    def __setstate__(self, state):
        # Receives the object produced by __getstate__ when unpickling.
        a, b = state
        print(a, b)
        self.a = "recover"


a = A(2)
with open("test.pkl", "wb") as fw:
    pickle.dump(a, fw)
with open("test.pkl", "rb") as fr:
    a = pickle.load(fr)
print(a.a)  # "recover"
unless you make strong use of multiple inheritance and you have non-trivial hierarchies, you don't need to understand the C3 algorithm, and you can easily skip this paper.
class Base:
    def __init__(self):
        print("Base")
        super().__init__()


class A(Base):
    def __init__(self):
        print("A")
        super().__init__()


class B(Base):
    def __init__(self):
        print("B")
        super().__init__()


class C(A, B):
    def __init__(self):
        print("C")
        super().__init__()


C()
# Output:
# C
# A
# B
# Base
上例为典型的菱形继承方式,使用 super 可以按照 MRO 顺序依次调用 __init__ 函数一次。
class VerifySnakeCase:
    """Base class that rejects subclasses whose member names contain upper case."""

    def __init_subclass__(cls, name, **kwargs):
        # For ``class Animal(VerifySnakeCase, name="animal")``: cls is Animal,
        # name is "animal" and kwargs is {}.
        print(cls, name, kwargs)
        # object.__init_subclass__ accepts no extra arguments, so `name` is
        # consumed here and only the remaining kwargs are forwarded.
        super().__init_subclass__(**kwargs)
        cls.name = name
        # Sorted so that the error message is deterministic.
        not_snake_case = sorted(
            member for member in cls.__dict__ if cls._not_snake_case(member)
        )
        if not_snake_case:
            raise ValueError(f'The following members are not in snake case: {", ".join(not_snake_case)}')

    @classmethod
    def _not_snake_case(cls, txt):
        """Return True when *txt* contains at least one upper-case character."""
        return txt.lower() != txt


class Animal(VerifySnakeCase, name="animal"):
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def eat_method(self):
        print('This animal can eat.')

    def sleep_method(self):
        print('This animal can sleep.')


# Extra keyword arguments to type() are forwarded to __init_subclass__.
Dog = type("Dog", (VerifySnakeCase,), {}, name="dog")
class MetaA(type):
    """Metaclass whose ``__new__`` builds the class with plain ``type(...)``."""

    def __new__(cls, name, bases, dct):
        print('MetaA.__new__ begin')
        # type(name, bases, dct) creates an instance of `type`, not of MetaA,
        # so MetaA.__init__ is not run on the result.
        t = type(name, bases, dct)
        print('MetaA.__new__ end', t)
        return t

    def __init__(cls, name, bases, dct):
        print('MetaA.__init__')


class A(object, metaclass=MetaA):
    pass


# Output:
# MetaA.__new__ begin
# MetaA.__new__ end <class '__main__.A'>
class MetaA(type):
    """Metaclass whose ``__new__`` builds the class with ``type.__new__(cls, ...)``."""

    def __new__(cls, name, bases, dct):
        print('MetaA.__new__ begin')
        # Passing cls makes the new class an instance of MetaA, so
        # MetaA.__init__ runs afterwards.
        t = type.__new__(cls, name, bases, dct)
        print('MetaA.__new__ end', t)
        return t

    def __init__(cls, name, bases, dct):
        print('MetaA.__init__')


class A(object, metaclass=MetaA):
    pass


# Output:
# MetaA.__new__ begin
# MetaA.__new__ end <class '__main__.A'>
# MetaA.__init__
object.__new__ 函数与 object.__init__ 函数
以下是一个代码样例:
class A(object):
    """Class whose ``__new__`` returns an instance of a different class (B)."""

    def __init__(self, *args, **kwargs):
        print("run the init of A")

    def __new__(cls, *args, **kwargs):
        print(f"run the new of A, parameters: {cls}")
        return object.__new__(B)


class B(object):
    """Class whose ``__new__`` mutates its first positional argument in place."""

    def __init__(self, *args, **kwargs):
        print("run the init of B")
        print(f"extra parameters for __init__: {args}, {kwargs}")
        print("id in __init__", id(args[0]), args, id(kwargs))
        self.args = args
        self.kwargs = kwargs

    def __new__(cls, *args, **kwargs):
        print("run the new of B", cls)
        print(f"extra parameters for __new__: {args}, {kwargs}")
        print("id in __new__ start", id(args[0]), args, id(kwargs))
        # Mutating the dict in place is visible to __init__; rebinding the
        # local name (args = ({"a": 2, "b": 3},)) would have no effect there.
        args[0]["b"] = 3
        print("id in __new__ after", id(args[0]), args, id(kwargs))
        return object.__new__(cls)  # object.__new__ takes the class only


a = A()  # only A.__new__ runs; no __init__ is called
print(type(a))  # <class '__main__.B'>
print("===============")
b = B({"a": 2}, c=2)
# Execution logic: the cls parameter of __new__ is filled with B automatically.
# A guess at the equivalent pseudo-code:
# def _construct_guess(*args, **kwargs):
#     ret = B.__new__(B, *args, **kwargs)
#     if isinstance(ret, B):
#         B.__init__(ret, *args, **kwargs)
#     return ret
# The actual calls are:
# b = B.__new__(B, {"a": 2}, c=2)       # inside: args == ({"a": 2},), kwargs == {"c": 2}
# B.__init__(b, {"a": 2, "b": 3}, c=2)  # same dict object, already mutated by __new__
print(type(b), b.args, b.kwargs)
输出结果
run the new of A, parameters: <class '__main__.A'>
<class '__main__.B'>
===============
run the new of B <class '__main__.B'>
extra parameters for __new__: ({'a': 2},), {'c': 2}
id in __new__ start 139811860204800 ({'a': 2},) 139811860203200
id in __new__ after 139811860204800 ({'a': 2, 'b': 3},) 139811860203200
run the init of B
extra parameters for __init__: ({'a': 2, 'b': 3},), {'c': 2}
id in __init__ 139811860204800 ({'a': 2, 'b': 3},) 139811860203200
<class '__main__.B'> ({'a': 2, 'b': 3},) {'c': 2}
abc 模块
最佳实践
from abc import abstractmethod, ABCMeta, ABC


# Equivalent spelling: class Model(metaclass=ABCMeta):
class Model(ABC):
    @abstractmethod
    def foo(self):
        """This method foos the model."""
用法如下:
from abc import ABCMeta, abstractmethod


class Base(metaclass=ABCMeta):
    """Abstract base with two abstract methods; ``foo`` also has a body."""

    @abstractmethod
    def foo(self):
        # An abstract method may carry an implementation that subclasses
        # reach through super().
        print("foo")

    @abstractmethod
    def bar(self):
        pass


class A(Base):
    def foo(self):
        print("A foo")

    def bar(self):
        print("A bar")


a = A()
super(A, a).foo()  # runs Base.foo on the A instance
a.foo()
a.bar()
# 1) without the with statement: close() is skipped if write() raises
file = open('file_path', 'w')
file.write('hello world !')
file.close()

# 2) without the with statement, but try/finally guarantees the close
file = open('file_path', 'w')
try:
    file.write('hello world')
finally:
    file.close()

# 3) using the with statement: the file is closed when the block exits
with open('file_path', 'w') as file:
    file.write('hello world !')
Typical usage:
@contextmanager
def some_generator(<arguments>):
<setup>
try:
yield <value>
finally:
<cleanup>
This makes this:
with some_generator(<arguments>) as <variable>:
<body>
equivalent to this:
<setup>
try:
<variable> = <value>
<body>
finally:
<cleanup>
5.4 "复合"with语句
# Several context managers can share one with statement, separated by commas.
with open(in_path) as fr, open(out_path, "w") as fw:
    pass
from contextlib import ExitStack
import csv
import os


def rel2logic(in_path, logic_dir):
    """Split a relation table into one TSV file per (start_type, end_type) pair.

    Each input row is appended to ``<start_type>-<end_type>.tsv`` inside
    *logic_dir*. The first row of every output file is
    ``start_type, end_type, "relation"``.

    Args:
        in_path: Tab-separated input file with the columns ``start_type``,
            ``end_type``, ``start_id``, ``end_id`` and ``relation``.
        logic_dir: Output directory; created if it does not exist.

    Example:
        >>> rel2logic("./python_logical/tests/all_relations.tsv", "./python_logical/tests/gen")
    """
    os.makedirs(logic_dir, exist_ok=True)
    # The ExitStack closes the input file and every output file opened along
    # the way, however many distinct type pairs the input contains.
    with ExitStack() as stack:
        # newline="" is what the csv module expects of the files it is given.
        fr = csv.DictReader(
            stack.enter_context(open(in_path, encoding="utf-8", newline="")),
            delimiter="\t",
        )
        fws = {}
        for row in fr:
            start_type, end_type = row["start_type"], row["end_type"]
            start_id, end_id, relation = row["start_id"], row["end_id"], row["relation"]
            key = start_type + "-" + end_type + ".tsv"
            if key not in fws:
                # First row for this type pair: open its file and write the header.
                out_path = os.path.join(logic_dir, key)
                fw = stack.enter_context(open(out_path, "w", encoding="utf-8", newline=""))
                fws[key] = csv.writer(fw, delimiter="\t", lineterminator="\n")
                fws[key].writerow([start_type, end_type, "relation"])
            fws[key].writerow([start_id, end_id, relation])
6. for else语法
# For every n in [2, 10), print either one factorisation of n or that n is prime.
for n in range(2, 10):
    for x in range(2, n):
        if n % x == 0:
            # Integer division keeps the cofactor an int (2, not 2.0).
            print(n, 'equals', x, '*', n // x)
            break
    else:
        # The inner loop fell through without finding a factor.
        print(n, 'is a prime number')
# Example taken from the Cython documentation.
import inspect


def foo(a, b, /, c, d=3, *args, e=5, f, **kwargs):
    pass


for name, p in inspect.signature(foo).parameters.items():
    print(name, p.kind.name)
# Output:
# a POSITIONAL_ONLY
# b POSITIONAL_ONLY
# c POSITIONAL_OR_KEYWORD
# d POSITIONAL_OR_KEYWORD
# args VAR_POSITIONAL
# e KEYWORD_ONLY
# f KEYWORD_ONLY
# kwargs VAR_KEYWORD
# 绝对导入
from aa import bb
from aa.bb import C
import aa.bb # aa.bb 必须为一个module/namespace package/package
# 相对导入:必须以点开头,且只有from ... import ...这一种写法
from . import aa
from ..aa.bb import cc
# import .aa # 无此语法
from ... import ... 语法详解
下面分别对上述导入语句作解析:
from aa import bb
导入成功只能为三种情况
aa 是一个不带 __init__.py 的文件夹(namespace package)。
bb 是一个 bb.py 文件。则可以直接使用 bb,但不能使用 aa 以及 aa.bb。注意,此时
# models.py
a = 1
# load_detr.py
import torch
model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=False)
from models import a
print(a)
from models.backbone import Backbone, Joiner
from models.detr import DETR, PostProcess
def detr_resnet50(...)
导致当前目录下的 models 无法被重新导入
修改策略(未必万无一失):
import sys

import torch

model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=False)
# Drop the cached "models" entry so the import below is resolved afresh.
# The None default avoids a KeyError when nothing is cached under that name.
sys.modules.pop("models", None)
from models import a
10. Python built-in function and operation
Truth Value Testing
任何对象都可以进行 Truth Value Testing(真值测试),即用于 bool(x) 或 if 或 while 语句,具体测试流程为,首先查找该对象是否有 __bool__ 方法,若存在,则返回 x.__bool__() 的结果。否则再查找是否有 __len__ 方法,若存在,则返回 len(x)!=0 的结果。若上述两个方法都不存在,则返回 True。
运算优先级:非bool运算 > not > and > or,所以 not a == b 等价于 not (a == b)
注意这三个运算符的准确含义如下:
not bool(a) # not a
a and b # a if bool(a)==False else b
a or b # a if bool(a)==True else b
12 and 13 # 13
23 or False # 23
delattr function and del operation
delattr(x, "foo") # 等价于 del x.foo
11. Python 内存管理与垃圾回收(待补充)
12. 怎么运行 Python 脚本
主要有:
python xx/yy.py
python -m xx.yy
import
runpy
importlib
exec
13. 迭代器与生成器
class A:
    """Iterable whose ``__iter__`` is a generator function."""

    def __iter__(self):
        yield from range(10)


a = A()  # a is an Iterable
iter(a)  # returns a generator (a special kind of iterator)
file_name = "techcrunch.csv"
# The whole pipeline is lazy, so it must be consumed (by sum) while the file
# is still open; the with block then closes the handle.
with open(file_name) as csv_file:
    lines = (line for line in csv_file)
    list_line = (s.rstrip().split(",") for s in lines)
    cols = next(list_line)  # the first row holds the column names
    company_dicts = (dict(zip(cols, data)) for data in list_line)
    funding = (
        int(company_dict["raisedAmt"])
        for company_dict in company_dicts
        if company_dict["round"] == "a"
    )
    total_series_a = sum(funding)
print(f"Total series A fundraising: ${total_series_a}")
generator 高级用法: send, throw, close
generator 还有着三个方法 send, throw, close.
send
def jumping_range(up_to):
    """Yield indices from 0 while below *up_to*, stepping by the value sent in.

    Resuming with ``next()`` sends None, which is treated as a step of 1.
    """
    index = 0
    while index < up_to:
        jump = yield index
        if jump is None:
            jump = 1
        index += jump


if __name__ == '__main__':
    iterator = jumping_range(5)
    print(next(iterator))  # 0
    print(iterator.send(2))  # 2
    print(next(iterator))  # 3
    print(iterator.send(-1))  # 2
    for x in iterator:
        print(x)  # 3, 4
def list_gen():
    """Yield 1, 2, 3, printing each value before it is yielded."""
    data = [1, 2, 3]
    for x in data:
        print("x", x)
        yield x


it = list_gen()
next(it)
# After close(), next(it) raises StopIteration, so the loop below prints nothing.
it.close()
for i in it:
    print("i", i)
throw
def list_gen():
    """Yield 1, 2, 3; a ValueError thrown in at the yield is printed and swallowed."""
    data = [1, 2, 3]
    for x in data:
        print("x", x)
        try:
            yield x
        except ValueError as err:
            print(err)


it = list_gen()
next(it)  # prints:
# x 1
# throw() raises inside the generator at the paused yield, which is not the
# same as send(ValueError("stop")). The handler prints first, then the loop
# moves on to the next item:
it.throw(ValueError("stop"))
# stop
# x 2
next(it)
# x 3
try:
    next(it)
except StopIteration:
    print("StopIteration")  # the generator is exhausted
def writer():
    """A coroutine that writes data *sent* to it to fd, socket, etc."""
    while True:
        w = (yield)
        print('>>', w)


def writer_wrapper(coro):
    """Delegate to *coro* so that values sent to the wrapper reach it."""
    yield from coro
    # Not equivalent for send():
    # for i in coro:
    #     yield i


w = writer()
wrap = writer_wrapper(w)
wrap.send(None)  # "prime" the coroutine
for i in range(4):
    # send() is called on wrap, not on w. Sending to w directly would give the
    # same result with either `yield from` or the for loop, but normally only
    # the wrapper is reachable, so `yield from` is what tunnels send() to w.
    wrap.send(i)
执行结果
>> 0
>> 1
>> 2
>> 3
如果不使用 yield from, 那么执行结果将是:
>> None
>> None
>> None
>> None
引用上面这个问答的理解:
What yield from does is it establishes a transparent bidirectional connection between the caller and the sub-generator
简单来说优先顺序就是: local scope, enclosing scope, global scope, built-in scope.
var = 100  # a global variable


def increment():
    # The assignment below makes `var` local to the whole function body, so
    # this read raises UnboundLocalError.
    print(var)
    var = 200
If a name binding operation occurs anywhere within a code block, all uses of the name within the block are treated as references to the current block. This can lead to errors when a name is used within a block before it is bound. This rule is subtle. Python lacks declarations and allows name binding operations to occur anywhere within a code block. The local variables of a code block can be determined by scanning the entire text of the block for name binding operations. See the FAQ entry on UnboundLocalError for examples.
这里是解释器先看了整个 code block, 即先看了 print(var) 之后的 var=200 这条语句, 认为 var 应该是一个 local variable, 所以在真正执行时按从上到下, 在执行 print(var) 时发现局部变量 var 没有被定义, 引发报错
var = 100  # a global variable


def increment():
    var = 2  # OK: binds a new local variable; the global is untouched
If a name is bound in a block, it is a local variable of that block, unless declared as nonlocal or global. If a name is bound at the module level, it is a global variable. (The variables of the module code block are local and global.) If a variable is used in a code block but not defined there, it is a free variable.
这里的 outer_func 被称为 inner_func 的 enclosing function, 而 inner_func 被称为 outer_func 的 inner function (nested function). 从 inner_func 的视角看, who 变量是 free variable, 从 outer_func 的视角看, who 变量是 local variable
17. Closure
Operationally, a closure is a record storing a function together with an environment. The environment is a mapping associating each free variable of the function (variables that are used locally, but defined in an enclosing scope) with the value or reference to which the name was bound when the closure was created.
注意这里的 free variable, used locally, enclosing scope 都是站在 inner function 的视角来看待的, 简单来说:
closure 包含 inner function 和它的 free variable
closure 在被调用时的特点如下:
Unlike a plain function, a closure allows the function to access those captured variables through the closure's copies of their values or references, even when the function is invoked outside their scope.
def generate_power(exponent):
    """Return a function that raises its argument to *exponent*.

    `generate_power` is the enclosing function (also called higher-order
    function, closure factory function or outer function).
    """

    def power(base):
        # `power` is the inner (nested) function; `exponent` is its free variable.
        return base ** exponent

    return power  # return a closure


raise_two = generate_power(2)  # a specific closure with exponent == 2
raise_three = generate_power(3)  # a specific closure with exponent == 3
raise_two(4)  # 16
raise_two(5)  # 25
raise_three(4)  # 64
raise_three(5)  # 125
# Each cell holds one captured free variable.
for cell in raise_two.__closure__:
    print(cell.cell_contents)
Objects such as modules and instances have an updateable __dict__ attribute; however, other objects may have write restrictions on their __dict__ attributes (for example, classes use a types.MappingProxyType to prevent direct dictionary updates).
Without an argument, vars() acts like locals(). Note, the locals dictionary is only useful for reads since updates to the locals dictionary are ignored.
A TypeError exception is raised if an object is specified but it doesn’t have a __dict__ attribute (for example, if its class defines the __slots__ attribute).
If the object does not provide __dir__(), the function tries its best to gather information from the object’s __dict__ attribute, if defined, and from its type object. The resulting list is not necessarily complete, and may be inaccurate when the object has a custom __getattr__().
The default mechanism behaves differently with different types of objects, as it attempts to produce the most relevant, rather than complete, information:
分为两组, 第一组是删除对象, 参考
以及 pytorch 的 torch.nn.Module 的 __setattr__ 的写法。
If a is an instance of super, then the binding super(B, obj).m() searches obj.__class__.__mro__ for the base class A immediately following B and then invokes the descriptor with the call: A.__dict__['m'].__get__(obj, obj.__class__).
The import statement is syntactic sugar for this function
——https://docs.python.org/3/library/importlib.html
其函数定义为():
An implementation of the built-in __import__() function.
Note: Programmatic importing of modules should use import_module() instead of this function.
而内置函数 __import__ 的定义为():
This function is invoked by the import statement. It can be replaced (by importing the builtins module and assigning to builtins.__import__) in order to change semantics of the import statement, but doing so is strongly discouraged as it is usually simpler to use import hooks (see PEP 302) to attain the same goals and does not cause issues with code which assumes the default import implementation is in use. Direct use of __import__() is also discouraged in favor of importlib.import_module().
import 语法:
importlib.__import__ 函数:
__import__ 内置函数:
import 语句与 __import__ 内置函数的对应关系可以参见。
怎样完全删除一个已经被导入的包,似乎做不到,参考
参考资料:
主要参考(翻译)自:
的最后有一个使用迭代器推导式求一个大型 csv 文件某列和的代码, 适用于大文件, 很值得体会:
参考资料:
例子参考:
python 中还有一个关键字 yield from, 虽然在简单场景下, yield from it 似乎跟 for i in it: yield i 没太大区别, 但实际上, 在 send, close, throw 方法上, 还是有区别的, 参考这个, 这里仅举一例: