Python源码剖析(四)字符串对象
bilibili視頻講解:https://space.bilibili.com/431392724
b站用戶名:平凡的久月
1. PyBytesObject
變長對象(數據長度在定義時未知,只有在創建時才能確定)
不可變對象(改變值時內存地址會發生改變)
1.1 定義
// Include/bytesobject.h #ifndef Py_LIMITED_API typedef struct {PyObject_VAR_HEADPy_hash_t ob_shash;char ob_sval[1];/* Invariants:* ob_sval contains space for 'ob_size+1' elements.* ob_sval[ob_size] == 0.* ob_shash is the hash of the string or -1 if not computed yet.*/ } PyBytesObject; #endif// python3 中不再使用 PyBytesObject 作為 String 類的底層實現(xiàn) #define PyObject_VAR_HEAD PyVarObject ob_base;typedef struct {PyObject ob_base;Py_ssize_t ob_size; /* Number of items in variable part */ } PyVarObject;typedef struct _object {_PyObject_HEAD_EXTRAPy_ssize_t ob_refcnt;struct _typeobject *ob_type; } PyObject;// 等價于下列表達 typedef struct {Py_hash_t ob_shash; // 緩存該對象的hash值,避免重新計算(初始值-1),dict中詳細解釋作用// PyBytesObject內(nèi)部維護的字符串必須以'\0'結(jié)尾char ob_sval[1]; // 字符指針,指向ob_size+1個字節(jié)的內(nèi)存'\0'Py_ssize_t ob_size;Py_ssize_t ob_refcnt;struct _typeobject *ob_type; } PyBytesObject; #endif PyTypeObject PyBytes_Type = {PyVarObject_HEAD_INIT(&PyType_Type, 0)"bytes",PyBytesObject_SIZE, // ob_sizesizeof(char), // 一個字節(jié)bytes_dealloc, /* tp_dealloc */0, /* tp_print */0, /* tp_getattr */0, /* tp_setattr */0, /* tp_reserved */(reprfunc)bytes_repr, /* tp_repr */// 支持三種操作&bytes_as_number, /* tp_as_number */&bytes_as_sequence, /* tp_as_sequence */&bytes_as_mapping, /* tp_as_mapping */(hashfunc)bytes_hash, /* tp_hash */// ......0, /* tp_init */0, /* tp_alloc */bytes_new, /* tp_new */PyObject_Del, /* tp_free */ };1.2 創(chuàng)建PyBytesObject
Python提供了多種途徑,可以從C的原生字符串創建PyBytesObject對象
-
PyBytes_FromString
PyObject * PyBytes_FromString(const char *str) {size_t size;PyBytesObject *op;assert(str != NULL);size = strlen(str);// 判斷字符串長度是否超過系統(tǒng)尋址能力if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {PyErr_SetString(PyExc_OverflowError,"byte string is too long");return NULL;}// 處理空串:通過nullstring,始終只有一個if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCSnull_strings++; #endifPy_INCREF(op);return (PyObject *)op;}// 處理單個字符(字符串對象緩沖池)if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { #ifdef COUNT_ALLOCSone_strings++; #endifPy_INCREF(op);return (PyObject *)op;}// 創(chuàng)建新的PyBytesObject對象,并初始化/* Inline PyObject_NewVar */op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);if (op == NULL)return PyErr_NoMemory();(void)PyObject_INIT_VAR(op, &PyBytes_Type, size);op->ob_shash = -1;memcpy(op->ob_sval, str, size+1);/* share short strings */if (size == 0) {nullstring = op;Py_INCREF(op);} else if (size == 1) {characters[*str & UCHAR_MAX] = op;Py_INCREF(op);}return (PyObject *) op; }
python3中沒有ob_sstate這個變量!!!
-
PyBytes_FromStringAndSize
/*
 * Build a bytes object of `size` bytes, copying them from `str` when `str`
 * is not NULL.
 *
 * Returns the object, or NULL on failure; a negative `size` sets
 * SystemError. With `str == NULL` the object from _PyBytes_FromSize() is
 * returned without copying anything into it. One-byte strings are served
 * from, and stored into, the shared characters[] cache.
 */
PyObject *
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
{
    PyBytesObject *op;
    if (size < 0) {
        PyErr_SetString(PyExc_SystemError,
            "Negative size passed to PyBytes_FromStringAndSize");
        return NULL;
    }
    // One-byte string: reuse the cached object from the characters[] pool.
    if (size == 1 && str != NULL &&
        (op = characters[*str & UCHAR_MAX]) != NULL)
    {
#ifdef COUNT_ALLOCS
        one_strings++;
#endif
        Py_INCREF(op);
        return (PyObject *)op;
    }

    // Otherwise create a new PyBytesObject and initialise it.
    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
    if (op == NULL)
        return NULL;
    if (str == NULL)
        return (PyObject *) op;

    // Only `size` bytes are copied here; the trailing '\0' is presumably
    // written by _PyBytes_FromSize() -- confirm against its definition.
    memcpy(op->ob_sval, str, size);
    /* share short strings */
    if (size == 1) {
        characters[*str & UCHAR_MAX] = op;
        Py_INCREF(op);                  // extra reference for the cache slot
    }
    return (PyObject *) op;
}
1.3 intern機制
1.3.1 Python2中
Python2中通過PyString_InternInPlace實現intern機制
檢查兩項內(nèi)容:
(1)是否是PyBytesObject?
(2)是否被intern機制處理過(保證只處理一次)
/*
 * Python 2: intern the string object that `*p` points to, in place.
 *
 * If an equal string is already a key of the `interned` dict, `*p` is
 * replaced by that object; otherwise the string itself is inserted as both
 * key and value and marked SSTATE_INTERNED_MORTAL. Non-exact string
 * subclasses and already-interned strings are left untouched. Errors from
 * the dict operations are cleared, leaving `*p` unchanged.
 */
void
PyString_InternInPlace(PyObject **p)
{
    register PyStringObject *s = (PyStringObject *)(*p);
    PyObject *t;
    if (s == NULL || !PyString_Check(s))
        Py_FatalError("PyString_InternInPlace: strings only please!");
    /* If it's a string subclass, we don't really know what putting
       it in the interned dict might do. */
    if (!PyString_CheckExact(s))
        return;
    if (PyString_CHECK_INTERNED(s))
        return;     // already interned: process each string only once
    if (interned == NULL) {
        // Create the interned dict on first use.
        interned = PyDict_New();
        if (interned == NULL) {
            PyErr_Clear(); /* Don't leave an exception */
            return;
        }
    }
    t = PyDict_GetItem(interned, (PyObject *)s);
    if (t) {
        // An equal string is already interned: hand that one back instead.
        Py_INCREF(t);
        Py_SETREF(*p, t);
        return;
    }

    if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
        PyErr_Clear();
        return;
    }
    /* The two references in interned are not counted by refcnt.
       The string deallocator will take care of this */
    Py_REFCNT(s) -= 2;
    PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
}
-
interned是什麼?它是一個PyDictObject對象,即上面代碼中的interned
-
創建臨時變量a,在interned中查找是否已有值相同的對象。
- intern中的指針不作為a的有效引用(不然a永遠無法銷毀)
細節(jié)問題:
(1)為什麼創建的時候interned的鍵與值都設置為對象的PyObject指針?
(2)為什麼將對象的引用計數減2?
1.3.2 Python3中
Python3中做了修改,移動到了sys庫,編譯器默認執行
from six.moves import intern
from sys import intern
str = "shanghai"
print(intern(str).__doc__)
// Modules/pyexpat.c
static PyObject*
string_intern(xmlparseobject *self, const char* str)
{
    PyObject *result = conv_string_to_unicode(str);
    PyObject *value;
    /* result can be NULL if the unicode conversion failed. */
    if (!result)
        return result;
    if (!self->intern)
        return result;
    value = PyDict_GetItem(self->intern, result);
    if (!value) {
        if (PyDict_SetItem(self->intern, result, result) == 0)
            return result;
        else
            return NULL;
    }
    Py_INCREF(value);
    Py_DECREF(result);
    return value;
}
1.4 字符串緩衝池
一個字節的字符對應的對象緩衝池
static PyBytesObject *characters[UCHAR_MAX + 1];
static PyBytesObject *nullstring;
實現過程
(1)創建PyBytesObject對象
(2)進行intern操作
(3)緩存進緩衝池
1.5 與效率相關的問題
背景:實現100個字符串的拼接
實現方法:+
問題:創建N-1個對象,進行N-1次內存的申請與釋放
根本原因:不可變對象
解決方法:對存儲在list的一組對象進行連接操作(join)
先統計這組對象維護的字符串總共有多長,然後一次性申請容納全部內容的內存,最後把各個字符串依次拷貝到這塊內存空間。
a = 345
b = a
c = 456
d = 456
print(a is b)
print(a is c)
print(c is d)
e = "abc"
f = "abc"
g = "abd"
print(e is f)
print(e is g)
# True
# False
# True
# True
# False
總結
以上是生活随笔為你收集整理的Python源码剖析(四)字符串对象的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: java filter
- 下一篇: Android之PhotoView使用(