深入探究Python:字符串JOIN
作者:互联网
2.字符串join
# Source: call str.join with `a` as the separator and `b` as the iterable to join
a="dddddddddd"
b="sssssssssss"
# b is itself a str, so join() iterates over it one character at a time
a.join(b)
#字节码(a.join(b) 实际上就是在 str(unicode)对象上查找并调用其 join 方法)
0 LOAD_NAME 0 (a)
2 LOAD_METHOD 1 (join)
4 LOAD_NAME 2 (b)
6 CALL_METHOD 1
8 RETURN_VALUE
//join 方法最终调用的就是下面的 C 函数 PyUnicode_Join(源码有删减)
/*
 * Join the elements of `seq` with `separator` between them.
 *
 * separator: passed through unchanged to _PyUnicode_JoinArray.
 * seq:       the object whose elements are joined; it is first normalised
 *            with PySequence_Fast so its items can be read as a C array.
 *
 * Returns whatever _PyUnicode_JoinArray returns, or NULL if `seq` could not
 * be converted by PySequence_Fast (the error is set by that call).
 */
PyObject *
PyUnicode_Join(PyObject *separator, PyObject *seq)
{
    PyObject *res;
    PyObject *fseq;
    Py_ssize_t seqlen;
    PyObject **items;

    fseq = PySequence_Fast(seq, "can only join an iterable");
    if (fseq == NULL) {
        /* PySequence_Fast reports failure with NULL; the macros below
         * would dereference it, so bail out first. */
        return NULL;
    }

    items = PySequence_Fast_ITEMS(fseq);
    seqlen = PySequence_Fast_GET_SIZE(fseq);  /* number of items to join */
    res = _PyUnicode_JoinArray(separator, items, seqlen);

    /* Release the reference obtained from PySequence_Fast. */
    Py_DECREF(fseq);
    return res;
}
/*
 * Return `v` in a form whose items can be accessed directly.
 *
 * v: the object to convert; NULL is reported through null_error().
 * m: message used for the TypeError raised when `v` is not iterable.
 *
 * An exact list or tuple is returned as-is with its reference count
 * incremented. Anything else is iterated and the result of
 * PySequence_List() on that iterator is returned. Returns NULL on failure.
 */
PyObject *
PySequence_Fast(PyObject *v, const char *m)
{
    PyObject *iterator;
    PyObject *as_list;

    if (v == NULL) {
        return null_error();
    }

    /* Exact lists and tuples need no conversion: hand back a new reference. */
    if (PyList_CheckExact(v) || PyTuple_CheckExact(v)) {
        Py_INCREF(v);
        return v;
    }

    iterator = PyObject_GetIter(v);
    if (iterator != NULL) {
        /* Build the result from the iterator, then drop the iterator. */
        as_list = PySequence_List(iterator);
        Py_DECREF(iterator);
        return as_list;
    }

    /* Could not get an iterator: if the pending error is a TypeError,
     * replace its message with the caller-supplied one. */
    if (PyErr_ExceptionMatches(PyExc_TypeError)) {
        PyErr_SetString(PyExc_TypeError, m);
    }
    return NULL;
}
//有删减
/*
 * Concatenate the `seqlen` str objects in `items`, inserting `separator`
 * between consecutive items, and return the result as a new reference.
 *
 * separator: str placed between items; NULL selects a single space.
 * items:     array of the objects to join; each must be a str instance.
 * seqlen:    number of entries in `items`.
 *
 * Returns NULL with an exception set when the separator or an item is not a
 * str (TypeError), when the total length would exceed PY_SSIZE_T_MAX
 * (OverflowError), or when creating the separator or result object fails.
 */
PyObject *
_PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seqlen)
{
    PyObject *res = NULL; /* the result */
    PyObject *sep = NULL; /* owned reference to the separator in use */
    Py_ssize_t seplen;
    PyObject *item;
    Py_ssize_t sz, i, res_offset;
    Py_UCS4 maxchar;
    Py_UCS4 item_maxchar;
    int use_memcpy;
    unsigned char *res_data = NULL, *sep_data = NULL;
    PyObject *last_obj = NULL;
    unsigned int kind = 0;

    if (seqlen == 1) {
        /* A single exact str needs no copying: return it directly.  The
         * caller receives a new reference on every other path, so take one
         * here too instead of handing out the array's borrowed reference. */
        if (PyUnicode_CheckExact(items[0])) {
            res = items[0];
            Py_INCREF(res);
            return res;
        }
        /* A single non-exact item: no separator will be written. */
        seplen = 0;
        maxchar = 0;
    }
    else {
        if (separator == NULL) {
            /* fall back to a blank space separator */
            sep = PyUnicode_FromOrdinal(' ');
            if (!sep)
                goto onError;
            seplen = 1;
            maxchar = 32;
        }
        else {
            /* Validate the separator once, before its length is read. */
            if (!PyUnicode_Check(separator)) {
                PyErr_Format(PyExc_TypeError,
                             "separator: expected str instance,"
                             " %.80s found",
                             Py_TYPE(separator)->tp_name);
                goto onError;
            }
            sep = separator;
            seplen = PyUnicode_GET_LENGTH(separator);      /* separator length */
            maxchar = PyUnicode_MAX_CHAR_VALUE(separator); /* its max code point bound */
            /* Own a reference so both branches are released the same way. */
            Py_INCREF(sep);
        }
        last_obj = sep;
    }

    /* There are at least two things to join, or else we have a subclass
     * of str in the sequence.
     * Do a pre-pass to figure out the total amount of space we'll
     * need (sz), and see whether all arguments are strings.
     */
    sz = 0;
#ifdef Py_DEBUG
    use_memcpy = 0;
#else
    use_memcpy = 1;
#endif
    for (i = 0; i < seqlen; i++) {
        size_t add_sz;

        item = items[i];
        /* Every item must be a str. */
        if (!PyUnicode_Check(item)) {
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected str instance,"
                         " %.80s found",
                         i, Py_TYPE(item)->tp_name);
            goto onError;
        }
        add_sz = PyUnicode_GET_LENGTH(item);            /* length of this item */
        item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);  /* max code point bound of this item */
        maxchar = Py_MAX(maxchar, item_maxchar);        /* widest value seen so far */
        if (i != 0) {
            /* Every item after the first is preceded by the separator. */
            add_sz += seplen;
        }
        /* Refuse results longer than PY_SSIZE_T_MAX. */
        if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long for a Python string");
            goto onError;
        }
        sz += add_sz;
        /* memcpy is only usable while each object has the same kind as the
         * one before it (starting from the separator). */
        if (use_memcpy && last_obj != NULL) {
            if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
                use_memcpy = 0;
        }
        last_obj = item;
    }

    /* Allocate the result with the total length and the widest character. */
    res = PyUnicode_New(sz, maxchar);
    if (res == NULL)
        goto onError;

#ifdef Py_DEBUG
    use_memcpy = 0;
#else
    if (use_memcpy) {
        /* Set up raw pointers for the memcpy path. */
        res_data = PyUnicode_1BYTE_DATA(res);
        kind = PyUnicode_KIND(res);
        if (seplen != 0)
            sep_data = PyUnicode_1BYTE_DATA(sep);
    }
#endif

    if (use_memcpy) {
        /* Raw copy: sizes are `kind * length` bytes. */
        for (i = 0; i < seqlen; ++i) {
            Py_ssize_t itemlen;

            item = items[i];

            /* Copy item, and maybe the separator. */
            if (i && seplen != 0) {
                memcpy(res_data,
                       sep_data,
                       kind * seplen);
                res_data += kind * seplen;
            }

            itemlen = PyUnicode_GET_LENGTH(item);
            if (itemlen != 0) {
                memcpy(res_data,
                       PyUnicode_DATA(item),
                       kind * itemlen);
                res_data += kind * itemlen;
            }
        }
    }
    else {
        /* Copy through _PyUnicode_FastCopyCharacters, tracking the write
         * position as a character offset into the result. */
        for (i = 0, res_offset = 0; i < seqlen; ++i) {
            Py_ssize_t itemlen;

            item = items[i];

            /* Copy item, and maybe the separator. */
            if (i && seplen != 0) {
                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
                res_offset += seplen;
            }

            itemlen = PyUnicode_GET_LENGTH(item);
            if (itemlen != 0) {
                _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
                res_offset += itemlen;
            }
        }
    }

    /* Success: drop our separator reference and return the result. */
    Py_XDECREF(sep);
    return res;

onError:
    /* Failure: release whatever was acquired; either pointer may be NULL. */
    Py_XDECREF(sep);
    Py_XDECREF(res);
    return NULL;
}
标签:PyUnicode,JOIN,item,Python,res,Py,PyObject,探究,NULL 来源: https://blog.csdn.net/qq_33913982/article/details/104761611