首页 > 编程语言> > cJSON源码分析（二）

cJSON源码分析（二）

2021-05-07 21:57:27 作者：互联网

在构建好一个JSON对象之后，如何访问呢？

首先试着把 JSON 字符串解析（反序列化）成 cJSON 对象，再把整个对象打印出来看看结构：

char * string = "{\"name\":\"xxx\", \"name2\":\"xxx2\"}";cJSON * root = cJSON_Parse(string);//json字符串序列化printf("%s\n", cJSON_Print(root));//json格式化输出

在这里插入图片描述
看源码了解一下cJSON_Parse函数大致实现过程吧

//用来格式化json字符串所需要的缓存空间地带typedef struct{
    const unsigned char *content;//格式化的内容
    size_t length;//内容长度
    size_t offset;//偏移量
    size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. 
    当前偏移量处的输入嵌套深度（在数组/对象中）有多深*/
    internal_hooks hooks;//内存分配} parse_buffer;/* check if the given size is left to read in a given parse buffer (starting with 1) 
检查给定的大小是否留在给定的解析缓冲区中读取（从1开始）
这里应该是通过给定buffer读取内存范围是否比buffer实际占据的范围小，保证不超出空间读取*/#define can_read(buffer, size) ((buffer != NULL) && (((buffer)->offset + size) length))/* check if the buffer can be accessed at the given index (starting with 0) 
检查是否可以在给定索引处访问缓冲区（从0开始）*/#define can_access_at_index(buffer, index) ((buffer != NULL) && (((buffer)->offset + index) < (buffer)->length))#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index))/* get a pointer to the buffer at the position 
获取指向位置处缓冲区的指针*/#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)/* Default options for cJSON_Parse 
cJSON_Parse的默认选项*/CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value){
    //传入json字符串，调用cJSON_ParseWithOpts进行序列化
    return cJSON_ParseWithOpts(value, 0, 0);//0为假，1为真
    //第二个参数为0，说明不需要获得字符串结尾的偏移量
    //第三个参数为0，说明解析字符串时不是以空白作为结尾，类似我们使用scanf读取一个带空格的字符串是，默认空格就是字符串的结尾。但是在这里，空格不是结尾
    }CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated){
    //return_parse_end  require_null_terminated为0，为空、为假
    size_t buffer_length;

    if (NULL == value)
    {
        return NULL;
    }

    /* Adding null character size due to require_null_terminated. 
    由require_null_terminated添加空字符大小*/
    buffer_length = strlen(value) + sizeof("");//统计json字符串长度，并在结尾加一个空字符串的长度，因为strlen统计长度是末尾的\0是不统计进去

    return cJSON_ParseWithLengthOpts(value, buffer_length, return_parse_end, require_null_terminated);}/* Parse an object - create a new root, and populate. 
解析一个对象-创建一个新的根，并填充*/CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, cJSON_bool require_null_terminated){
    parse_buffer buffer = { 0, 0, 0, 0, { 0, 0, 0 } };//首先都初始化为0
    cJSON *item = NULL;

    /* reset error position 复位错误位置*/
    //global_error是静态全集变量
    global_error.json = NULL;
    global_error.position = 0;

    if (value == NULL || 0 == buffer_length)
    {
        goto fail;
    }

    buffer.content = (const unsigned char*)value;
    buffer.length = buffer_length; 
    buffer.offset = 0;
    buffer.hooks = global_hooks;

    item = cJSON_New_Item(&global_hooks);//根据上面初始化后的buffer，构建一个json对象（具体实现前面的文章已有介绍）
    if (item == NULL) /* memory fail */
    {
        goto fail;//构建失败
    }

    /*
        总之parse_value主要是解析各种类型的数据，比如在json字符串里面的时候，有 "num":123, 那么123应该作为整形数据存储
        如果是 "num":"123"，那么"123"应该作为字符串类型数据(char *)进行存储
    */
    if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer))))
    {
        /* parse failure. ep is set. */
        goto fail;//格式化失败
    }

    /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator 
    如果我们需要不附加垃圾的以空结尾的JSON，请跳过，然后检查空结尾符*/
    if (require_null_terminated)
    {
        buffer_skip_whitespace(&buffer);
        if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0')
        {
            goto fail;
        }
    }
    if (return_parse_end)
    {
        *return_parse_end = (const char*)buffer_at_offset(&buffer);
    }

    return item;fail:
    if (item != NULL)
    {
        cJSON_Delete(item);//前面的文章已有介绍过
    }

    if (value != NULL)
    {
        error local_error;
        local_error.json = (const unsigned char*)value;
        local_error.position = 0;

        if (buffer.offset < buffer.length)
        {
            local_error.position = buffer.offset;
        }
        else if (buffer.length > 0)
        {
            local_error.position = buffer.length - 1;
        }

        if (return_parse_end != NULL)
        {
            *return_parse_end = (const char*)local_error.json + local_error.position;
        }

        global_error = local_error;
    }

    return NULL;}/* Parser core - when encountering text, process appropriately. 
解析器核心-遇到文本时，进行适当的处理*/static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer){
    if ((input_buffer == NULL) || (input_buffer->content == NULL))
    {
        return false; /* no input */
    }

    /* parse the different types of values 解析不同类型的值*/
    /* null */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0))
    {
        item->type = cJSON_NULL;
        input_buffer->offset += 4;
        return true;
    }
    /* false */
    if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0))
    {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }
    /* true */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0))
    {
        item->type = cJSON_True;
        item->valueint = 1;
        input_buffer->offset += 4;
        return true;
    }
    /* string */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '\"'))
    {
        return parse_string(item, input_buffer);//解析字符串
    }
    /* number */
    if (can_access_at_index(input_buffer, 0) && ((buffer_at_offset(input_buffer)[0] == '-') || ((buffer_at_offset(input_buffer)[0] >= '0') && (buffer_at_offset(input_buffer)[0] <= '9'))))
    {
        return parse_number(item, input_buffer);//解析数字，原理同parse_string类似
    }
    /* array */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '['))
    {
        return parse_array(item, input_buffer);//解析数组，原理同parse_string类似
    }
    /* object */
    if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '{'))
    {
        return parse_object(item, input_buffer);
    }

    return false;}/* Parse the input text into an unescaped cinput, and populate item. 
将输入文本解析为未转义的 cinput，并填充项
这里就是开始序列化json字符串了
*/static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer){
    // 获得缓存正确的读取空间范围，防治越界
    const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1;
    const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;
    unsigned char *output_pointer = NULL;
    unsigned char *output = NULL;

    /* not a string */
    if (buffer_at_offset(input_buffer)[0] != '\"')//字符串没有"为开头，那么该json字符串就有问题
    {
        goto fail;
    }

    {
        /* calculate approximate size of the output (overestimate) 计算输出的近似大小（高估）*/
        size_t allocation_length = 0;
        size_t skipped_bytes = 0;
        while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"'))
        {
            /* is escape sequence 
            是转义序列*/
            if (input_end[0] == '\\')
            {
                if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length)
                {
                    /* prevent buffer overflow when last input character is a backslash 
                    当最后一个输入字符是反斜杠时防止缓冲区溢出*/
                    goto fail;
                }
                skipped_bytes++;
                input_end++;
            }
            input_end++;
        }
        if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"'))
        {
            goto fail; /* string ended unexpectedly 字符串意外结束，就是说明明还符合循环条件但是却运行到这里来*/
        }

        /* This is at most how much we need for the output 
        这是我们最多需要多少产出*/
        allocation_length = (size_t) (input_end - buffer_at_offset(input_buffer)) - skipped_bytes;
        output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof(""));//多加一个""的大小，应该是为了存下每个字符串末尾的\0,strlen()计算长度时不考虑末尾的'\0'
        if (output == NULL)
        {
            goto fail; /* allocation failure 分配失败*/
        }
    }

    output_pointer = output;
    /* loop through the string literal 
    循环字符串文本*/
    while (input_pointer < input_end)
    {
        if (*input_pointer != '\\')
        {
            *output_pointer++ = *input_pointer++;
        }
        /* escape sequence 转义序列*/
        else
        {
            unsigned char sequence_length = 2;
            if ((input_end - input_pointer) < 1)
            {
                goto fail;
            }

            switch (input_pointer[1])
            {
                case 'b':
                    *output_pointer++ = '\b';
                    break;
                case 'f':
                    *output_pointer++ = '\f';
                    break;
                case 'n':
                    *output_pointer++ = '\n';
                    break;
                case 'r':
                    *output_pointer++ = '\r';
                    break;
                case 't':
                    *output_pointer++ = '\t';
                    break;
                case '\"':
                case '\\':
                case '/':
                    *output_pointer++ = input_pointer[1];
                    break;

                /* UTF-16 literal UTF-16文字*/
                case 'u':
                    sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer);
                    if (sequence_length == 0)
                    {
                        /* failed to convert UTF16-literal to UTF-8 无法将UTF16文本转换为UTF-8*/
                        goto fail;
                    }
                    break;

                default:
                    goto fail;
            }
            input_pointer += sequence_length;
        }
    }

    /* zero terminate the output 零终止输出*/
    *output_pointer = '\0';

    item->type = cJSON_String;
    item->valuestring = (char*)output;

    input_buffer->offset = (size_t) (input_end - input_buffer->content);
    input_buffer->offset++;

    return true;//序列化成功fail:
    if (output != NULL)
    {
        input_buffer->hooks.deallocate(output);
    }

    if (input_pointer != NULL)
    {
        input_buffer->offset = (size_t)(input_pointer - input_buffer->content);
    }

    return false;//序列化失败}/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer 
如果UTF-8bom（字节顺序标记）位于缓冲区的开头，则跳过它*/static parse_buffer *skip_utf8_bom(parse_buffer * const buffer){
    if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0))
    {
        return NULL;
    }

    if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0))
    {
        buffer->offset += 3;
    }

    return buffer;}

解析（反序列化）JSON 字符串确实繁琐，主要工作量花在缓冲区边界检查和数据类型转换上。
大致了解一下工作流程，主要函数的调用顺序如下：

cJSON_Parse ==> cJSON_ParseWithOpts ==> cJSON_ParseWithLengthOpts ==> cJSON_New_Item（先创建根节点）、parse_value ==> parse_string / parse_number / parse_array / parse_object

标签：分析,return,cJSON,parse,buffer,源码,offset,input
来源： https://blog.51cto.com/u_14175378/2759885