抑郁症健康,内容丰富有趣,生活中的好帮手!
抑郁症健康 > 结合redis设计与实现的redis源码学习-2-SDS(简单动态字符串)

结合redis设计与实现的redis源码学习-2-SDS(简单动态字符串)

时间:2022-06-26 12:41:22

相关推荐

上一次我们学习了redis的内存分配方式,今天我们来学习redis最基本的数据结构SDS。在redis的数据库里,包含字符串值的键值对在底层都是由SDS实现的。

SDS的基本数据结构是sdshdr结构体:

/* Header of a simple dynamic string; the string bytes follow it in buf. */
struct sdshdr {
    int len;    /* number of bytes of buf in use, equal to the length of the stored string */
    int free;   /* number of bytes of buf that are not in use */
    char buf[]; /* byte array that holds the string */
};

使用这种结构体来存储字符串有以下好处:

1、可以直接获取字符串长度;

2、在使用时杜绝缓冲区溢出,因为free表示剩余可用空间;

3、二进制安全,字节数组不一定非要\0结尾,len记录使用长度;

4、可以使用部分string.h库中的函数。

这几个特性结合redis的空间预分配和惰性空间释放两种优化策略将速度提升至最大。

我发现SDS中的结构体与之前使用的有所差异:在3.2中为了更好地节省内存,使用了多种不同类型的结构体来记录数据,并用一个字节中的三个位来表示结构体类型。如果大量使用短小字符串的话,节省下来的内存也是比较可观的。

下面上代码:

/* SDSLib 2.0 -- A C dynamic strings library */

#ifndef __SDS_H
#define __SDS_H

#define SDS_MAX_PREALLOC (1024*1024)

#include <sys/types.h>
#include <stdarg.h>
#include <stdint.h>

/* An sds is a pointer to the string bytes; the header sits right before it. */
typedef char *sds;

/* Note: sdshdr5 is never used, we just access the flags byte directly.
 * However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* used */
    uint16_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* used */
    uint32_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

/* The macros and inline functions below read the type from the flags byte
 * stored at s[-1], pick the matching header struct and access its fields. */
#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
/* Declare a local 'sh' pointing to the sdshdr##T header located before s. */
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
/* Expression form: pointer to the sdshdr##T header located before s. */
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
/* Type 5 keeps the length in the 5 most significant bits of the flags. */
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)

/* Return the used length stored in the header of s. */
static inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->len;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->len;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->len;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->len;
    }
    return 0;
}

/* Return the number of allocated but unused bytes (alloc - len).
 * Type 5 has no alloc field, so it always reports 0. */
static inline size_t sdsavail(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5: {
            return 0;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            return sh->alloc - sh->len;
        }
    }
    return 0;
}

/* Store 'newlen' as the used length of s. For type 5 the length is packed
 * into the flags byte together with the type. */
static inline void sdssetlen(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                unsigned char *fp = ((unsigned char*)s)-1;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len = newlen;
            break;
    }
}

/* Increase the used length of s by 'inc'. */
static inline void sdsinclen(sds s, size_t inc) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                unsigned char *fp = ((unsigned char*)s)-1;
                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len += inc;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len += inc;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len += inc;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len += inc;
            break;
    }
}

/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->alloc;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->alloc;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->alloc;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->alloc;
    }
    return 0;
}

/* Store 'newlen' in the alloc field of the header of s. */
static inline void sdssetalloc(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            /* Nothing to do, this type has no total allocation info. */
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->alloc = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->alloc = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->alloc = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->alloc = newlen;
            break;
    }
}

sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
void sdsfree(sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
sds sdscatsds(sds s, const sds t);
sds sdscpylen(sds s, const char *t, size_t len);
sds sdscpy(sds s, const char *t);

sds sdscatvprintf(sds s, const char *fmt, va_list ap);
sds sdscatprintf(sds s, const char *fmt, ...);

sds sdscatfmt(sds s, char const *fmt, ...);
sds sdstrim(sds s, const char *cset);
void sdsrange(sds s, int start, int end);
void sdsupdatelen(sds s);
void sdsclear(sds s);
int sdscmp(const sds s1, const sds s2);
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count);
void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);

/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
void *sdsAllocPtr(sds s);

/* Export the allocator used by SDS to the program using SDS.
 * Sometimes the program SDS is linked to, may use a different set of
 * allocators, but may want to allocate or free things that SDS will
 * respectively free or allocate. */
void *sds_malloc(size_t size);
void *sds_realloc(void *ptr, size_t size);
void sds_free(void *ptr);

#endif

在.c文件中,引用了sdsalloc.h,其中把s_malloc、s_realloc、s_free分别定义为zmalloc.h中的zmalloc、zrealloc、zfree:

/* Map the allocator names used by SDS onto the zmalloc family of functions. */
#include "zmalloc.h"
#define s_malloc zmalloc
#define s_realloc zrealloc
#define s_free zfree

c文件还挺大的,有1000多行,在此我选择了一些贴了上来,其中的内联函数使用方式同.h,用来选择使用的结构体类型:

#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <assert.h>#include "sds.h"#include "sdsalloc.h"/* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'.*/sds sdsnewlen(const void *init, size_t initlen) {void *sh;sds s;char type = sdsReqType(initlen);//如果是一个空字符串的话会使用8的结构体if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;int hdrlen = sdsHdrSize(type);unsigned char *fp; /* flags pointer. */sh = s_malloc(hdrlen+initlen+1);if (!init)memset(sh, 0, hdrlen+initlen+1);if (sh == NULL) return NULL;s = (char*)sh+hdrlen;fp = ((unsigned char*)s)-1; //根据结构体在内存中的排列,-1位类型flagswitch(type) {case SDS_TYPE_5: {*fp = type | (initlen << SDS_TYPE_BITS);break;}case SDS_TYPE_8: {SDS_HDR_VAR(8,s);sh->len = initlen;sh->alloc = initlen;*fp = type;break;}case SDS_TYPE_16: {SDS_HDR_VAR(16,s);sh->len = initlen;sh->alloc = initlen;*fp = type;break;}case SDS_TYPE_32: {SDS_HDR_VAR(32,s);sh->len = initlen;sh->alloc = initlen;*fp = type;break;}case SDS_TYPE_64: {SDS_HDR_VAR(64,s);sh->len = initlen;sh->alloc = initlen;*fp = type;break;}}if (initlen && init)memcpy(s, init, initlen);s[initlen] = '\0';return s;}/* Create an empty (zero length) sds string. Even in this case the string * always has an implicit null term. */sds sdsempty(void) {return sdsnewlen("",0);}void sdsfree(sds s) {if (s == NULL) return;s_free((char*)s-sdsHdrSize(s[-1])); //这里要减去flag为的长度和len还有alloc类型所占的长度才到分配内存的起点,sdsHdrSize是根据flag来计算头长度的内联函数,这里我将它省略了}//清空sds,这里只改变sds的长度,然后将sds第一位变为\0简化了操作void sdsclear(sds s) {sdssetlen(s, 0);s[0] = '\0';}//给SDS增加长度sds sdsMakeRoomFor(sds s, size_t addlen) {void *sh, *newsh;size_t avail = sdsavail(s); //获取可用大小size_t len, newlen;char type, oldtype = s[-1] & SDS_TYPE_MASK;int hdrlen;/* Return ASAP if there is enough space left. 
*/if (avail >= addlen) return s;len = sdslen(s); //返回sds分配的长度sh = (char*)s-sdsHdrSize(oldtype);newlen = (len+addlen);if (newlen < SDS_MAX_PREALLOC) //这里的分配原则是最终小于max则直接翻倍,大于则+上max大小newlen *= 2;elsenewlen += SDS_MAX_PREALLOC;type = sdsReqType(newlen);/* Don't use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called* at every appending operation. */if (type == SDS_TYPE_5) type = SDS_TYPE_8;hdrlen = sdsHdrSize(type);if (oldtype==type) {newsh = s_realloc(sh, hdrlen+newlen+1);if (newsh == NULL) return NULL;s = (char*)newsh+hdrlen;} else {/* Since the header size changes, need to move the string forward,* and can't use realloc */newsh = s_malloc(hdrlen+newlen+1);if (newsh == NULL) return NULL;memcpy((char*)newsh+hdrlen, s, len+1);s_free(sh);s = (char*)newsh+hdrlen;s[-1] = type;sdssetlen(s, len);}sdssetalloc(s, newlen);return s;}//释放SDS多余的空间sds sdsRemoveFreeSpace(sds s) {void *sh, *newsh;char type, oldtype = s[-1] & SDS_TYPE_MASK;int hdrlen;size_t len = sdslen(s);sh = (char*)s-sdsHdrSize(oldtype);type = sdsReqType(len);hdrlen = sdsHdrSize(type);if (oldtype==type) {newsh = s_realloc(sh, hdrlen+len+1); //我认为这里不需要操作,新的结构体等于老的结构体,那么就不会改变大小,这里可能是防止意外的在哪里改变吧if (newsh == NULL) return NULL;s = (char*)newsh+hdrlen;} else {newsh = s_malloc(hdrlen+len+1);//这里是创建一个新的SDS然后将数据拷贝过去if (newsh == NULL) return NULL;memcpy((char*)newsh+hdrlen, s, len+1);s_free(sh);s = (char*)newsh+hdrlen;s[-1] = type;sdssetlen(s, len);}sdssetalloc(s, len);return s;}sds sdscatlen(sds s, const void *t, size_t len) {size_t curlen = sdslen(s);//这里总是先检查长度扩大大小,再进行拷贝,保证不会溢出s = sdsMakeRoomFor(s,len);if (s == NULL) return NULL;memcpy(s+curlen, t, len);sdssetlen(s, curlen+len);s[curlen+len] = '\0';return s;}//使用更为简单的函数作为中转,方便使用sds sdscat(sds s, const char *t) {return sdscatlen(s, t, strlen(t));}#define SDS_LLSTR_SIZE 21//使用SDS存储LL类型的数据int sdsll2str(char *s, long long value) {char *p, aux;unsigned long long v;size_t l;/* Generate the string 
representation, this method produces* an reversed string. */v = (value < 0) ? -value : value;p = s;do {*p++ = '0'+(v%10); //计算每一10进制位的值v /= 10;} while(v);if (value < 0) *p++ = '-';/* Compute length and add null term. */l = p-s;*p = '\0';/* Reverse the string. */p--;while(s < p) {aux = *s;*s = *p;*p = aux;s++;p--;}return l;}//这个函数使用不定参数,可以追加多个字符串/* Like sdscatprintf() but gets va_list instead of being variadic. */sds sdscatvprintf(sds s, const char *fmt, va_list ap) {va_list cpy;char staticbuf[1024], *buf = staticbuf, *t;size_t buflen = strlen(fmt)*2;/* We try to start using a static buffer for speed.* If not possible we revert to heap allocation. */if (buflen > sizeof(staticbuf)) {buf = s_malloc(buflen);if (buf == NULL) return NULL;} else {buflen = sizeof(staticbuf);}/* Try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */while(1) {buf[buflen-2] = '\0';va_copy(cpy,ap);vsnprintf(buf, buflen, fmt, cpy);va_end(cpy);if (buf[buflen-2] != '\0') { //这里只要不为/0就是溢出了,代表大小不够,只能使用堆空间,但是这里可能会多次释放开辟对空间if (buf != staticbuf) s_free(buf);buflen *= 2;buf = s_malloc(buflen);if (buf == NULL) return NULL;continue;}break;}/* Finally concat the obtained string to the SDS string and return it. */t = sdscat(s, buf);if (buf != staticbuf) s_free(buf);return t;}//这里将不定参数变为va_listsds sdscatprintf(sds s, const char *fmt, ...) {va_list ap;char *t;va_start(ap, fmt);t = sdscatvprintf(s,fmt,ap);va_end(ap);return t;}//这个函数用来转换多种类型的数据,说这个比上面的快。。。。/* This function is similar to sdscatprintf, but much faster as it does* not rely on sprintf() family functions implemented by the libc that* are often very slow. Moreover directly handling the sds string as* new data is concatenated provides a performance improvement. 
* However this function only handles an incompatible subset of printf-alike* format specifiers:* %s - C String* %S - SDS string* %i - signed int* %I - 64 bit signed integer (long long, int64_t)* %u - unsigned int* %U - 64 bit unsigned integer (unsigned long long, uint64_t)* %% - Verbatim "%" character.*///这里学到了,C中不定参数的类型存储表示格式还是用格式化数据的%来定义sds sdscatfmt(sds s, char const *fmt, ...) {size_t initlen = sdslen(s);const char *f = fmt;int i;va_list ap;va_start(ap,fmt);f = fmt; /* Next format specifier byte to process. */i = initlen; /* Position of the next byte to write to dest str. */while(*f) {char next, *str;size_t l;long long num;unsigned long long unum;//这里的意思是总会至少扩大一个字节,感觉怪怪的,/* Make sure there is always space for at least 1 char. */if (sdsavail(s)==0) {s = sdsMakeRoomFor(s,1);}//判断不定参数类型,并进行处理switch(*f) {case '%':next = *(f+1);f++;switch(next) {case 's':case 'S':str = va_arg(ap,char*);l = (next == 's') ? strlen(str) : sdslen(str);if (sdsavail(s) < l) {s = sdsMakeRoomFor(s,l);}memcpy(s+i,str,l);sdsinclen(s,l);i += l;break;case 'i':case 'I':if (next == 'i')num = va_arg(ap,int);elsenum = va_arg(ap,long long);{char buf[SDS_LLSTR_SIZE];l = sdsll2str(buf,num);if (sdsavail(s) < l) {s = sdsMakeRoomFor(s,l);}memcpy(s+i,buf,l);sdsinclen(s,l);i += l;}break;case 'u':case 'U':if (next == 'u')unum = va_arg(ap,unsigned int);elseunum = va_arg(ap,unsigned long long);{char buf[SDS_LLSTR_SIZE];l = sdsull2str(buf,unum);if (sdsavail(s) < l) {s = sdsMakeRoomFor(s,l);}memcpy(s+i,buf,l);sdsinclen(s,l);i += l;}break;default: /* Handle %% and generally %<unknown>. */s[i++] = next;sdsinclen(s,1);break;}break;default:s[i++] = *f;sdsinclen(s,1);break;}f++;}va_end(ap);/* Add null-term */s[i] = '\0';return s;}//截断,找到前后第一次没有出现所需字符的地方sds sdstrim(sds s, const char *cset) {char *start, *end, *sp, *ep;size_t len;sp = start = s;ep = end = s+sdslen(s)-1;while(sp <= end && strchr(cset, *sp)) sp++;while(ep > sp && strchr(cset, *ep)) ep--;len = (sp > ep) ? 
0 : ((ep-sp)+1);if (s != sp) memmove(s, sp, len);s[len] = '\0';sdssetlen(s,len);return s;}//选择起点和访问SDS的元素void sdsrange(sds s, int start, int end) {size_t newlen, len = sdslen(s);if (len == 0) return;if (start < 0) {start = len+start;if (start < 0) start = 0;}if (end < 0) {end = len+end;if (end < 0) end = 0;}newlen = (start > end) ? 0 : (end-start)+1;if (newlen != 0) {if (start >= (signed)len) {newlen = 0;} else if (end >= (signed)len) {end = len-1;newlen = (start > end) ? 0 : (end-start)+1;}} else {start = 0;}if (start && newlen) memmove(s, s+start, newlen);s[newlen] = 0;sdssetlen(s,newlen);}//对比两个SDS,这里不是只返回是否相同,而且返回值告诉了我们是哪种情况int sdscmp(const sds s1, const sds s2) {size_t l1, l2, minlen;int cmp;l1 = sdslen(s1);l2 = sdslen(s2);minlen = (l1 < l2) ? l1 : l2;cmp = memcmp(s1,s2,minlen);if (cmp == 0) return l1-l2;return cmp;}//这里将一个字符串分割成多个SDS,以数组的形式返回,以传出参数count返回长度sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) {int elements = 0, slots = 5, start = 0, j;sds *tokens;if (seplen < 1 || len < 0) return NULL;tokens = s_malloc(sizeof(sds)*slots);if (tokens == NULL) return NULL;if (len == 0) {*count = 0;return tokens;}for (j = 0; j < (len-(seplen-1)); j++) {/* make sure there is room for the next element and the final one */if (slots < elements+2) {sds *newtokens;slots *= 2;newtokens = s_realloc(tokens,sizeof(sds)*slots);if (newtokens == NULL) goto cleanup;tokens = newtokens;}/* search the separator */if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {tokens[elements] = sdsnewlen(s+start,j-start);if (tokens[elements] == NULL) goto cleanup;elements++;start = j+seplen;j = j+seplen-1; /* skip the separator */}}/* Add the final element. We are sure there is room in the tokens array. 
*/tokens[elements] = sdsnewlen(s+start,len-start);if (tokens[elements] == NULL) goto cleanup;elements++;*count = elements;return tokens;cleanup:{int i;for (i = 0; i < elements; i++) sdsfree(tokens[i]);s_free(tokens);*count = 0;return NULL;}}/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */void sdsfreesplitres(sds *tokens, int count) {if (!tokens) return;while(count--)sdsfree(tokens[count]);s_free(tokens);}//按顺序替换掉字符,比如讲hello,中的ho替换为ll,长度为2。变成lelll。sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {size_t j, i, l = sdslen(s);for (j = 0; j < l; j++) {for (i = 0; i < setlen; i++) {if (s[j] == from[i]) {s[j] = to[i];break;}}}return s;}void *sds_malloc(size_t size) { return s_malloc(size); }void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }void sds_free(void *ptr) { s_free(ptr); }

看完了3.2的SDS代码后,发现该版本和redis设计与实现上写的是有一些差异的,代码量变大了,但是考虑到了更多种的情况,在实际运行过程中的性能会有一些提升,最大的改变就是使用多种结构体来存储不同的SDS,用不同类型来存储长度。

真长啊,代码,还有很多,继续加油!

如果觉得《结合redis设计与实现的redis源码学习-2-SDS(简单动态字符串)》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。