关于strlen
strlen的實現是通過4個字節4個字節進行枚舉,然后通過位運算來判斷這4個字節中是否有一個字節含有0,這樣的話,效率就提高了4倍。
這個效率提高是假設a&b&c&d與a&b有差不多效率的前提下。
那用8字節8字節來偏移的話,是不是更快呢?32位機上不會,64位機上會提高一倍。因為a&b在64位下會提高一倍,因為32位的寄存器大小是32位的,對于分別MOV高位與低位兩次。
本來實驗a&b&c&d與a&b的速度的,經實驗驗證,這兩個效率確實是差不多的,然后去看匯編,看指令條數,在沒有使用-O優化下,指令的條數差別跟運算符號的個數的倍數相同,就讓我感到疑惑了。
下面附上實驗的代碼:
#include <iostream> #include <time.h> #include <cstdio> #include <string> using namespace std;int _strlen(const char *str) {const unsigned int *p = (const unsigned int *) str;unsigned int low = 0x01010101;unsigned int high = 0x80808080;while (true) {unsigned int d = *p++;if (((d - low) & ~d & high) != 0) { // handle [0...256)//if (((d - low) & high) != 0) { // handle [0...128)break;}}const char *q = (const char *)(p - 1);for (int i = 0; i < (int)sizeof(unsigned int); i++) {if (q[i] == 0) {return q - str + i;}}return -1; }int _strlen2(const char *str) {const char *p = str;while (*p != 0) {p++;}return p - str; }int _strlen3(const char *str) {const unsigned long long *p = (const unsigned long long *) str;unsigned long long low = 0x0101010101010101;unsigned long long high = 0x8080808080808080;while (true) {unsigned long long d = *p++;if (((d - low) & ~d & high) != 0) { // handle [0...256)//if (((d - low) & high) != 0) { // handle [0...128)break;}}const char *q = (const char *)(p - 1);for (int i = 0; i < (int)sizeof(unsigned long long); i++) {if (q[i] == 0) {return q - str + i;}}return -1; }size_t _strlen4(const char *str) {const char *char_ptr;const unsigned long int *longword_ptr;unsigned long int longword, himagic, lomagic;/* Handle the first few characters by reading one character at a time.Do this until CHAR_PTR is aligned on a longword boundary. */for (char_ptr = str; ((unsigned long int) char_ptr& (sizeof (longword) - 1)) != 0;++char_ptr)if (*char_ptr == '\0')return char_ptr - str;/* All these elucidatory comments refer to 4-byte longwords,but the theory applies equally well to 8-byte longwords. */longword_ptr = (unsigned long int *) char_ptr;/* Bits 31, 24, 16, and 8 of this number are zero. Call these bitsthe "holes." Note that there is a hole just to the left ofeach byte, with an extra at the end:bits: 01111110 11111110 11111110 11111111bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDDThe 1-bits make sure that carries propagate to the next 0-bit.The 0-bits provide holes for carries to fall into. */himagic = 0x80808080L;lomagic = 0x01010101L;if (sizeof (longword) > 4){/* 64-bit version of the magic. *//* Do the shift in two steps to avoid a warning if long has 32 bits. */himagic = ((himagic << 16) << 16) | himagic;lomagic = ((lomagic << 16) << 16) | lomagic;}/*jif (sizeof (longword) > 8)abort ();*//* Instead of the traditional loop which tests each character,we will test a longword at a time. The tricky part is testingif *any of the four* bytes in the longword in question are zero. */for (;;){longword = *longword_ptr++;if (((longword - lomagic) & ~longword & himagic) != 0){/* Which of the bytes was the zero? If none of them were, it wasa misfire; continue the search. */const char *cp = (const char *) (longword_ptr - 1);if (cp[0] == 0)return cp - str;if (cp[1] == 0)return cp - str + 1;if (cp[2] == 0)return cp - str + 2;if (cp[3] == 0)return cp - str + 3;if (sizeof (longword) > 4){if (cp[4] == 0)return cp - str + 4;if (cp[5] == 0)return cp - str + 5;if (cp[6] == 0)return cp - str + 6;if (cp[7] == 0)return cp - str + 7;}}} }string gen_data() {string a;for (int i = 0; i < 100000; i++) {a.push_back('a');}return a; }double get_run_time(int(*fp)(const char *), const char *str, int count) {clock_t start = clock();for (int i = 0; i < count; i++) {fp(str);}clock_t end = clock();return (double)(end - start) / CLOCKS_PER_SEC; }double get_run_time(size_t(*fp)(const char *), const char *str, int count) {clock_t start = clock();for (int i = 0; i < count; i++) {fp(str);}clock_t end = clock();return (double)(end - start) / CLOCKS_PER_SEC; }int main() {string a = gen_data(); printf("%d\n", _strlen(a.c_str()));printf("%d\n", _strlen2(a.c_str()));printf("%d\n", _strlen3(a.c_str()));printf("%d\n", (int)strlen(a.c_str()));double time = get_run_time(&_strlen, a.c_str(), 10000);printf("%f\n", time);double time2 = get_run_time(&_strlen2, a.c_str(), 10000);printf("%f\n", time2);double time3 = get_run_time(&_strlen3, a.c_str(), 10000);printf("%f\n", time3);double time4 = get_run_time(&strlen, a.c_str(), 10000);printf("%f\n", time4);double time5 = get_run_time(&_strlen4, a.c_str(), 10000);printf("%f\n", time5); }?
?
轉載于:https://www.cnblogs.com/litstrong/p/3329194.html
總結
- 上一篇: NuGet的本地服务器安装与Packag
- 下一篇: iOS 汉字转拼音 PinYin4Obj