V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
推荐学习书目
Learn Python the Hard Way
Python Sites
PyPI - Python Package Index
http://diveintopython.org/toc/index.html
Pocoo
值得关注的项目
PyPy
Celery
Jinja2
Read the Docs
gevent
pyenv
virtualenv
Stackless Python
Beautiful Soup
结巴中文分词
Green Unicorn
Sentry
Shovel
Pyflakes
pytest
Python 编程
pep8 Checker
Styles
PEP 8
Google Python Style Guide
Code Style from The Hitchhiker's Guide
jinya
V2EX  ›  Python

Python dict lookdict 函数分析

  •  
  •   jinya · 2017-03-25 18:54:09 +08:00 · 2251 次点击
    这是一个创建于 2560 天前的主题,其中的信息可能已经有所发展或是发生改变。

    代码里面有中文说明

    static PyDictEntry * lookdict(PyDictObject *mp, PyObject *key, register long hash) { register size_t i; register size_t perturb; register PyDictEntry *freeslot; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; register int cmp; PyObject *startkey;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    
    if (ep->me_key == dummy)
        freeslot = ep;
    else {
        if (ep->me_hash == hash) {
            startkey = ep->me_key;
            Py_INCREF(startkey);
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            if (cmp < 0)
                return NULL;
            if (ep0 == mp->ma_table && ep->me_key == startkey) {
                if (cmp > 0)
                    return ep;
            }
            else {
                /* The compare did major nasty stuff to the
                 * dict:  start over.
                 * XXX A clever adversary could prevent this
                 * XXX from terminating.
                 */
                return lookdict(mp, key, hash);  //此处逻辑甚是艰深,望大神指点
            }
        }
        freeslot = NULL;
    }
    
    /* In the loop, me_key == dummy is by far (factor of 100s) the
       least likely outcome, so test for that last. */
    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        if (ep->me_key == key)
            return ep;
        if (ep->me_hash == hash && ep->me_key != dummy) {
            startkey = ep->me_key;
            Py_INCREF(startkey);
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            if (cmp < 0)
                return NULL;
            if (ep0 == mp->ma_table && ep->me_key == startkey) {
                if (cmp > 0)
                    return ep;
            }
            else {
                /* The compare did major nasty stuff to the
                 * dict:  start over.
                 * XXX A clever adversary could prevent this
                 * XXX from terminating.
                 */
                return lookdict(mp, key, hash);
            }
        }
        else if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
    assert(0);          /* NOT REACHED */
    return 0;
    

    }

    3 条回复    2017-03-27 22:40:57 +08:00
    XYxe
        1
    XYxe  
       2017-03-27 10:45:22 +08:00
    你可以看看“.\Lib\test\crashers\nasty_eq_vs_dict.py ”文件,以及这个 http://bugs.python.org/issue14205
    大概就是一个 dict 在遍历的时候被修改了。
    jinya
        2
    jinya  
    OP
       2017-03-27 21:33:34 +08:00
    哥们,可以找到 Python 之父这么设计的原因相关的报道吗,这么看起来太费劲了。
    jinya
        3
    jinya  
    OP
       2017-03-27 22:40:57 +08:00
    这是 python 1.5.2 版本的源码,冲突直接走后面的代码

    static dictentry *
    lookdict(mp, key, hash)
    dictobject *mp;
    PyObject *key;
    register long hash;
    {
    register int i;
    register unsigned incr;
    register dictentry *freeslot;
    register unsigned int mask = mp->ma_size-1;
    dictentry *ep0 = mp->ma_table;
    register dictentry *ep;
    /* We must come up with (i, incr) such that 0 <= i < ma_size
    and 0 < incr < ma_size and both are a function of hash */
    i = (~hash) & mask;
    /* We use ~hash instead of hash, as degenerate hash functions, such
    as for ints <sigh>, can have lots of leading zeros. It's not
    really a performance risk, but better safe than sorry. */
    ep = &ep0[i];
    if (ep->me_key == NULL || ep->me_key == key)
    return ep;
    if (ep->me_key == dummy)
    freeslot = ep;
    else {
    if (ep->me_hash == hash &&
    PyObject_Compare(ep->me_key, key) == 0) //不冲突
    {
    return ep;
    }
    freeslot = NULL; //冲突
    }
    /* XXX What if PyObject_Compare returned an exception? */
    /* Derive incr from hash, just to make it more arbitrary. Note that
    incr must not be 0, or we will get into an infinite loop.*/
    incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
    if (!incr)
    incr = mask;
    for (;;) {
    ep = &ep0[(i+incr)&mask];
    if (ep->me_key == NULL) {
    if (freeslot != NULL)
    return freeslot;
    else
    return ep;
    }
    if (ep->me_key == dummy) {
    if (freeslot == NULL)
    freeslot = ep;
    }
    else if (ep->me_key == key ||
    (ep->me_hash == hash &&
    PyObject_Compare(ep->me_key, key) == 0)) {
    return ep;
    }
    /* XXX What if PyObject_Compare returned an exception? */
    /* Cycle through GF(2^n)-{0} */
    incr = incr << 1;
    if (incr > mask)
    incr ^= mp->ma_poly; /* This will implicitely clear
    the highest bit */
    }
    }
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   我们的愿景   ·   实用小工具   ·   1017 人在线   最高记录 6543   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 627ms · UTC 19:31 · PVG 03:31 · LAX 12:31 · JFK 15:31
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.