Chris@87: #ifndef Py_DICTOBJECT_H Chris@87: #define Py_DICTOBJECT_H Chris@87: #ifdef __cplusplus Chris@87: extern "C" { Chris@87: #endif Chris@87: Chris@87: Chris@87: /* Dictionary object type -- mapping from hashable object to object */ Chris@87: Chris@87: /* The distribution includes a separate file, Objects/dictnotes.txt, Chris@87: describing explorations into dictionary design and optimization. Chris@87: It covers typical dictionary use patterns, the parameters for Chris@87: tuning dictionaries, and several ideas for possible optimizations. Chris@87: */ Chris@87: Chris@87: /* Chris@87: There are three kinds of slots in the table: Chris@87: Chris@87: 1. Unused. me_key == me_value == NULL Chris@87: Does not hold an active (key, value) pair now and never did. Unused can Chris@87: transition to Active upon key insertion. This is the only case in which Chris@87: me_key is NULL, and is each slot's initial state. Chris@87: Chris@87: 2. Active. me_key != NULL and me_key != dummy and me_value != NULL Chris@87: Holds an active (key, value) pair. Active can transition to Dummy upon Chris@87: key deletion. This is the only case in which me_value != NULL. Chris@87: Chris@87: 3. Dummy. me_key == dummy and me_value == NULL Chris@87: Previously held an active (key, value) pair, but that was deleted and an Chris@87: active pair has not yet overwritten the slot. Dummy can transition to Chris@87: Active upon key insertion. Dummy slots cannot be made Unused again Chris@87: (cannot have me_key set to NULL), else the probe sequence in case of Chris@87: collision would have no way to know they were once active. Chris@87: Chris@87: Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to Chris@87: hold a search finger. The me_hash field of Unused or Dummy slots has no Chris@87: meaning otherwise. Chris@87: */ Chris@87: Chris@87: /* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are Chris@87: * allocated directly in the dict object (in the ma_smalltable member). Chris@87: * It must be a power of 2, and at least 4. 8 allows dicts with no more Chris@87: * than 5 active entries to live in ma_smalltable (and so avoid an Chris@87: * additional malloc); instrumentation suggested this suffices for the Chris@87: * majority of dicts (consisting mostly of usually-small instance dicts and Chris@87: * usually-small dicts created to pass keyword arguments). Chris@87: */ Chris@87: #define PyDict_MINSIZE 8 Chris@87: Chris@87: typedef struct { Chris@87: /* Cached hash code of me_key. Note that hash codes are C longs. Chris@87: * We have to use Py_ssize_t instead because dict_popitem() abuses Chris@87: * me_hash to hold a search finger. Chris@87: */ Chris@87: Py_ssize_t me_hash; Chris@87: PyObject *me_key; Chris@87: PyObject *me_value; Chris@87: } PyDictEntry; Chris@87: Chris@87: /* Chris@87: To ensure the lookup algorithm terminates, there must be at least one Unused Chris@87: slot (NULL key) in the table. Chris@87: The value ma_fill is the number of non-NULL keys (sum of Active and Dummy); Chris@87: ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL Chris@87: values == the number of Active items). Chris@87: To avoid slowing down lookups on a near-full table, we resize the table when Chris@87: it's two-thirds full. Chris@87: */ Chris@87: typedef struct _dictobject PyDictObject; Chris@87: struct _dictobject { Chris@87: PyObject_HEAD Chris@87: Py_ssize_t ma_fill; /* # Active + # Dummy */ Chris@87: Py_ssize_t ma_used; /* # Active */ Chris@87: Chris@87: /* The table contains ma_mask + 1 slots, and that's a power of 2. Chris@87: * We store the mask instead of the size because the mask is more Chris@87: * frequently needed. Chris@87: */ Chris@87: Py_ssize_t ma_mask; Chris@87: Chris@87: /* ma_table points to ma_smalltable for small tables, else to Chris@87: * additional malloc'ed memory. ma_table is never NULL! This rule Chris@87: * saves repeated runtime null-tests in the workhorse getitem and Chris@87: * setitem calls. Chris@87: */ Chris@87: PyDictEntry *ma_table; Chris@87: PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash); Chris@87: PyDictEntry ma_smalltable[PyDict_MINSIZE]; Chris@87: }; Chris@87: Chris@87: PyAPI_DATA(PyTypeObject) PyDict_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictIterKey_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictIterValue_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictIterItem_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictKeys_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictItems_Type; Chris@87: PyAPI_DATA(PyTypeObject) PyDictValues_Type; Chris@87: Chris@87: #define PyDict_Check(op) \ Chris@87: PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_DICT_SUBCLASS) Chris@87: #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type) Chris@87: #define PyDictKeys_Check(op) (Py_TYPE(op) == &PyDictKeys_Type) Chris@87: #define PyDictItems_Check(op) (Py_TYPE(op) == &PyDictItems_Type) Chris@87: #define PyDictValues_Check(op) (Py_TYPE(op) == &PyDictValues_Type) Chris@87: /* This excludes Values, since they are not sets. */ Chris@87: # define PyDictViewSet_Check(op) \ Chris@87: (PyDictKeys_Check(op) || PyDictItems_Check(op)) Chris@87: Chris@87: PyAPI_FUNC(PyObject *) PyDict_New(void); Chris@87: PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key); Chris@87: PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item); Chris@87: PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); Chris@87: PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); Chris@87: PyAPI_FUNC(int) PyDict_Next( Chris@87: PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); Chris@87: PyAPI_FUNC(int) _PyDict_Next( Chris@87: PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, long *hash); Chris@87: PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); Chris@87: PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); Chris@87: PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); Chris@87: PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); Chris@87: PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); Chris@87: PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); Chris@87: PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); Chris@87: PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); Chris@87: PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp); Chris@87: Chris@87: /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ Chris@87: PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); Chris@87: Chris@87: /* PyDict_Merge updates/merges from a mapping object (an object that Chris@87: supports PyMapping_Keys() and PyObject_GetItem()). If override is true, Chris@87: the last occurrence of a key wins, else the first. The Python Chris@87: dict.update(other) is equivalent to PyDict_Merge(dict, other, 1). Chris@87: */ Chris@87: PyAPI_FUNC(int) PyDict_Merge(PyObject *mp, Chris@87: PyObject *other, Chris@87: int override); Chris@87: Chris@87: /* PyDict_MergeFromSeq2 updates/merges from an iterable object producing Chris@87: iterable objects of length 2. If override is true, the last occurrence Chris@87: of a key wins, else the first. The Python dict constructor dict(seq2) Chris@87: is equivalent to dict={}; PyDict_MergeFromSeq(dict, seq2, 1). Chris@87: */ Chris@87: PyAPI_FUNC(int) PyDict_MergeFromSeq2(PyObject *d, Chris@87: PyObject *seq2, Chris@87: int override); Chris@87: Chris@87: PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key); Chris@87: PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item); Chris@87: PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); Chris@87: Chris@87: #ifdef __cplusplus Chris@87: } Chris@87: #endif Chris@87: #endif /* !Py_DICTOBJECT_H */