'Overriding dict.update() method in subclass to prevent overwriting dict keys
Earlier today, I read the question "Raise error if python dict comprehension overwrites a key" and decided to try my hand at an answer. The method that naturally occurred to me was to subclass dict
for this. However, I got stuck on my answer, and now I'm obsessed with getting this worked out for myself.
Notes:
- No - I do not plan on turning in the answer to this question as an answer to the other question.
- This is purely an intellectual exercise for me at this point. As a practical matter, I would almost certainly use a
namedtuple
or a regular dictionary wherever I have a requirement for something like this.
My (not quite working) Solution:
class DuplicateKeyError(KeyError):
pass
class UniqueKeyDict(dict):
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
if key in self: # Validate key doesn't already exist.
raise DuplicateKeyError('Key \'{}\' already exists with value \'{}\'.'.format(key, self[key]))
super().__setitem__(key, value)
def update(self, *args, **kwargs):
if args:
if len(args) > 1:
raise TypeError('Update expected at most 1 arg. Got {}.'.format(len(args)))
else:
try:
for k, v in args[0]:
self.__setitem__(k, v)
except ValueError:
pass
for k in kwargs:
self.__setitem__(k, kwargs[k])
My Tests and Expected Results
>>> ukd = UniqueKeyDict((k, int(v)) for k, v in ('a1', 'b2', 'c3', 'd4')) # Should succeed.
>>> ukd['e'] = 5 # Should succeed.
>>> print(ukd)
{'a': 1, 'b': 2, 'c': 3, d: 4, 'e': 5}
>>> ukd['a'] = 5 # Should fail.
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 8, in __setitem__
__main__.DuplicateKeyError: Key 'a' already exists with value '1'.
>>> ukd.update({'a': 5}) # Should fail.
>>> ukd = UniqueKeyDict((k, v) for k, v in ('a1', 'b2', 'c3', 'd4', 'a5')) # Should fail.
>>>
I'm certain the issue is in my update()
method, but I'm not able to determine just what I'm doing wrong.
Below is the original version of my update()
method. This version fails as expected on duplicates when calling my_dict.update({k: v})
for a key/value pair already in the dict, but does not fail when including a duplicate key while creating the original dict, due to the fact that converting the args to a dict
results in default behavior for a dictionary, i.e., overwriting the duplicate key.
def update(self, *args, **kwargs):
for k, v in dict(*args, **kwargs).items():
self.__setitem__(k, v)
Solution 1:[1]
Note that, per the documentation:
dict.update
takes a singleother
parameter, "either another dictionary object or an iterable of key/value pairs" (I've usedcollections.Mapping
to test for this) and "If keyword arguments are specified, the dictionary is then updated with those key/value pairs"; anddict()
takes a singleMapping
orIterable
along with optional**kwargs
(the same asupdate
accepts...).
This is not quite the interface you have implemented, which is leading to some issues. I would have implemented this as follows:
from collections import Mapping
class DuplicateKeyError(KeyError):
pass
class UniqueKeyDict(dict):
def __init__(self, other=None, **kwargs):
super().__init__()
self.update(other, **kwargs)
def __setitem__(self, key, value):
if key in self:
msg = 'key {!r} already exists with value {!r}'
raise DuplicateKeyError(msg.format(key, self[key]))
super().__setitem__(key, value)
def update(self, other=None, **kwargs):
if other is not None:
for k, v in other.items() if isinstance(other, Mapping) else other:
self[k] = v
for k, v in kwargs.items():
self[k] = v
In use:
>>> UniqueKeyDict((k, v) for k, v in ('a1', 'b2', 'c3', 'd4'))
{'c': '3', 'd': '4', 'a': '1', 'b': '2'}
>>> UniqueKeyDict((k, v) for k, v in ('a1', 'b2', 'c3', 'a4'))
Traceback (most recent call last):
File "<pyshell#8>", line 1, in <module>
UniqueKeyDict((k, v) for k, v in ('a1', 'b2', 'c3', 'a4'))
File "<pyshell#7>", line 5, in __init__
self.update(other, **kwargs)
File "<pyshell#7>", line 15, in update
self[k] = v
File "<pyshell#7>", line 10, in __setitem__
raise DuplicateKeyError(msg.format(key, self[key]))
DuplicateKeyError: "key 'a' already exists with value '1'"
and:
>>> ukd = UniqueKeyDict((k, v) for k, v in ('a1', 'b2', 'c3', 'd4'))
>>> ukd.update((k, v) for k, v in ('e5', 'f6')) # single Iterable
>>> ukd.update({'h': 8}, g='7') # single Mapping plus keyword args
>>> ukd
{'e': '5', 'f': '6', 'a': '1', 'd': '4', 'c': '3', 'h': 8, 'b': '2', 'g': '7'}
If you ever end up using this, I'd be inclined to give it a different __repr__
to avoid confusion!
Solution 2:[2]
It's interesting that simply overriding __setitem__
is not enough to change the behavior of update
in dict
. I would have expected that dict
would use its __setitem__
method when it's being updated using update
. In all cases, I think it's better to implement collections.MutableMapping
to achieve the desired result without touching update
:
import collections
class UniqueKeyDict(collections.MutableMapping, dict):
def __init__(self, *args, **kwargs):
self._dict = dict(*args, **kwargs)
def __getitem__(self, key):
return self._dict[key]
def __setitem__(self, key, value):
if key in self:
raise DuplicateKeyError("Key '{}' already exists with value '{}'.".format(key, self[key]))
self._dict[key] = value
def __delitem__(self, key):
del self._dict[key]
def __iter__(self):
return iter(self._dict)
def __len__(self):
return len(self._dict)
Edit: included dict
as base class to satisfy the isinstance(x, dict)
check.
Solution 3:[3]
I am not sure this is the problem but I just noticed that you are treating your args
in the update
method as a list of pairs:
for k, v in args[0]
while you are actually supplying a dictionary:
ukd.update({'a': 5})
Have you tried this:
try:
for k, v in args[0].iteritems():
self.__setitem__(k, v)
except ValueError:
pass
EDIT: Probably this error went unnoticed because you are except
ing a ValueError
, which is what treating a dictionary as a list of pairs will raise.
Solution 4:[4]
I was able to achieve the goal with the following code:
class UniqueKeyDict(dict):
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
if self.has_key(key):
raise DuplicateKeyError("%s is already in dict" % key)
dict.__setitem__(self, key, value)
def update(self, *args, **kwargs):
for d in list(args) + [kwargs]:
for k,v in d.iteritems():
self[k]=v
Solution 5:[5]
This interesting question is a bit older and has already some solid answers (my favourite is the one from sirfz). Nevertheless, I would like to propose yet another one. You could use the dict
-wrapper UserDict. If I'm not mistaken, this should do the job you were looking for:
from collections import UserDict
class DuplicateKeyError(KeyError):
pass
class UniqueKeyDict(UserDict):
def __setitem__(self, key, value):
if key in self:
raise DuplicateKeyError(f"Key '{key}' already exists with value '{self[key]}'")
self.data[key] = value
As with the usage of collections.abc.MutableMapping
the update
method gets modified implicitly. But in contrast, you only have to (re)define the __setitem__
method. Since your modification is rather minor, the use of UserDict
seems like an appropriate approach to me.
An instance of this class is not an instance of dict
, but it is an instance of collections.abc.Mapping
, which should be used for testing for dict
-likeness.
Solution 6:[6]
Why not do something along the lines inspired by MultiKeyDict using setdefault? This leaves the update method as a way to override the currently stored values, breaking, I know, the intent that d[k] = v == d.update({k, v}). In my application the override was useful. So before flagging this as not answering the OP question, please consider this answer might be useful for someone else.
class DuplicateKeyError(KeyError):
"""File exception rasised by UniqueKeyDict"""
def __init__(self, key, value):
msg = 'key {!r} already exists with value {!r}'.format(key, value)
super(DuplicateKeyError, self).__init__(msg)
class UniqueKeyDict(dict):
"""Subclass of dict that raises a DuplicateKeyError exception"""
def __setitem__(self, key, value):
if key in self:
raise DuplicateKeyError(key, self[key])
self.setdefault(key, value)
class MultiKeyDict(dict):
"""Subclass of dict that supports multiple values per key"""
def __setitem__(self, key, value):
self.setdefault(key, []).append(value)
Rather new to python so flame on, probably deserve it...
Solution 7:[7]
According to the given answers and help(dict.update)
I did force_update
method. I will appreciate it if you will post in comments a real python code of dict.update
method (I found only C
source code).
class UniqueKeyDict(UserDict):
"""
Disable key overwriting if already exists
"""
def __setitem__(self, key=None, value=None, **kwargs):
if (key in self # Different options to override key (just a fun)
and not (isinstance(value, Iterable) and len(value) == 3 and self[key] in value and value[-1] is True)
and not (isinstance(value, Iterable) and len(value) == 2 and value[-1] == '--force')):
raise DuplicateKeyError(f"Key '{key}' already exists with value '{self[key]}'")
self.data[key] = value
def force_update(self, *a, **kw) -> None:
"""
See help({}.update)
"""
a = a[0] if len(a) == 1 else None # *a is always tuple
if a and hasattr(a, 'keys'):
for k in a:
self.pop(k, None)
self[k] = a[k]
elif a:
for k, v in a:
self.pop(k, None)
self[k] = v
for k in kw:
self.pop(k, None)
self[k] = kw[k]
# Check it, it should cover all the cases with regular dict.update method
du = UniqueKeyDict()
du['e'] = 3
du.force_update({'q': 1, 'qq': 2, 'qqq': 3})
du.update({'q': 1, 'qq': 2, 'qqq': 3}) # Error
du.force_update({'q': 1, 'qq': 2, 'qqq': 3}) # No error
du.force_update({})
du.force_update([])
du.force_update(w=2, ww=22, www=222)
du.force_update([[1, 2], [3, 4], [5, 6], [7, 8]])
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | |
Solution 2 | |
Solution 3 | Simone Bronzini |
Solution 4 | |
Solution 5 | Timus |
Solution 6 | |
Solution 7 |