allura
Revision | 530fcc317acfcacb298a917998bf7133b937cd27 (tree) |
---|---|
Time | 2011-07-15 02:02:56 |
Author | Rick Copeland <rcopeland@geek...> |
Committer | Dave Brondsema |
[#2423] Add logic to ensure_index to drop extra duplicate keys for unique indexes
Signed-off-by: Rick Copeland <rcopeland@geek.net>
@@ -1,7 +1,9 @@ | ||
1 | 1 | import sys |
2 | 2 | from collections import defaultdict |
3 | +from itertools import groupby | |
3 | 4 | |
4 | 5 | from pylons import c, g |
6 | +from pymongo.errors import DuplicateKeyError | |
5 | 7 | |
6 | 8 | from ming.orm import mapper, session, Mapper |
7 | 9 | from ming.orm.declarative import MappedClass |
@@ -173,11 +175,32 @@ class EnsureIndexCommand(base.Command): | ||
173 | 175 | # Ensure all indexes |
174 | 176 | for name, idx in uindexes.iteritems(): |
175 | 177 | base.log.info('...... ensure %s:%s', collection.name, idx) |
176 | - collection.ensure_index(idx.index_spec, background=True, unique=True) | |
178 | + while True: | |
179 | + try: | |
180 | + collection.ensure_index(idx.index_spec, unique=True) | |
181 | + break | |
182 | + except DuplicateKeyError, err: | |
183 | + base.log.info('Found dupe key(%s), eliminating dupes', err) | |
184 | + self._remove_dupes(collection, idx.index_spec) | |
177 | 185 | for name, idx in indexes.iteritems(): |
178 | 186 | base.log.info('...... ensure %s:%s', collection.name, idx) |
179 | 187 | collection.ensure_index(idx.index_spec, background=True) |
180 | 188 | |
def _remove_dupes(self, collection, spec):
    """Delete documents that collide on the key fields of ``spec``.

    Documents are scanned in ``spec`` order and grouped by the values of
    the indexed fields.  The first document of every group is kept; the
    ``_id`` of each later one is collected and the whole batch is removed
    with a single ``$in`` query once the scan is finished.

    :param collection: collection object exposing ``create_index``,
        ``find``, ``drop_index`` and ``remove`` (presumably a pymongo
        collection -- confirm against the caller).
    :param spec: index specification, a sequence whose items carry the
        field name in position 0; it is also passed verbatim to
        ``sort()``.
    """
    def lookup(doc, path):
        # A dotted index field such as 'a.b' refers to a nested value, so
        # it has to be walked one level at a time.  A flat doc.get('a.b')
        # yields None for every document, which makes the entire
        # collection look like one group of duplicates and would delete
        # all but one document.
        # NOTE(review): paths that traverse arrays are not resolved here
        # and still come back as None -- confirm no unique index needs it.
        value = doc
        for part in path.split('.'):
            if not isinstance(value, dict):
                return None
            value = value.get(part, None)
        return value

    fields = [f[0] for f in spec]

    def keyfunc(doc):
        return tuple(lookup(doc, f) for f in fields)

    dupes = []
    # Temporary non-unique index over the same keys; presumably it exists
    # only to back the sorted scan below.
    iname = collection.create_index(spec)
    try:
        q = collection.find({}, fields=fields).sort(spec)
        # groupby only merges adjacent items, hence the sort above.
        for key, doc_iter in groupby(q, key=keyfunc):
            docs = list(doc_iter)
            if len(docs) > 1:
                base.log.info('Found dupes with %s', key)
                dupes.extend(doc['_id'] for doc in docs[1:])
    finally:
        # Drop the helper index even when the scan fails so that it is
        # never left behind on the collection.
        collection.drop_index(iname)
    if dupes:
        collection.remove(dict(_id={'$in': dupes}))
203 | + | |
181 | 204 | def build_model_inheritance_graph(): |
182 | 205 | graph = dict((m.mapped_class, ([], [])) for m in Mapper.all_mappers()) |
183 | 206 | for cls, (parents, children) in graph.iteritems(): |