lib/utils.py
changeset 72 34fcc8b2c1f5
parent 71 591ffdca8041
child 73 c078d8a04d76
equal deleted inserted replaced
71:591ffdca8041 72:34fcc8b2c1f5
     1 #!/usr/bin/python
       
     2 
       
     3 def unique(s):
       
     4     """Return a list of the elements in s, but without duplicates.
       
     5 
       
     6     For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
       
     7     unique("abcabc") some permutation of ["a", "b", "c"], and
       
     8     unique(([1, 2], [2, 3], [1, 2])) some permutation of
       
     9     [[2, 3], [1, 2]].
       
    10 
       
    11     For best speed, all sequence elements should be hashable.  Then
       
    12     unique() will usually work in linear time.
       
    13 
       
    14     If not possible, the sequence elements should enjoy a total
       
    15     ordering, and if list(s).sort() doesn't raise TypeError it's
       
    16     assumed that they do enjoy a total ordering.  Then unique() will
       
    17     usually work in O(N*log2(N)) time.
       
    18 
       
    19     If that's not possible either, the sequence elements must support
       
    20     equality-testing.  Then unique() will usually work in quadratic
       
    21     time.
       
    22     """
       
    23 
       
    24     n = len(s)
       
    25     if n == 0:
       
    26         return []
       
    27 
       
    28     # Try using a dict first, as that's the fastest and will usually
       
    29     # work.  If it doesn't work, it will usually fail quickly, so it
       
    30     # usually doesn't cost much to *try* it.  It requires that all the
       
    31     # sequence elements be hashable, and support equality comparison.
       
    32     u = {}
       
    33     try:
       
    34         for x in s:
       
    35             u[x] = 1
       
    36     except TypeError:
       
    37         del u  # move on to the next method
       
    38     else:
       
    39         return u.keys()
       
    40 
       
    41     # We can't hash all the elements.  Second fastest is to sort,
       
    42     # which brings the equal elements together; then duplicates are
       
    43     # easy to weed out in a single pass.
       
    44     # NOTE:  Python's list.sort() was designed to be efficient in the
       
    45     # presence of many duplicate elements.  This isn't true of all
       
    46     # sort functions in all languages or libraries, so this approach
       
    47     # is more effective in Python than it may be elsewhere.
       
    48     try:
       
    49         t = list(s)
       
    50         t.sort()
       
    51     except TypeError:
       
    52         del t  # move on to the next method
       
    53     else:
       
    54         assert n > 0
       
    55         last = t[0]
       
    56         lasti = i = 1
       
    57         while i < n:
       
    58             if t[i] != last:
       
    59                 t[lasti] = last = t[i]
       
    60                 lasti += 1
       
    61             i += 1
       
    62         return t[:lasti]
       
    63 
       
    64     # Brute force is all that's left.
       
    65     u = []
       
    66     for x in s:
       
    67         if x not in u:
       
    68             u.append(x)
       
    69     return u
       
    70 
       
    71