utils.py
author fabien
Sun, 08 Feb 2004 16:55:24 -0500
branchimmsview
changeset 31 13f56bb29b96
child 33 ad808d18c693
permissions -rw-r--r--
[svn] New bestofimms, cleanimms, imms.py and utils.py.

#!/usr/bin/python

import dircache, os.path
from sys import stderr
import readline

def set_file_completer():
    readline.set_completer_delims('')
    readline.set_completer(_file_completer)

def _file_completer(text, state):
    dirname, filename = os.path.split(text)
    if not os.path.isdir(dirname):
        return None
    paths = dircache.listdir(dirname)
    tlen = len(filename)
    checkpaths = []
    if tlen > 0:
        for fn in paths:
            if fn[:len(filename)] == filename:
                checkpaths.append(fn)
    else:
        checkpaths = paths
    if len(checkpaths) > state:
        return os.path.join(dirname, checkpaths[state])
    return None
    
def copy_file(origname, copyname):
    orig = file(origname, 'r')
    new = file(copyname, 'w')
    new.write(orig.read())
    new.flush()
    return new

def unique(s):
    """Return a list of the elements in s, but without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of
    [[2, 3], [1, 2]].

    For best speed, all sequence elements should be hashable.  Then
    unique() will usually work in linear time.

    If not possible, the sequence elements should enjoy a total
    ordering, and if list(s).sort() doesn't raise TypeError it's
    assumed that they do enjoy a total ordering.  Then unique() will
    usually work in O(N*log2(N)) time.

    If that's not possible either, the sequence elements must support
    equality-testing.  Then unique() will usually work in quadratic
    time.
    """

    n = len(s)
    if n == 0:
        return []

    # Try using a dict first, as that's the fastest and will usually
    # work.  If it doesn't work, it will usually fail quickly, so it
    # usually doesn't cost much to *try* it.  It requires that all the
    # sequence elements be hashable, and support equality comparison.
    u = {}
    try:
        for x in s:
            u[x] = 1
    except TypeError:
        del u  # move on to the next method
    else:
        return u.keys()

    # We can't hash all the elements.  Second fastest is to sort,
    # which brings the equal elements together; then duplicates are
    # easy to weed out in a single pass.
    # NOTE:  Python's list.sort() was designed to be efficient in the
    # presence of many duplicate elements.  This isn't true of all
    # sort functions in all languages or libraries, so this approach
    # is more effective in Python than it may be elsewhere.
    try:
        t = list(s)
        t.sort()
    except TypeError:
        del t  # move on to the next method
    else:
        assert n > 0
        last = t[0]
        lasti = i = 1
        while i < n:
            if t[i] != last:
                t[lasti] = last = t[i]
                lasti += 1
            i += 1
        return t[:lasti]

    # Brute force is all that's left.
    u = []
    for x in s:
        if x not in u:
            u.append(x)
    return u