# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to re-read
the data again, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have a lot of
possible results, this can be impractical memory-wise.

An *LRU cache*, on the other hand, only keeps _some_ of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written to
in the longest time. In other words, the least-recently-used items are
discarded. [1]_

.. [1] 'Discarded' here means 'removed from the cache'.

"""
from __future__ import generators
import time
from heapq import heappush, heappop, heapify

# the suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version
__version__ = "0.2-1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it."""
    pass

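# Note: since CacheKeyError subclasses KeyError, callers may catch it
# under either name. A minimal sketch (`cache` and `key` are assumed to
# exist; `recompute` is a hypothetical fallback, not part of this module):
#
#     try:
#         value = cache[key]
#     except KeyError:
#         value = recompute(key)
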
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32)  # new cache
        cache['foo'] = get_file_contents('foo')  # or whatever

        if 'foo' in cache:  # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size  # Maximum size

        print len(cache)  # 0 <= len(cache) <= cache.size

        cache.size = 10  # Auto-shrink on size assignment

        for i in range(50):  # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42]  # Manual deletion

        for j in cache:  # iterate (in LRU order)
            print j, cache[j]  # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __cmp__(self, other):
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj,
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # check arguments (type first, so a non-integer size is always
        # reported as a TypeError, not a ValueError)
        if type(size) is not type(0):
            raise TypeError, size
        elif size <= 0:
            raise ValueError, size
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

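    # Implementation note: the heap is ordered by the nodes' _sort_key
    # values (see __Node.__cmp__), so heappop() above always evicts the
    # least-recently-used entry; re-heapifying after an in-place update
    # costs O(n).
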
    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

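    # Example sketch for mtime(): deciding whether a cached value is
    # stale. `cache`, `key`, `MAX_AGE` and `refresh` are hypothetical
    # names, not part of this module:
    #
    #     if key in cache and time.time() - cache.mtime(key) > MAX_AGE:
    #         cache[key] = refresh(key)
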
if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        print "46 in cache"
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
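    # Additional sketch (not part of the original demo): a key that has
    # been discarded raises CacheKeyError, which can also be caught as a
    # plain KeyError.
    try:
        print cache[0]  # 0 was discarded when the cache filled up
    except CacheKeyError, exc:
        print "not in cache:", exc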