# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

"""a simple LRU (Least-Recently-Used) cache module

This module provides very simple LRU (Least-Recently-Used) cache
functionality.

An *in-memory cache* is useful for storing the results of an
'expensive' process (one that takes a lot of time or resources) for
later re-use. Typical examples are accessing data from the filesystem,
a database, or a network location. If you know you'll need to re-read
the data again, it can help to keep it in a cache.

You *can* use a Python dictionary as a cache for some purposes.
However, if the results you're caching are large, or you have a lot of
possible results, this can be impractical memory-wise.

An *LRU cache*, on the other hand, only keeps _some_ of the results in
memory, which keeps you from overusing resources. The cache is bounded
by a maximum size; if you try to add more values to the cache, it will
automatically discard the values that you haven't read or written to
in the longest time. In other words, the least-recently-used items are
discarded. [1]_

.. [1] 'Discarded' here means 'removed from the cache'.

"""
from __future__ import generators
import time
from heapq import heappush, heappop, heapify

# the suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version
__version__ = "0.2-1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail.

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the
    existence of a cache record before reading or deleting it."""
    pass

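# Note: since CacheKeyError subclasses KeyError, callers may catch it
# under either name. A minimal sketch (`cache` and `key` are assumed to
# exist; `recompute` is a hypothetical fallback, not part of this module):
#
#     try:
#         value = cache[key]
#     except KeyError:
#         value = recompute(key)
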
class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32)  # new cache
        cache['foo'] = get_file_contents('foo')  # or whatever

        if 'foo' in cache:  # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size  # Maximum size

        print len(cache)  # 0 <= len(cache) <= cache.size

        cache.size = 10  # Auto-shrink on size assignment

        for i in range(50):  # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42]  # Manual deletion

        for j in cache:  # iterate (in LRU order)
            print j, cache[j]  # iterator produces keys, not values
    """

    class __Node(object):
        """Record of a cached value. Not for public consumption."""

        def __init__(self, key, obj, timestamp, sort_key):
            object.__init__(self)
            self.key = key
            self.obj = obj
            self.atime = timestamp
            self.mtime = self.atime
            self._sort_key = sort_key

        def __cmp__(self, other):
            return cmp(self._sort_key, other._sort_key)

        def __repr__(self):
            return "<%s %s => %s (%s)>" % \
                   (self.__class__, self.key, self.obj,
                    time.asctime(time.localtime(self.atime)))

    def __init__(self, size=DEFAULT_SIZE):
        # check arguments (type first, so a non-integer size is always
        # reported as a TypeError, not a ValueError)
        if type(size) is not type(0):
            raise TypeError, size
        elif size <= 0:
            raise ValueError, size
        object.__init__(self)
        self.__heap = []
        self.__dict = {}
        self.size = size
        """Maximum size of the cache.
        If more than 'size' elements are added to the cache,
        the least-recently-used ones will be discarded."""
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return key in self.__dict

    def __setitem__(self, key, obj):
        if key in self.__dict:
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

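    # Implementation note: the heap is ordered by the nodes' _sort_key
    # values (see __Node.__cmp__), so heappop() above always evicts the
    # least-recently-used entry; re-heapifying after an in-place update
    # costs O(n).
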
    def __getitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if key not in self.__dict:
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

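    # Example sketch for mtime(): deciding whether a cached value is
    # stale. `cache`, `key`, `MAX_AGE` and `refresh` are hypothetical
    # names, not part of this module:
    #
    #     if key in cache and time.time() - cache.mtime(key) > MAX_AGE:
    #         cache[key] = refresh(key)
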
if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        print "46 in cache"
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c
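    # Additional sketch (not part of the original demo): a key that has
    # been discarded raises CacheKeyError, which can also be caught as a
    # plain KeyError.
    try:
        print cache[0]  # 0 was discarded when the cache filled up
    except CacheKeyError, exc:
        print "not in cache:", exc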