您描述的情况似乎更适合使用生成器:它每次从文件中取出下一个整数(或下一行),再交给您处理。
def sanify(s):
    """Convert one matrix token such as ``'[[2'`` or ``'17],'`` to an ``int``.

    Args:
        s: Text of a single number, optionally surrounded by whitespace,
            separator commas, leading ``[`` characters and trailing ``]``
            characters.

    Returns:
        The integer value of the token.

    Raises:
        ValueError: If what remains after trimming is not an integer literal.
    """
    # The last token of a row looks like '17],\n'.  The separator comma and
    # the newline must go first, otherwise the closing bracket is not the
    # final character and would never be removed.
    token = s.strip(', \t\r\n')
    token = token.lstrip('[').rstrip(']')
    return int(token)
def get_numbers(file_obj):
    """Lazily yield every number of a bracketed matrix file with its position.

    The file is rewound to its start and then read one line at a time; each
    line is treated as one matrix row of comma-separated tokens.

    Args:
        file_obj: A seekable text file-like object.

    Yields:
        Tuples ``(value, i, j)`` where ``value`` is the result of
        ``sanify`` on the token, ``i`` is the zero-based line index and
        ``j`` is the zero-based index of the token among the non-blank
        tokens of that line.
    """
    file_obj.seek(0)
    for i, line in enumerate(file_obj):
        # Split on the bare comma and trim each piece: a row ending in
        # "],\n" then produces an empty trailing piece (skipped below)
        # instead of a token with the comma and newline still attached.
        tokens = (piece.strip() for piece in line.split(','))
        for j, token in enumerate(filter(None, tokens)):
            yield sanify(token), i, j
这样可以确保每次只有一行驻留在内存中。
用法如下:
import io

# Sample matrix text: one bracketed, comma-separated row per line.
s = '''[[2, 2, 6, 10, 2, 6, 7, 15, 14, 10, 17, 14, 7, 14, 15, 7, 17],
[3, 3, 7, 11, 3, 7, 0, 11, 7, 16, 0, 17, 17, 7, 16, 0, 0],
[4, 4, 8, 7, 4, 13, 0, 0, 15, 7, 8, 7, 0, 7, 0, 15, 13],
[5, 5, 9, 12, 5, 14, 7, 13, 9, 14, 16, 12, 13, 14, 7, 16, 7]]'''

# Wrap the text in a file-like object and stream (value, row, column) triples.
items = get_numbers(io.StringIO(s))
for item, i, j in items:
    print(item, i, j)
如果您确实需要访问矩阵中的任意元素,可以把上述逻辑改写成一个实现了 `__getitem__` 的类;只需记录每一行在文件中的起始位置即可。
对应的代码如下:
class MatrixData(object):
    """Indexed and sequential access to a matrix stored as text in a file.

    Each line of the file is treated as one matrix row of comma-separated
    values, optionally wrapped in ``[`` / ``]``.  The constructor records
    the offset at which every line starts; rows are parsed from the file
    each time they are requested.

    Iteration and item access both reposition the shared file object, so
    indexing while an iteration is in progress disturbs that iteration.
    """

    def __init__(self, file_obj):
        """Scan ``file_obj`` once to record line offsets and the shape.

        Args:
            file_obj: A seekable text file-like object.  It is kept and
                re-read on every access.
        """
        self._file_obj = file_obj
        # The final offset yielded is the end-of-file position, which does
        # not start a row, so it is dropped.
        self._line_offsets = list(self._get_line_offsets(file_obj))[:-1]
        file_obj.seek(0)
        # The column count is taken from the first line only.
        row = list(self._read_row(file_obj.readline()))
        self.shape = len(self._line_offsets), len(row)
        self.length = self.shape[0] * self.shape[1]

    def __len__(self):
        """Return rows times columns, as computed by the constructor."""
        return self.length

    def __iter__(self):
        """Yield ``(value, i, j)`` for every element, row by row."""
        self._file_obj.seek(0)
        for i, line in enumerate(self._file_obj):
            for j, item in enumerate(self._read_row(line)):
                yield item, i, j

    def __getitem__(self, indices):
        """Return the element at ``indices = (i, j)``.

        Raises:
            IndexError: If ``i`` or ``j`` is out of range.
        """
        i, j = indices
        self._file_obj.seek(self._line_offsets[i])
        line = self._file_obj.readline()
        # _read_row is a generator; materialize it so it can be indexed.
        row = list(self._read_row(line))
        return row[j]

    @staticmethod
    def _get_line_offsets(file_obj):
        """Yield the offset of the start of each line, then of end-of-file."""
        file_obj.seek(0)
        yield file_obj.tell()
        # readline() rather than ``for line in file_obj``: real text files
        # refuse tell() while they are being iterated with next().
        while file_obj.readline():
            yield file_obj.tell()

    @staticmethod
    def _read_row(line):
        """Yield the parsed values of one line, skipping blank pieces."""
        # Split on the bare comma and trim, so a row ending in "],\n"
        # leaves an empty trailing piece instead of a malformed token.
        for item in line.split(','):
            item = item.strip()
            if item:
                yield MatrixData._sanify(item)

    @staticmethod
    def _sanify(item, dtype=int):
        """Strip separators and enclosing brackets, then apply ``dtype``.

        Args:
            item: Text of a single value.
            dtype: Callable that converts the cleaned text (default ``int``).
        """
        token = item.strip(', \t\r\n')
        token = token.lstrip('[').rstrip(']')
        return dtype(token)
class MatrixData(object):
    """File-backed matrix reader supporting ``len``, iteration and ``m[i, j]``.

    Every line of the underlying file is read as one row of comma-separated
    values that may be wrapped in ``[`` / ``]``.  Only the starting offset
    of each line is stored on the instance; a row is re-read and re-parsed
    from the file whenever it is needed.

    Both ``__iter__`` and ``__getitem__`` seek on the same file object, so
    mixing them mid-iteration moves the file position under the iterator.
    """

    def __init__(self, file_obj):
        """Index the lines of ``file_obj`` and derive the matrix shape.

        Args:
            file_obj: A seekable text file-like object, retained for all
                later reads.
        """
        self._file_obj = file_obj
        # Drop the last offset: it is the end-of-file position, not a row.
        self._line_offsets = list(self._get_line_offsets(file_obj))[:-1]
        file_obj.seek(0)
        # Number of columns is measured on the first line alone.
        row = list(self._read_row(file_obj.readline()))
        self.shape = len(self._line_offsets), len(row)
        self.length = self.shape[0] * self.shape[1]

    def __len__(self):
        """Return the element count stored in ``self.length``."""
        return self.length

    def __iter__(self):
        """Yield ``(value, i, j)`` triples in row-major file order."""
        self._file_obj.seek(0)
        for i, line in enumerate(self._file_obj):
            for j, item in enumerate(self._read_row(line)):
                yield item, i, j

    def __getitem__(self, indices):
        """Return the value at row ``i``, column ``j`` of ``indices``.

        Raises:
            IndexError: If either index is out of range.
        """
        i, j = indices
        self._file_obj.seek(self._line_offsets[i])
        line = self._file_obj.readline()
        row = list(self._read_row(line))
        return row[j]

    @staticmethod
    def _get_line_offsets(file_obj):
        """Yield the position before each line and finally at end-of-file."""
        file_obj.seek(0)
        yield file_obj.tell()
        # Iterating the file with a ``for`` loop disables tell() on real
        # text files, so lines are consumed with readline() instead.
        while file_obj.readline():
            yield file_obj.tell()

    @staticmethod
    def _read_row(line):
        """Yield each non-blank value of ``line`` after conversion."""
        # Splitting on ',' (not ', ') and trimming keeps the row separator
        # and newline of a "],\n" line ending out of the last token.
        for item in line.split(','):
            item = item.strip()
            if item:
                yield MatrixData._sanify(item)

    @staticmethod
    def _sanify(item, dtype=int):
        """Remove separators and surrounding brackets, then convert.

        Args:
            item: Text of a single value.
            dtype: Conversion callable applied to the cleaned text
                (default ``int``).
        """
        token = item.strip(', \t\r\n')
        token = token.lstrip('[').rstrip(']')
        return dtype(token)
用法如下:
m = MatrixData(io.StringIO(s))

# total number of elements (rows * columns)
len(m)

# (number of rows, number of columns)
m.shape

# access a specific element by (row, column)
m[3, 12]

# iterate through every element together with its row and column index
for x, i, j in m:
    ...