倒序按行读取大文件,速度快效率高(相比readlines()内存占用少)

   def reverse_readline(filename, buf_size=1024*16):
       """Yield the non-empty lines of a file from last to first.

       The file is read backwards in binary chunks of at most ``buf_size``
       bytes, so only about one chunk plus the line currently being
       assembled is held in memory (unlike ``readlines()``, which loads the
       whole file).

       Lines are split on ``b'\\n'`` while the data is still bytes and each
       complete line is decoded afterwards. A multi-byte UTF-8 character that
       straddles a chunk boundary is therefore reassembled before decoding
       instead of being dropped.

       Args:
           filename: Path of the file to read; passed straight to ``open``.
           buf_size: Maximum number of bytes read per chunk. Must be > 0.

       Yields:
           str: Each non-empty line, without its trailing ``'\\n'``, decoded
           as UTF-8 with undecodable bytes ignored. Blank lines are skipped
           regardless of where chunk boundaries fall. A ``'\\r'`` before the
           newline is not stripped.

       Raises:
           ValueError: If ``buf_size`` is not positive. Because this is a
               generator, the error surfaces on the first iteration.
       """
       # Local import: no file-level import of ``os`` is visible for this block.
       import os

       if buf_size <= 0:
           # A non-positive chunk size can never make progress through the file.
           raise ValueError("buf_size must be a positive integer")

       # Binary mode: offsets are exact byte positions and decoding is ours.
       with open(filename, 'rb') as fh:
           fh.seek(0, os.SEEK_END)
           position = fh.tell()  # start offset of the region already consumed
           # Bytes of the (possibly incomplete) line that begins the region
           # consumed so far; None until the first chunk has been read.
           pending = None
           while position > 0:
               read_size = min(buf_size, position)
               position -= read_size
               fh.seek(position)
               pieces = fh.read(read_size).split(b'\n')
               if pending is not None:
                   # The last piece of this chunk continues into the first
                   # piece of the chunk read before it. If this chunk ends in
                   # a newline the last piece is b'' and this simply restores
                   # the pending line unchanged.
                   pieces[-1] += pending
               # The first piece may continue into the preceding chunk, so
               # hold it back until its beginning has been seen.
               pending = pieces[0]
               for piece in reversed(pieces[1:]):
                   text = piece.decode('utf-8', errors='ignore')
                   if text:
                       yield text
           # Whatever is left is the first line of the file (None or empty
           # when the file is empty or starts with a newline).
           if pending:
               text = pending.decode('utf-8', errors='ignore')
               if text:
                   yield text

# Example usage: print the lines of the file at `path`, last line first.
for line in reverse_readline(path):
    print(line)

版权声明:本文为weixin_44695969原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。