1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#!/usr/bin/python3
#
# Copyright (c) 2018-2019 Collabora, Ltd.
#
# SPDX-License-Identifier: Apache-2.0
#
# Author(s): Ryan Pavlik <ryan.pavlik@collabora.com>
"Utilities for processing files."
from pathlib import Path
class LinewiseFileProcessor:
    """A base class for code that processes an input file (or file handle) one line at a time.

    Subclasses implement process_line(), which is invoked once per line.
    While process_line() runs, the properties on this class expose the
    current line, a one-line look-ahead (next_line), and the lines of the
    current file that were already seen (get_preceding_line(s)).
    """

    def __init__(self):
        # Every line of the current file handed to process_line() so far,
        # including the line currently being processed (it is always last).
        self._lines = []
        self._line_num = 0
        self._next_line = None
        self._line = ''
        self._filename = Path()

    @property
    def filename(self):
        """The Path object of the currently processed file."""
        return self._filename

    @property
    def relative_filename(self):
        """The current file's Path relative to the current working directory.

        Path.relative_to() raises ValueError if the file's path does not
        start with the resolved working directory.
        """
        return self.filename.relative_to(Path('.').resolve())

    @property
    def line(self):
        """The current line, including any trailing whitespace and the line ending.

        This is None once an input that contained no lines has been processed.
        """
        return self._line

    @property
    def line_number(self):
        """Get 1-indexed line number."""
        return self._line_num

    @property
    def line_rstripped(self):
        """The current line without any trailing whitespace, or None if there is no current line."""
        if self.line is None:
            return None
        return self.line.rstrip()

    @property
    def trailing_whitespace(self):
        """The trailing whitespace of the current line that gets removed when accessing line_rstripped.

        Returns None if there is no current line, mirroring line_rstripped.
        """
        stripped = self.line_rstripped
        if stripped is None:
            return None
        return self.line[len(stripped):]

    @property
    def next_line(self):
        """Peek at the next line, if any."""
        return self._next_line

    @property
    def next_line_rstripped(self):
        """Peek at the next line, if any, without any trailing whitespace."""
        if self.next_line is None:
            return None
        return self.next_line.rstrip()

    def get_preceding_line(self, relative_index=-1):
        """Retrieve the line at the given (negative) offset from the current line.

        relative_index -- offset relative to the current line; -1 is the line
        immediately before it. Must be negative.

        Returns None if there is no line at that position.
        Raises RuntimeError if relative_index is not negative.
        """
        if relative_index >= 0:
            raise RuntimeError(
                'relativeIndex must be negative, to retrieve a preceding line.')
        current = self.line_number
        # line_number is None after an input with no lines was processed.
        if current is None or current + relative_index <= 0:
            # There is no line at this index
            return None
        # Line numbers are 1-indexed while the stored history is 0-indexed.
        return self._lines[current + relative_index - 1]

    def get_preceding_lines(self, num):
        """Get *up to* the preceding num lines, oldest first.

        Fewer may be returned if the requested number aren't available.
        A non-positive num yields an empty list.
        """
        if num <= 0:
            return []
        # The last stored entry is the current line itself, so exclude it.
        return self._lines[-(num + 1):-1]

    def process_line(self, line_num, line):
        """Implement in your subclass to handle each new line.

        line_num -- 1-indexed number of the line.
        line -- the line's text as yielded by the file handle.
        """
        raise NotImplementedError

    def _process_file_handle(self, file_handle):
        """Feed every line yielded by file_handle to process_line().

        Iteration runs one line ahead of processing so that next_line is
        already populated when process_line() is called for a given line.
        """
        # Start every input with an empty history: get_preceding_line()
        # indexes the history by the per-file line number, so lines left
        # over from a previously processed file would be returned instead
        # of this file's lines.
        self._lines = []

        # The line that has been read but not yet handed to process_line().
        pending_num = None
        pending_line = None

        def emit_pending():
            self._line_num = pending_num
            self._line = pending_line
            if pending_line is not None:
                self._lines.append(pending_line)
                self.process_line(pending_num, pending_line)

        for line_num, line in enumerate(file_handle, 1):
            self._next_line = line
            emit_pending()
            pending_num, pending_line = line_num, line

        # Finally process the left-over line, which has no successor.
        self._next_line = None
        emit_pending()

    def process_file(self, filename, file_handle=None):
        """Main entry point - call with a filename and optionally the file handle to read from.

        filename -- a str (converted to a resolved Path) or an object that is
        stored as-is and, when no handle is given, opened via .open().
        file_handle -- optional iterable of lines; when omitted, the file
        named by filename is opened as UTF-8 text.
        """
        if isinstance(filename, str):
            filename = Path(filename).resolve()

        self._filename = filename

        # Compare against None rather than truthiness, so an explicitly
        # supplied handle is always used, even if it evaluates as false.
        if file_handle is not None:
            self._process_file_handle(file_handle)
        else:
            with self._filename.open('r', encoding='utf-8') as f:
                self._process_file_handle(f)
|