Coverage for src/shellman/reader.py: 65.17%

137 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-09-03 19:58 +0200

"""Module to read a file/stream and pre-process the documentation lines.

The algorithm is as follows:

1. _preprocess_stream: yield documentation lines.
2. _preprocess_lines: group documentation lines as blocks of documentation.
3. _process_blocks: tidy blocks by tag in a dictionary.
"""

9 

10from __future__ import annotations 

11 

12import logging 

13import os 

14import re 

15from collections import defaultdict 

16from typing import Iterable, Iterator, Sequence 

17 

18from shellman.tags import TAGS, Tag 

19 

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)

# A tag followed by a value: optional leading whitespace, a `\` or `@` marker,
# the tag name (group 1), at least one whitespace character, then the value
# (group 2) running to the end of the line.
tag_value_regex = re.compile(r"^\s*[\\@]([_a-zA-Z][\w-]*)\s+(.+)$")

# A tag alone on its line: same marker and tag name (group 1), followed only
# by optional whitespace.
tag_no_value_regex = re.compile(r"^\s*[\\@]([_a-zA-Z][\w-]*)\s*$")

24 

25 

class DocType:
    """String constants naming the possible kinds of documentation."""

    TAG = "T"
    """A tag alone, without a value."""

    TAG_VALUE = "TV"
    """A tag together with its value."""

    VALUE = "V"
    """A value alone, without a tag."""

    INVALID = "I"
    """Invalid type."""

40 

41 

class DocLine:
    """A documentation line."""

    def __init__(self, path: str, lineno: int, tag: str | None, value: str) -> None:
        """Initialize the doc line.

        Parameters:
            path: The origin file path.
            lineno: The line number in the file.
            tag: The line's tag, if any.
            value: The line's value.
        """
        self.path = path
        self.lineno = lineno
        # A missing tag is normalized to an empty string so that `self.tag`
        # is always a `str`.
        self.tag = tag or ""
        self.value = value

    def __str__(self) -> str:
        """Return `path:lineno: doc_type: summary` for this line."""
        doc_type = self.doc_type
        if doc_type == DocType.TAG_VALUE:
            s = f'{self.tag}, "{self.value}"'
        elif doc_type == DocType.TAG:
            s = self.tag
        elif doc_type == DocType.VALUE:
            # f-string rather than `'"%s"' % value`: the `%` operator treats a
            # tuple operand as an argument list, and the other branches
            # already use f-strings.
            s = f'"{self.value}"'
        else:
            s = "invalid"
        return f"{self.path}:{self.lineno}: {doc_type}: {s}"

    @property
    def doc_type(self) -> str:
        """The line's doc type.

        Returns:
            `DocType.TAG_VALUE` when both tag and value are non-empty,
            `DocType.TAG` when only the tag is non-empty,
            `DocType.VALUE` when there is no tag and the value is not `None`,
            `DocType.INVALID` otherwise.
        """
        if self.tag:
            if self.value:
                return DocType.TAG_VALUE
            return DocType.TAG
        if self.value is not None:
            return DocType.VALUE
        return DocType.INVALID

81 

82 

class DocBlock:
    """A documentation block: an ordered list of doc lines."""

    def __init__(self, lines: list[DocLine] | None = None) -> None:
        """Initialize the doc block.

        Parameters:
            lines: The block's doc lines. A new empty list is used when omitted;
                a list passed in is stored as-is (not copied).
        """
        self.lines = [] if lines is None else lines

    def __bool__(self) -> bool:
        """A block is truthy when it holds at least one line."""
        return True if self.lines else False

    def __str__(self) -> str:
        """Return the string form of every line, one per row."""
        return "\n".join(map(str, self.lines))

    def append(self, line: DocLine) -> None:
        """Append a line to the block.

        Parameters:
            line: The doc line to append.
        """
        self.lines.append(line)

    @property
    def first_line(self) -> DocLine:
        """The block's first doc line (raises `IndexError` on an empty block)."""
        return self.lines[0]

    @property
    def doc_type(self) -> str:
        """The block type, i.e. the doc type of its first line."""
        return self.first_line.doc_type

    @property
    def lines_number(self) -> int:
        """The number of lines in the block."""
        return len(self.lines)

    @property
    def path(self) -> str:
        """The block's origin file path, taken from its first line."""
        return self.first_line.path

    @property
    def lineno(self) -> int:
        """The block's first line number."""
        return self.first_line.lineno

    @property
    def tag(self) -> str:
        """The tag of the block's first line, or an empty string for an empty block."""
        return self.first_line.tag if self.lines else ""

    @property
    def value(self) -> str:
        """The value of the block's first line."""
        return self.first_line.value

    @property
    def values(self) -> list[str]:
        """The values of all the block's lines, in order."""
        return [doc_line.value for doc_line in self.lines]

151 

152 

class DocStream:
    """A stream of shell code or documentation."""

    def __init__(self, stream: Iterable[str], filename: str = "") -> None:
        """Initialize the documentation stream.

        Parameters:
            stream: A text stream (any iterable of lines).
            filename: An optional file name.
        """
        # A stream has no path on disk.
        self.filepath = None
        self.filename = filename
        # Three-stage pipeline: extract doc lines, group them into blocks,
        # then sort the blocks by tag.
        doc_lines = _preprocess_stream(stream)
        doc_blocks = _preprocess_lines(doc_lines)
        self.sections = _process_blocks(doc_blocks)

166 

167 

class DocFile:
    """A shell script or documentation file."""

    def __init__(self, path: str) -> None:
        """Initialize the documentation file.

        The file is opened as UTF-8 text. If it cannot be decoded, an error
        is logged and the sections are left empty.

        Parameters:
            path: The path to the file.
        """
        self.filepath = path
        self.filename = os.path.basename(path)
        with open(path, encoding="utf-8") as stream:
            # Both pre-processing stages are lazy generators: the file is only
            # read (and decoded) while `_process_blocks` consumes them, hence
            # the `try` around that call.
            doc_blocks = _preprocess_lines(_preprocess_stream(stream))
            try:
                sections = _process_blocks(doc_blocks)
            except UnicodeDecodeError:
                logger.error(f"Cannot read file {path}")  # noqa: TRY400
                sections = {}
        self.sections = sections

185 

186 

def _preprocess_stream(stream: Iterable[str]) -> Iterator[tuple[str, int, str]]:
    """Yield the documentation lines found in a stream.

    A documentation line is one that starts with `##` once leading spaces
    and tabs are removed.

    Parameters:
        stream: An iterable of text lines. Its `name` attribute, when present,
            is reported as the origin of every line.

    Yields:
        Tuples of (stream name or empty string, 1-based line number,
        line without leading indentation and without line terminator).
    """
    name = getattr(stream, "name", "")
    for lineno, raw_line in enumerate(stream, 1):
        # Strip "\r" as well as "\n": streams that were not opened with
        # universal newlines (e.g. a plain list of strings) may carry CRLF
        # terminators, and a stray "\r" would otherwise end up in tag values.
        line = raw_line.lstrip(" \t").rstrip("\r\n")
        if line.startswith("##"):
            yield name, lineno, line

193 

194 

def _preprocess_lines(lines: Iterable[tuple[str, int, str]]) -> Iterator[DocBlock]:
    """Group documentation lines into blocks.

    A tagged line starts a new block unless its tag extends the current
    block's tag (i.e. starts with `<current tag>-`). Untagged lines are
    appended to the current block.

    Parameters:
        lines: Tuples of (path, line number, raw documentation line).

    Yields:
        The non-empty documentation blocks, in order of appearance.
    """
    block = DocBlock()
    for path, lineno, raw in lines:
        # Drop the first three characters: the `##` marker and the
        # character following it.
        text = raw[3:]

        found = tag_value_regex.search(text)
        if found:
            tag, value = found.groups()
        else:
            found = tag_no_value_regex.search(text)
            if found:
                tag, value = found.group(1), ""
            else:
                # Plain text: it belongs to whatever block is being built.
                block.append(DocLine(path, lineno, None, text))
                continue

        # Tagged line: close the current block unless this is a sub-tag of it.
        if block and not tag.startswith(block.tag + "-"):
            yield block
            block = DocBlock()
        block.append(DocLine(path, lineno, tag, value))

    if block:
        yield block

218 

219 

def _process_blocks(blocks: Iterable[DocBlock]) -> dict[str, list[Tag]]:
    """Turn documentation blocks into tag objects, grouped by tag name.

    Parameters:
        blocks: The documentation blocks to process.

    Returns:
        A plain dictionary mapping each block tag to the list of objects
        built by the matching tag class, in order of appearance.
    """
    by_tag: dict[str, list[Tag]] = {}
    for block in blocks:
        name = block.tag
        # Tags missing from the registry fall back to the `TAGS[None]` entry.
        tag_class = TAGS.get(name, TAGS[None])
        by_tag.setdefault(name, []).append(tag_class.from_lines(block.lines))
    return by_tag

226 

227 

def _merge(docs: Sequence[DocStream | DocFile], filename: str) -> DocStream:
    """Merge the sections of several documents into a single document.

    Parameters:
        docs: The documents to merge, in order.
        filename: The file name given to the merged document.

    Returns:
        A new `DocStream` whose sections concatenate, per section name,
        the values of every input document.
    """
    merged = DocStream(stream=[], filename=filename)
    combined = merged.sections
    for doc in docs:
        for section, values in doc.sections.items():
            combined.setdefault(section, []).extend(values)
    return merged