Coverage for src/shellman/reader.py: 70.62%

137 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-27 14:35 +0100

1"""Module to read a file/stream and pre-process the documentation lines. 

2 

3Algorithm is as follows: 

4 

51. preprocess_stream: yield documentation lines. 

62. preprocess_lines: group documentation lines as blocks of documentation. 

73. process_blocks: tidy blocks by tag in a dictionary. 

8""" 

9 

10from __future__ import annotations 

11 

12import logging 

13import os 

14import re 

15from collections import defaultdict 

16from typing import TYPE_CHECKING 

17 

18from shellman.tags import TAGS, Tag 

19 

20if TYPE_CHECKING: 

21 from collections.abc import Iterable, Iterator, Sequence 

22 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches a tag followed by a value: optional leading whitespace, a backslash
# or `@` marker, the tag name (a letter or underscore followed by word
# characters or hyphens), at least one whitespace character, then a non-empty
# value captured up to the end of the line.
tag_value_regex = re.compile(r"^\s*[\\@]([_a-zA-Z][\w-]*)\s+(.+)$")
# Matches a tag alone on its line: same marker and tag name as above,
# followed only by optional trailing whitespace.
tag_no_value_regex = re.compile(r"^\s*[\\@]([_a-zA-Z][\w-]*)\s*$")

27 

28 

class DocType:
    """String constants identifying the kind of a documentation line."""

    TAG = "T"
    """Marker for a line holding only a tag."""

    TAG_VALUE = "TV"
    """Marker for a line holding a tag and its value."""

    VALUE = "V"
    """Marker for a line holding only a value."""

    INVALID = "I"
    """Marker for a line that is neither a tag nor a value."""

43 

44 

class DocLine:
    """One line of documentation, with its origin and optional tag."""

    def __init__(self, path: str, lineno: int, tag: str | None, value: str) -> None:
        """Store the line's origin, tag and value.

        Parameters:
            path: The origin file path.
            lineno: The line number in the file.
            tag: The line's tag, if any (a missing tag is stored as an empty string).
            value: The line's value.
        """
        self.path = path
        self.lineno = lineno
        self.tag = tag if tag else ""
        self.value = value

    def __str__(self) -> str:
        kind = self.doc_type
        # One rendering per known doc type; anything else is reported as invalid.
        renderings = {
            DocType.TAG_VALUE: f'{self.tag}, "{self.value}"',
            DocType.TAG: self.tag,
            DocType.VALUE: f'"{self.value}"',
        }
        summary = renderings.get(kind, "invalid")
        return f"{self.path}:{self.lineno}: {kind}: {summary}"

    @property
    def doc_type(self) -> str:
        """The `DocType` constant describing this line.

        A tagged line is `TAG_VALUE` when its value is truthy and `TAG` otherwise.
        An untagged line is `VALUE` unless its value is `None`, in which case
        it is `INVALID`.
        """
        if not self.tag:
            return DocType.INVALID if self.value is None else DocType.VALUE
        return DocType.TAG_VALUE if self.value else DocType.TAG

84 

85 

class DocBlock:
    """A group of consecutive documentation lines."""

    def __init__(self, lines: list[DocLine] | None = None) -> None:
        """Create the block, optionally from existing lines.

        Parameters:
            lines: The block's doc lines. The given list is stored as is;
                a new empty list is created when omitted.
        """
        self.lines = [] if lines is None else lines

    def __bool__(self) -> bool:
        return len(self.lines) > 0

    def __str__(self) -> str:
        return "\n".join(map(str, self.lines))

    def append(self, line: DocLine) -> None:
        """Add a line at the end of the block.

        Parameters:
            line: The doc line to append.
        """
        self.lines.append(line)

    @property
    def doc_type(self) -> str:
        """The doc type of the block's first line."""
        return self.lines[0].doc_type

    @property
    def first_line(self) -> DocLine:
        """The first doc line of the block."""
        return self.lines[0]

    @property
    def lines_number(self) -> int:
        """How many lines the block contains."""
        return len(self.lines)

    @property
    def path(self) -> str:
        """The origin file path of the block's first line."""
        return self.first_line.path

    @property
    def lineno(self) -> int:
        """The line number of the block's first line."""
        return self.first_line.lineno

    @property
    def tag(self) -> str:
        """The tag of the block's first line, or an empty string for an empty block."""
        return self.first_line.tag if self.lines else ""

    @property
    def value(self) -> str:
        """The value of the block's first line."""
        return self.first_line.value

    @property
    def values(self) -> list[str]:
        """The values of all the block's lines, in order."""
        return [doc_line.value for doc_line in self.lines]

154 

155 

class DocStream:
    """Documentation read from a stream of shell code or documentation."""

    def __init__(self, stream: Iterable[str], filename: str = "") -> None:
        """Parse the stream and store the resulting sections.

        Parameters:
            stream: A text stream, iterated line by line.
            filename: An optional file name.
        """
        self.filepath = None
        self.filename = filename
        # Three-step pipeline: doc lines -> blocks -> sections keyed by tag.
        doc_lines = _preprocess_stream(stream)
        blocks = _preprocess_lines(doc_lines)
        self.sections = _process_blocks(blocks)

169 

170 

class DocFile:
    """Documentation read from a shell script or documentation file."""

    def __init__(self, path: str) -> None:
        """Read the file as UTF-8 and store the resulting sections.

        A file that cannot be decoded is reported through the module logger
        and ends up with no sections.

        Parameters:
            path: The path to the file.
        """
        self.filepath = path
        self.filename = os.path.basename(path)
        with open(path, encoding="utf-8") as stream:
            try:
                # Three-step pipeline: doc lines -> blocks -> sections keyed by tag.
                doc_lines = _preprocess_stream(stream)
                blocks = _preprocess_lines(doc_lines)
                sections = _process_blocks(blocks)
            except UnicodeDecodeError:
                logger.error(f"Cannot read file {path}")  # noqa: TRY400
                sections = {}
            self.sections = sections

188 

189 

190def _preprocess_stream(stream: Iterable[str]) -> Iterator[tuple[str, int, str]]: 

191 name = getattr(stream, "name", "") 

192 for lineno, line in enumerate(stream, 1): 

193 line = line.lstrip(" \t").rstrip("\n") # noqa: PLW2901 

194 if line.startswith("##"): 

195 yield name, lineno, line 

196 

197 

def _preprocess_lines(lines: Iterable[tuple[str, int, str]]) -> Iterator[DocBlock]:
    """Group documentation lines into blocks.

    A tagged line closes the current non-empty block and starts a new one,
    unless its tag starts with the current block's tag followed by a hyphen.
    Untagged lines are appended to the current block.

    Parameters:
        lines: Tuples of (path, line number, documentation line).

    Yields:
        The non-empty documentation blocks, in order.
    """
    block = DocBlock()
    for path, lineno, raw in lines:
        # Drop the first three characters (the `##` marker plus one separator).
        # NOTE(review): a line such as `##text` loses its first character here;
        # confirm that a separator after `##` is mandatory.
        text = raw[3:]
        with_value = tag_value_regex.search(text)
        if with_value:
            tag, value = with_value.groups()
        else:
            bare_tag = tag_no_value_regex.search(text)
            if not bare_tag:
                # Plain continuation line: no tag, the whole text is the value.
                block.append(DocLine(path, lineno, None, text))
                continue
            tag, value = bare_tag.group(1), ""
        if block and not tag.startswith(block.tag + "-"):
            yield block
            block = DocBlock()
        block.append(DocLine(path, lineno, tag, value))
    if block:
        yield block

221 

222 

def _process_blocks(blocks: Iterable[DocBlock]) -> dict[str, list[Tag]]:
    """Build tag objects from blocks and group them by tag name.

    Parameters:
        blocks: The documentation blocks to process.

    Returns:
        A dictionary mapping each block tag to the list of objects built by
        the matching class in `TAGS` (falling back to `TAGS[None]`).
    """
    sections: dict[str, list[Tag]] = {}
    for block in blocks:
        tag_name = block.tag
        tag_class = TAGS.get(tag_name, TAGS[None])
        sections.setdefault(tag_name, []).append(tag_class.from_lines(block.lines))
    return sections

229 

230 

def _merge(docs: Sequence[DocStream | DocFile], filename: str) -> DocStream:
    """Combine the sections of several documents into a new `DocStream`.

    Parameters:
        docs: The documents whose sections are merged, in order.
        filename: The file name given to the merged document.

    Returns:
        A new `DocStream` whose sections hold, for each section name, the
        values of all given documents concatenated in order.
    """
    merged = DocStream(stream=[], filename=filename)
    combined = merged.sections
    for doc in docs:
        for section, values in doc.sections.items():
            # Fresh lists are created here, so the input documents' lists are never mutated.
            combined.setdefault(section, []).extend(values)
    return merged