Coverage for tests/test_words.py: 100.00%

31 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-05 19:28 +0200

1"""Tests for the `words` module."""

2 

3from __future__ import annotations 

4 

5import pytest 

6 

7from mkdocs_spellcheck.words import get_words 

8 

9 

@pytest.mark.parametrize("tag", ["p", "em", "div", "article"])
def test_remove_tags(tag: str) -> None:
    """Check that the name of a paired HTML tag is not returned as a word.

    Parameters:
        tag: Name of the HTML element wrapping the sample text (parametrized).
    """
    # The sample also carries two void elements after the wrapped text.
    assert tag not in get_words(f"<{tag}>Some text.</{tag}><br><hr/>", min_length=1)

20 

21 

def test_remove_single_tags() -> None:
    """Check that single tag names such as `br` and `img` are not returned as words."""
    extracted = get_words("Some text.<br><br/><br /><img /></br>", min_length=1)
    for tag_name in ("br", "img"):
        assert tag_name not in extracted

28 

29 

@pytest.mark.parametrize(
    ("text", "known_words", "expected"),
    [
        # `set()` rather than `{}`: the latter is an empty dict, which
        # contradicts the `set[str]` annotation of `known_words`.
        ("hello", set(), ["hello"]),
        ("hello", {"hello"}, []),
        ("hello", {"world"}, ["hello"]),
    ],
)
def test_ignore_known_words(text: str, known_words: set[str], expected: list[str]) -> None:
    """Assert known words are correctly removed.

    Parameters:
        text: Some text (parametrized).
        known_words: Words that must be left out of the result (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, known_words=known_words) == expected

47 

48 

@pytest.mark.parametrize(
    ("text", "min_length", "expected"),
    [
        ("a bb ccc", 0, ["a", "bb", "ccc"]),
        ("a bb ccc", 1, ["a", "bb", "ccc"]),
        ("a bb ccc", 2, ["bb", "ccc"]),
        ("a bb ccc", 3, ["ccc"]),
        ("a bb ccc", 4, []),
    ],
)
def test_ignore_too_short_words(text: str, min_length: int, expected: list[str]) -> None:
    """Check that words shorter than `min_length` are left out of the result.

    Parameters:
        text: Sample text holding words of increasing length (parametrized).
        min_length: Minimum word length to keep (parametrized).
        expected: Words expected to remain (parametrized).
    """
    remaining = get_words(text, min_length=min_length)
    assert remaining == expected

68 

69 

@pytest.mark.parametrize(
    ("text", "ignore_code", "expected"),
    [
        # The code element is properly closed with `</code>`; the sample
        # previously ended with a second opening `<code>` tag.
        ("Hello <code>world!</code>", True, ["hello"]),
        ("Hello <code>world!</code>", False, ["hello", "world"]),
    ],
)
def test_ignore_text_in_code_tags(text: str, ignore_code: bool, expected: list[str]) -> None:
    """Assert text inside `<code>` tags is skipped only when `ignore_code` is enabled.

    Parameters:
        text: Some text (parametrized).
        ignore_code: Whether to ignore words in code tags (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, ignore_code=ignore_code) == expected

86 

87 

@pytest.mark.parametrize(
    ("text", "allow_unicode", "expected"),
    [
        ("Hello world! ハローワールド!", True, ["hello", "world", "ハローワールド"]),
        ("Hello world! ハローワールド!", False, ["hello", "world"]),
    ],
)
def test_allow_unicode_characters(text: str, allow_unicode: bool, expected: list[str]) -> None:
    """Check that non-ASCII words are kept only when `allow_unicode` is enabled.

    Parameters:
        text: Sample text mixing ASCII and non-ASCII words (parametrized).
        allow_unicode: Whether to allow unicode characters in words (parametrized).
        expected: Words expected in the result (parametrized).
    """
    collected = get_words(text, allow_unicode=allow_unicode)
    assert collected == expected

104 

105 

def test_prevent_words_concatenation() -> None:
    """Check that words from adjacent elements stay separate once the tags are stripped."""
    extracted = get_words("<p>Hello</p><p>world!</p>")
    assert extracted == ["hello", "world"]

110 

111 

def test_reset_after_code_endtag() -> None:
    """Check that text after a closing `</code>` tag is still collected when code is ignored."""
    extracted = get_words("<p>Some</p><code>code</code><p>snippet</p>", ignore_code=True)
    assert "snippet" in extracted