Coverage for tests/test_words.py: 100.00%
31 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-05 19:28 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-05-05 19:28 +0200
1"""Tests for the `words` module."""
3from __future__ import annotations
5import pytest
7from mkdocs_spellcheck.words import get_words
@pytest.mark.parametrize("tag", ["p", "em", "div", "article"])
def test_remove_tags(tag: str) -> None:
    """Check that an HTML tag name does not show up among the extracted words.

    Parameters:
        tag: Name of the HTML element wrapping the text (parametrized).
    """
    markup = f"<{tag}>Some text.</{tag}><br><hr/>"
    extracted = get_words(markup, min_length=1)
    # min_length=1 keeps even one-letter names such as "p" from being filtered by length.
    assert tag not in extracted
def test_remove_single_tags() -> None:
    """Check that standalone tags such as `br` and `img` are stripped from HTML text."""
    extracted = get_words("Some text.<br><br/><br /><img /></br>", min_length=1)
    for tag_name in ("br", "img"):
        assert tag_name not in extracted
@pytest.mark.parametrize(
    ("text", "known_words", "expected"),
    [
        # `set()` rather than `{}`: the latter is an empty dict, which does not
        # match the `set[str]` annotation of `known_words`.
        ("hello", set(), ["hello"]),
        ("hello", {"hello"}, []),
        ("hello", {"world"}, ["hello"]),
    ],
)
def test_ignore_known_words(text: str, known_words: set[str], expected: list[str]) -> None:
    """Assert known words are correctly removed.

    Parameters:
        text: Some text (parametrized).
        known_words: Some known words (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, known_words=known_words) == expected
@pytest.mark.parametrize(
    ("text", "min_length", "expected"),
    [
        ("a bb ccc", 0, ["a", "bb", "ccc"]),
        ("a bb ccc", 1, ["a", "bb", "ccc"]),
        ("a bb ccc", 2, ["bb", "ccc"]),
        ("a bb ccc", 3, ["ccc"]),
        ("a bb ccc", 4, []),
    ],
)
def test_ignore_too_short_words(text: str, min_length: int, expected: list[str]) -> None:
    """Assert words shorter than the minimum length are ignored.

    Parameters:
        text: Some text (parametrized).
        min_length: Minimum word length (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, min_length=min_length) == expected
@pytest.mark.parametrize(
    ("text", "ignore_code", "expected"),
    [
        # The code element is properly closed with `</code>` so the test
        # exercises well-formed HTML.
        ("Hello <code>world!</code>", True, ["hello"]),
        ("Hello <code>world!</code>", False, ["hello", "world"]),
    ],
)
def test_ignore_text_in_code_tags(text: str, ignore_code: bool, expected: list[str]) -> None:
    """Assert text inside `code` tags is ignored only when requested.

    Parameters:
        text: Some text (parametrized).
        ignore_code: Whether to ignore words in code tags (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, ignore_code=ignore_code) == expected
@pytest.mark.parametrize(
    ("text", "allow_unicode", "expected"),
    [
        ("Hello world! ハローワールド!", True, ["hello", "world", "ハローワールド"]),
        ("Hello world! ハローワールド!", False, ["hello", "world"]),
    ],
)
def test_allow_unicode_characters(text: str, allow_unicode: bool, expected: list[str]) -> None:
    """Assert words with unicode characters are kept only when allowed.

    Parameters:
        text: Some text (parametrized).
        allow_unicode: Whether to allow unicode characters in words (parametrized).
        expected: Expected list result (parametrized).
    """
    assert get_words(text, allow_unicode=allow_unicode) == expected
def test_prevent_words_concatenation() -> None:
    """Check that text from adjacent elements stays as separate words once tags are removed."""
    extracted = get_words("<p>Hello</p><p>world!</p>")
    assert extracted == ["hello", "world"]
def test_reset_after_code_endtag() -> None:
    """Check that text following a `</code>` end tag is still collected when code is ignored."""
    extracted = get_words("<p>Some</p><code>code</code><p>snippet</p>", ignore_code=True)
    assert "snippet" in extracted