You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yamllint/tests/test_unicode.py

193 lines
6.0 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# vim:set sw=4 ts=8 et fileencoding=utf8:
# Copyright (C) 2023 Serguei E. Leontiev
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import io
import locale
import os
import shutil
import sys
import unittest
from tests.common import build_temp_workspace
from yamllint import linter
from yamllint.config import YamlLintConfig
CONFIG = """
extends: default
"""
GREEK = """---
greek:
8: [Θ, θ, θήτα, [тета], Т]
20: [Υ, υ, ύψιλον, [ипсилон], И]
"""
GREEK_P = set([('document-end', 4)])
CP1252 = """---
capitals:
1: Reykjavík
2: Tórshavn
"""
CP1252_P = set([('unicode-decode', 0)])
MINIMAL = "m:\n"
MINIMAL_P = set([('document-start', 1),
('document-end', 1)])
FIRST = """Θ:\n"""
FIRST_P = set([('unicode-first-not-ascii', 1),
('document-start', 1),
('document-end', 1)])
ENC = ['utf-8', 'utf-16le', 'utf-16be', 'utf-32le', 'utf-32be']
class UnicodeTestCase(unittest.TestCase):
@classmethod
def fn(cls, enc: str, bom: bool) -> str:
return os.path.join(cls.wd, enc + ("-bom" if bom else "") + ".yml")
@classmethod
def create_file(cls, body: str, enc: str, bom: bool) -> None:
with open(cls.fn(enc, bom), 'w', encoding=enc) as f:
f.write(("\uFEFF" if bom else "") + body)
@classmethod
def setUpClass(cls):
super(UnicodeTestCase, cls).setUpClass()
cls.slc = locale.getlocale(locale.LC_ALL)
cls.cfg = YamlLintConfig('extends: default\n'
'rules: {document-end: {level: warning}}\n'
)
cls.wd = build_temp_workspace({})
for enc in ENC:
cls.create_file(GREEK, enc, True)
cls.create_file(GREEK, enc, False)
cls.create_file(GREEK, 'utf-7', True)
cls.create_file(CP1252, 'cp1252', False)
cls.create_file(MINIMAL, 'ascii', False)
@classmethod
def tearDownClass(cls):
super(UnicodeTestCase, cls).tearDownClass()
shutil.rmtree(cls.wd)
locale.setlocale(locale.LC_ALL, cls.slc)
def run_fobj(self, fobj, exp):
ep = exp.copy()
pcnt = 0
for p in linter.run(fobj, self.cfg):
if (p.rule, p.line) in ep:
ep.remove((p.rule, p.line),)
else:
print('UnicodeTestCase', p.desc, p.line, p.rule)
pcnt += 1
self.assertEqual(len(ep), 0)
self.assertEqual(pcnt, 0)
def run_file(self, lc, enc, bom, exp):
try:
locale.setlocale(locale.LC_ALL, lc)
with open(self.fn(enc, bom)) as f:
self.run_fobj(f, exp)
locale.setlocale(locale.LC_ALL, self.slc)
except locale.Error:
self.skipTest('locale ' + lc + ' not available')
def run_bytes(self, body, enc, bom, buf, exp):
bs = (("\uFEFF" if bom else "") + body).encode(enc)
if buf:
self.run_fobj(io.TextIOWrapper(io.BufferedReader(io.BytesIO(bs))),
exp)
else:
self.run_fobj(io.TextIOWrapper(io.BytesIO(bs)), exp)
def test_file_en_US_UTF_8_utf8_nob(self):
self.run_file('en_US.UTF-8', 'utf-8', False, GREEK_P)
def test_file_ru_RU_CP1251_utf8_nob(self):
self.run_file('ru_RU.CP1251', 'utf-8', False, GREEK_P)
def test_file_en_US_utf8_cp1252(self):
self.run_file('en_US.utf8' if sys.platform.startswith('linux')
else 'en_US.UTF-8',
'cp1252', False, CP1252_P)
def test_file_en_US_ISO8859_1_cp1252(self):
self.run_file('en_US.ISO8859-1', 'cp1252', False, CP1252_P)
def test_file_C_utf8_nob(self):
self.run_file('C', 'utf-8', False, GREEK_P)
def test_file_C_utf8(self):
self.run_file('C', 'utf-8', True, GREEK_P)
def test_file_C_utf16le_nob(self):
self.run_file('C', 'utf-16le', False, GREEK_P)
def test_file_C_utf16le(self):
self.run_file('C', 'utf-16le', True, GREEK_P)
def test_file_C_utf16be_nob(self):
self.run_file('C', 'utf-16be', False, GREEK_P)
def test_file_C_utf16be(self):
self.run_file('C', 'utf-16be', True, GREEK_P)
def test_file_C_utf32le_nob(self):
self.run_file('C', 'utf-32le', False, GREEK_P)
def test_file_C_utf32le(self):
self.run_file('C', 'utf-32le', True, GREEK_P)
def test_file_C_utf32be_nob(self):
self.run_file('C', 'utf-32be', False, GREEK_P)
def test_file_C_utf32be(self):
self.run_file('C', 'utf-32be', True, GREEK_P)
def test_file_C_utf7(self):
self.run_file('C', 'utf-7', True, GREEK_P)
def test_file_minimal_nob(self):
self.run_file('C', 'ascii', False, MINIMAL_P)
def test_bytes_utf8_nob(self):
self.run_bytes(GREEK, 'utf-8', False, False, GREEK_P)
def test_bytes_utf16(self):
# .encode('utf-16') insert BOM automatically
self.run_bytes(GREEK, 'utf-16', False, False, GREEK_P)
def test_bytes_utf32_buf(self):
# .encode('utf-32') insert BOM automatically
self.run_bytes(GREEK, 'utf-32', False, True, GREEK_P)
def test_bytes_minimal_nob(self):
self.run_bytes(MINIMAL, 'ascii', False, False, MINIMAL_P)
def test_bytes_minimal_nob_buf(self):
self.run_bytes(MINIMAL, 'ascii', False, True, MINIMAL_P)
def test_bytes_first_nob(self):
self.run_bytes(FIRST, 'utf-8', False, False, FIRST_P)
def test_bytes_first_nob_buf(self):
self.run_bytes(FIRST, 'utf-8', False, True, FIRST_P)