|
|
|
@ -26,19 +26,34 @@ from tests.common import build_temp_workspace
|
|
|
|
|
from yamllint import linter
|
|
|
|
|
from yamllint.config import YamlLintConfig
|
|
|
|
|
|
|
|
|
|
# yamllint configuration text: inherits the stock "default" rule set.
# The literal must contain nothing but YAML; stray filler lines inside it
# would be handed to the configuration parser as content.
CONFIG = """
extends: default
"""
|
|
|
|
|
|
|
|
|
|
# Four-line YAML sample mixing Greek and Cyrillic text.
# NOTE(review): the indentation of the two entries under "greek:" is not
# recoverable from this view; the lines are kept exactly as they appear --
# confirm against the upstream fixture.
GREEK = """---
greek:
8: [Θ, θ, θήτα, [тета], Т]
20: [Υ, υ, ύψιλον, [ипсилон], И]
"""
# (rule, line) pairs the linter is expected to report for GREEK.
GREEK_P = {('document-end', 4)}
|
|
|
|
|
|
|
|
|
|
# YAML sample with accented Latin characters (written out as cp1252 by the
# fixture setup).
# NOTE(review): the indentation of the entries under "capitals:" is not
# recoverable from this view; the lines are kept exactly as they appear --
# confirm against the upstream fixture.
CP1252 = """---
capitals:
1: Reykjavík
2: Tórshavn
"""
# (rule, line) pairs the linter is expected to report for CP1252.
CP1252_P = {('unicode-decode', 0)}
|
|
|
|
|
|
|
|
|
|
# Smallest document used by the tests: a single ASCII key with no value.
MINIMAL = "m:\n"
# (rule, line) pairs the linter is expected to report for MINIMAL.
MINIMAL_P = {
    ('document-start', 1),
    ('document-end', 1),
}
|
|
|
|
|
|
|
|
|
|
# Document whose very first character is non-ASCII.
FIRST = "Θ:\n"
# (rule, line) pairs the linter is expected to report for FIRST.
FIRST_P = {
    ('unicode-first-not-ascii', 1),
    ('document-start', 1),
    ('document-end', 1),
}

# Encodings for which the class fixture writes GREEK files.  Assigned once:
# an earlier single-entry assignment was dead, since this one overwrote it.
ENC = ['utf-8', 'utf-16le', 'utf-16be', 'utf-32le', 'utf-32be']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UnicodeTestCase(unittest.TestCase):
|
|
|
|
@ -63,7 +78,9 @@ class UnicodeTestCase(unittest.TestCase):
|
|
|
|
|
for enc in ENC:
|
|
|
|
|
cls.create_file(GREEK, enc, True)
|
|
|
|
|
cls.create_file(GREEK, enc, False)
|
|
|
|
|
cls.create_file(GREEK, 'utf-7', True)
|
|
|
|
|
cls.create_file(CP1252, 'cp1252', False)
|
|
|
|
|
cls.create_file(MINIMAL, 'ascii', False)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def tearDownClass(cls):
|
|
|
|
@ -72,55 +89,104 @@ class UnicodeTestCase(unittest.TestCase):
|
|
|
|
|
shutil.rmtree(cls.wd)
|
|
|
|
|
locale.setlocale(locale.LC_ALL, cls.slc)
|
|
|
|
|
|
|
|
|
|
def run_fobj(self, fobj, exp):
    """Lint *fobj* and check that exactly the expected problems are reported.

    fobj -- open text stream passed to ``linter.run`` together with
            ``self.cfg``.
    exp  -- iterable of ``(rule, line)`` pairs that must each be reported
            once; it is not modified.

    Fails the test when an expected pair is never reported or when any
    other problem (including a repeat of an expected pair) shows up.
    """
    # Work on a private copy so the shared module-level sets stay intact.
    missing = set(exp)
    unexpected = []
    for p in linter.run(fobj, self.cfg):
        key = (p.rule, p.line)
        if key in missing:
            missing.remove(key)
        else:
            # Keep the description so a failure explains itself.
            unexpected.append((p.desc, p.line, p.rule))
    self.assertEqual(missing, set(), 'expected problems were not reported')
    self.assertEqual(unexpected, [], 'unexpected problems were reported')
|
|
|
|
|
|
|
|
|
|
def run_file(self, lc, enc, bom, exp):
    """Lint the fixture file for (*enc*, *bom*) while locale *lc* is active.

    lc   -- locale name to install for LC_ALL; the test is skipped when
            the platform does not provide it.
    enc  -- encoding key passed to ``self.fn`` to locate the fixture.
    bom  -- BOM flag passed to ``self.fn`` to locate the fixture.
    exp  -- expected ``(rule, line)`` pairs, forwarded to ``run_fobj``.

    The saved locale ``self.slc`` is restored even when linting or an
    assertion fails, so one failing test cannot leak its locale into the
    tests that run after it.
    """
    try:
        locale.setlocale(locale.LC_ALL, lc)
    except locale.Error:
        self.skipTest('locale ' + lc + ' not available')
    try:
        # No encoding= is given on purpose: the stream is opened with the
        # platform default (presumably what the locale switch is meant
        # to influence -- confirm).
        with open(self.fn(enc, bom)) as f:
            self.run_fobj(f, exp)
    finally:
        locale.setlocale(locale.LC_ALL, self.slc)
|
|
|
|
|
|
|
|
|
|
def run_bytes(self, body, enc, bom, buf, exp):
    """Lint *body* from an in-memory byte stream.

    body -- document text.
    enc  -- codec used to encode the text into bytes.
    bom  -- when true, U+FEFF is prepended before encoding.
    buf  -- when true, the bytes are wrapped in ``io.BufferedReader``
            before the text layer is added; otherwise the text layer
            sits directly on the ``io.BytesIO``.
    exp  -- expected ``(rule, line)`` pairs, forwarded to ``run_fobj``.

    ``run_fobj`` is called exactly once per invocation.
    """
    text = ("\uFEFF" if bom else "") + body
    raw = io.BytesIO(text.encode(enc))
    stream = io.BufferedReader(raw) if buf else raw
    # The text wrapper is created without an explicit encoding, as before.
    self.run_fobj(io.TextIOWrapper(stream), exp)
|
|
|
|
|
|
|
|
|
|
def test_file_en_US_UTF_8_utf8_nob(self):
    # utf-8 fixture selected with bom=False, linted under en_US.UTF-8;
    # only the problems listed in GREEK_P may be reported.
    self.run_file('en_US.UTF-8', 'utf-8', False, GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_ru_RU_CP1251_utf8_nob(self):
    # utf-8 fixture selected with bom=False, linted under the
    # ru_RU.CP1251 locale; only GREEK_P problems may be reported.
    self.run_file('ru_RU.CP1251', 'utf-8', False, GREEK_P)
|
|
|
|
|
|
|
|
|
|
@unittest.expectedFailure
def test_file_en_US_utf8_cp1252(self):
    # The locale name is spelled 'en_US.utf8' on Linux and 'en_US.UTF-8'
    # on every other platform.
    if sys.platform.startswith('linux'):
        lc = 'en_US.utf8'
    else:
        lc = 'en_US.UTF-8'
    # cp1252 fixture under a UTF-8 locale, checked against CP1252_P.
    self.run_file(lc, 'cp1252', False, CP1252_P)
|
|
|
|
|
|
|
|
|
|
@unittest.expectedFailure
def test_file_en_US_ISO8859_1_cp1252(self):
    # cp1252 fixture linted under the en_US.ISO8859-1 locale, checked
    # against CP1252_P.
    self.run_file('en_US.ISO8859-1', 'cp1252', False, CP1252_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf8_nob(self):
    # utf-8 fixture selected with bom=False, linted under the C locale.
    self.run_file('C', 'utf-8', False, GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf8(self):
    # utf-8 fixture selected with bom=True, linted under the C locale.
    self.run_file('C', 'utf-8', True, GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf16le_nob(self):
    # utf-16le fixture selected with bom=False, linted under the C locale.
    self.run_file(lc='C', enc='utf-16le', bom=False, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf16le(self):
    # utf-16le fixture selected with bom=True, linted under the C locale.
    self.run_file(lc='C', enc='utf-16le', bom=True, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf16be_nob(self):
    # utf-16be fixture selected with bom=False, linted under the C locale.
    self.run_file(lc='C', enc='utf-16be', bom=False, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf16be(self):
    # utf-16be fixture selected with bom=True, linted under the C locale.
    self.run_file(lc='C', enc='utf-16be', bom=True, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf32le_nob(self):
    # utf-32le fixture selected with bom=False, linted under the C locale.
    self.run_file(lc='C', enc='utf-32le', bom=False, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf32le(self):
    # utf-32le fixture selected with bom=True, linted under the C locale.
    self.run_file(lc='C', enc='utf-32le', bom=True, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf32be_nob(self):
    # utf-32be fixture selected with bom=False, linted under the C locale.
    self.run_file(lc='C', enc='utf-32be', bom=False, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf32be(self):
    # utf-32be fixture selected with bom=True, linted under the C locale.
    self.run_file(lc='C', enc='utf-32be', bom=True, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_C_utf7(self):
    # utf-7 fixture selected with bom=True, linted under the C locale.
    self.run_file(lc='C', enc='utf-7', bom=True, exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_file_minimal_nob(self):
    # ascii fixture selected with bom=False, linted under the C locale and
    # checked against MINIMAL_P.
    self.run_file(lc='C', enc='ascii', bom=False, exp=MINIMAL_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_utf8_nob(self):
    # GREEK encoded as utf-8 with no BOM, read through an unbuffered
    # in-memory stream; only GREEK_P problems may be reported.
    self.run_bytes(GREEK, 'utf-8', False, False, GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_utf16(self):
    # .encode('utf-16') inserts a BOM automatically, so bom stays False.
    self.run_bytes(body=GREEK, enc='utf-16', bom=False, buf=False,
                   exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_utf32_buf(self):
    # .encode('utf-32') inserts a BOM automatically, so bom stays False;
    # buf=True routes the bytes through io.BufferedReader.
    self.run_bytes(body=GREEK, enc='utf-32', bom=False, buf=True,
                   exp=GREEK_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_minimal_nob(self):
    # MINIMAL as ascii bytes, no BOM, unbuffered stream.
    self.run_bytes(body=MINIMAL, enc='ascii', bom=False, buf=False,
                   exp=MINIMAL_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_minimal_nob_buf(self):
    # MINIMAL as ascii bytes, no BOM, read through io.BufferedReader.
    self.run_bytes(body=MINIMAL, enc='ascii', bom=False, buf=True,
                   exp=MINIMAL_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_first_nob(self):
    # FIRST (document starting with a non-ASCII character) as utf-8 bytes,
    # no BOM, unbuffered stream.
    self.run_bytes(body=FIRST, enc='utf-8', bom=False, buf=False,
                   exp=FIRST_P)
|
|
|
|
|
|
|
|
|
|
def test_bytes_first_nob_buf(self):
    # FIRST (document starting with a non-ASCII character) as utf-8 bytes,
    # no BOM, read through io.BufferedReader.
    self.run_bytes(body=FIRST, enc='utf-8', bom=False, buf=True,
                   exp=FIRST_P)
|
|
|
|
|