From 7a7d98c96adf0e56ba326d005a274532766f8289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Verg=C3=A9?= Date: Sat, 25 Jun 2016 13:54:42 +0200 Subject: [PATCH] parser: Iterate over lines + tokens + comments Instead of iterating over lines and tokens (and find comments between tokens in the comment rules), add a new `Comment` type and set rules with `type = 'comment'`. --- tests/rules/test_comments_indentation.py | 14 +++- tests/rules/test_common.py | 54 +-------------- tests/rules/test_indentation.py | 5 +- tests/test_parser.py | 58 ++++++++++++---- yamllint/linter.py | 10 ++- yamllint/parser.py | 87 +++++++++++++++++++++--- yamllint/rules/comments.py | 36 ++++------ yamllint/rules/comments_indentation.py | 58 ++++++++++------ yamllint/rules/common.py | 29 -------- 9 files changed, 200 insertions(+), 151 deletions(-) diff --git a/tests/rules/test_comments_indentation.py b/tests/rules/test_comments_indentation.py index 4e1b9f4..149b6b2 100644 --- a/tests/rules/test_comments_indentation.py +++ b/tests/rules/test_comments_indentation.py @@ -58,7 +58,7 @@ class CommentsIndentationTestCase(RuleTestCase): '# line 2\n', conf, problem=(2, 2)) self.check('---\n' ' # line 1\n' - ' # line 2\n', conf, problem1=(2, 3), problem2=(3, 3)) + ' # line 2\n', conf, problem1=(2, 3)) self.check('---\n' 'obj:\n' ' # normal\n' @@ -143,3 +143,15 @@ class CommentsIndentationTestCase(RuleTestCase): '# hey\n' '# normal\n' ' #\n', conf, problem=(4, 2)) + + def test_inline_comment(self): + conf = 'comments-indentation: enable' + self.check('---\n' + '- a # inline\n' + '# ok\n', conf) + self.check('---\n' + '- a # inline\n' + ' # not ok\n', conf, problem=(3, 2)) + self.check('---\n' + ' # not ok\n' + '- a # inline\n', conf, problem=(2, 2)) diff --git a/tests/rules/test_common.py b/tests/rules/test_common.py index 1588914..87db047 100644 --- a/tests/rules/test_common.py +++ b/tests/rules/test_common.py @@ -18,8 +18,7 @@ import unittest import yaml -from yamllint.rules.common import (Comment, 
get_line_indent, - get_comments_between_tokens) +from yamllint.rules.common import get_line_indent class CommonTestCase(unittest.TestCase): @@ -43,54 +42,3 @@ class CommonTestCase(unittest.TestCase): self.assertEqual(get_line_indent(tokens[i]), 0) for i in (13, 16, 18, 22, 24): self.assertEqual(get_line_indent(tokens[i]), 2) - - def check_comments(self, buffer, *expected): - yaml_loader = yaml.BaseLoader(buffer) - - comments = [] - - next = yaml_loader.peek_token() - while next is not None: - curr = yaml_loader.get_token() - next = yaml_loader.peek_token() - for comment in get_comments_between_tokens(curr, next): - comments.append(comment) - - self.assertEqual(comments, list(expected)) - - def test_get_comments_between_tokens(self): - self.check_comments('# comment\n', - Comment(1, 1, '# comment', 0)) - self.check_comments('---\n' - '# comment\n' - '...\n', - Comment(2, 1, '# comment', 0)) - self.check_comments('---\n' - '# no newline char', - Comment(2, 1, '# no newline char', 0)) - self.check_comments('# just comment', - Comment(1, 1, '# just comment', 0)) - self.check_comments('\n' - ' # indented comment\n', - Comment(2, 4, '# indented comment', 0)) - self.check_comments('\n' - '# trailing spaces \n', - Comment(2, 1, '# trailing spaces ', 0)) - self.check_comments('# comment one\n' - '\n' - 'key: val # key=val\n' - '\n' - '# this is\n' - '# a block \n' - '# comment\n' - '\n' - 'other:\n' - ' - foo # equals\n' - ' # bar\n', - Comment(1, 1, '# comment one', 0), - Comment(3, 11, '# key=val', 0), - Comment(5, 1, '# this is', 0), - Comment(6, 1, '# a block ', 0), - Comment(7, 1, '# comment', 0), - Comment(10, 10, '# equals', 0), - Comment(11, 10, '# bar', 0)) diff --git a/tests/rules/test_indentation.py b/tests/rules/test_indentation.py index 12a3c8a..cbbc979 100644 --- a/tests/rules/test_indentation.py +++ b/tests/rules/test_indentation.py @@ -15,7 +15,7 @@ # along with this program. If not, see . 
from tests.common import RuleTestCase -from yamllint.parser import token_generator +from yamllint.parser import token_or_comment_generator, Comment from yamllint.rules.indentation import check @@ -38,7 +38,8 @@ class IndentationStackTestCase(RuleTestCase): 'check-multi-line-strings': False} context = {} output = '' - for elem in token_generator(source): + for elem in [t for t in token_or_comment_generator(source) + if not isinstance(t, Comment)]: list(check(conf, elem.curr, elem.prev, elem.next, elem.nextnext, context)) diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c46f66..2ed5d25 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -18,8 +18,9 @@ import unittest import yaml -from yamllint.parser import (line_generator, token_generator, - token_or_line_generator, Line, Token) +from yamllint.parser import (line_generator, token_or_comment_generator, + token_or_comment_or_line_generator, + Line, Token, Comment) class ParserTestCase(unittest.TestCase): @@ -61,8 +62,8 @@ class ParserTestCase(unittest.TestCase): self.assertEqual(e[2].line_no, 3) self.assertEqual(e[2].content, 'at the end') - def test_token_generator(self): - e = list(token_generator('')) + def test_token_or_comment_generator(self): + e = list(token_or_comment_generator('')) self.assertEqual(len(e), 2) self.assertEqual(e[0].prev, None) self.assertIsInstance(e[0].curr, yaml.Token) @@ -71,16 +72,48 @@ class ParserTestCase(unittest.TestCase): self.assertEqual(e[1].curr, e[0].next) self.assertEqual(e[1].next, None) - e = list(token_generator('---\n' - 'k: v\n')) + e = list(token_or_comment_generator('---\n' + 'k: v\n')) self.assertEqual(len(e), 9) self.assertIsInstance(e[3].curr, yaml.KeyToken) self.assertIsInstance(e[5].curr, yaml.ValueToken) - def test_token_or_line_generator(self): - e = list(token_or_line_generator('---\n' - 'k: v\n')) - self.assertEqual(len(e), 12) + e = list(token_or_comment_generator('# start comment\n' + '- a\n' + '- key: val # key=val\n' + '# this is\n' 
+ '# a block \n' + '# comment\n' + '- c\n' + '# end comment\n')) + self.assertEqual(len(e), 21) + self.assertIsInstance(e[1], Comment) + self.assertEqual(e[1], Comment(1, 1, '# start comment', 0)) + self.assertEqual(e[11], Comment(3, 13, '# key=val', 0)) + self.assertEqual(e[12], Comment(4, 1, '# this is', 0)) + self.assertEqual(e[13], Comment(5, 1, '# a block ', 0)) + self.assertEqual(e[14], Comment(6, 1, '# comment', 0)) + self.assertEqual(e[18], Comment(8, 1, '# end comment', 0)) + + e = list(token_or_comment_generator('---\n' + '# no newline char')) + self.assertEqual(e[2], Comment(2, 1, '# no newline char', 0)) + + e = list(token_or_comment_generator('# just comment')) + self.assertEqual(e[1], Comment(1, 1, '# just comment', 0)) + + e = list(token_or_comment_generator('\n' + ' # indented comment\n')) + self.assertEqual(e[1], Comment(2, 4, '# indented comment', 0)) + + e = list(token_or_comment_generator('\n' + '# trailing spaces \n')) + self.assertEqual(e[1], Comment(2, 1, '# trailing spaces ', 0)) + + def test_token_or_comment_or_line_generator(self): + e = list(token_or_comment_or_line_generator('---\n' + 'k: v # k=v\n')) + self.assertEqual(len(e), 13) self.assertIsInstance(e[0], Token) self.assertIsInstance(e[0].curr, yaml.StreamStartToken) self.assertIsInstance(e[1], Token) @@ -89,5 +122,6 @@ class ParserTestCase(unittest.TestCase): self.assertIsInstance(e[3].curr, yaml.BlockMappingStartToken) self.assertIsInstance(e[4].curr, yaml.KeyToken) self.assertIsInstance(e[6].curr, yaml.ValueToken) - self.assertIsInstance(e[8], Line) - self.assertIsInstance(e[11], Line) + self.assertIsInstance(e[8], Comment) + self.assertIsInstance(e[9], Line) + self.assertIsInstance(e[12], Line) diff --git a/yamllint/linter.py b/yamllint/linter.py index b8fb9e9..98919d2 100644 --- a/yamllint/linter.py +++ b/yamllint/linter.py @@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf): # Split token rules from line rules token_rules = [r for r in rules if r.TYPE == 'token'] + 
comment_rules = [r for r in rules if r.TYPE == 'comment'] line_rules = [r for r in rules if r.TYPE == 'line'] context = {} for rule in token_rules: context[rule.ID] = {} - for elem in parser.token_or_line_generator(buffer): + for elem in parser.token_or_comment_or_line_generator(buffer): if isinstance(elem, parser.Token): for rule in token_rules: rule_conf = conf.rules[rule.ID] @@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf): problem.rule = rule.ID problem.level = rule_conf['level'] yield problem + elif isinstance(elem, parser.Comment): + for rule in comment_rules: + rule_conf = conf.rules[rule.ID] + for problem in rule.check(rule_conf, elem): + problem.rule = rule.ID + problem.level = rule_conf['level'] + yield problem elif isinstance(elem, parser.Line): for rule in line_rules: rule_conf = conf.rules[rule.ID] diff --git a/yamllint/parser.py b/yamllint/parser.py index eccfb42..a757aa9 100644 --- a/yamllint/parser.py +++ b/yamllint/parser.py @@ -38,6 +38,40 @@ class Token(object): self.nextnext = nextnext +class Comment(object): + def __init__(self, line_no, column_no, buffer, pointer, + token_before=None, token_after=None, comment_before=None): + self.line_no = line_no + self.column_no = column_no + self.buffer = buffer + self.pointer = pointer + self.token_before = token_before + self.token_after = token_after + self.comment_before = comment_before + + def __repr__(self): + end = self.buffer.find('\n', self.pointer) + if end == -1: + end = self.buffer.find('\0', self.pointer) + if end != -1: + return self.buffer[self.pointer:end] + return self.buffer[self.pointer:] + + def __eq__(self, other): + return (isinstance(other, Comment) and + self.line_no == other.line_no and + self.column_no == other.column_no and + str(self) == str(other)) + + def is_inline(self): + return ( + not isinstance(self.token_before, yaml.StreamStartToken) and + self.line_no == self.token_before.end_mark.line + 1 and + # sometimes token end marks are on the next line + 
self.buffer[self.token_before.end_mark.pointer - 1] != '\n' + ) + + def line_generator(buffer): line_no = 1 cur = 0 @@ -51,7 +85,39 @@ def line_generator(buffer): yield Line(line_no, buffer, start=cur, end=len(buffer)) -def token_generator(buffer): +def comments_between_tokens(token1, token2): + """Find all comments between two tokens""" + if token2 is None: + buf = token1.end_mark.buffer[token1.end_mark.pointer:] + elif (token1.end_mark.line == token2.start_mark.line and + not isinstance(token1, yaml.StreamStartToken) and + not isinstance(token2, yaml.StreamEndToken)): + return + else: + buf = token1.end_mark.buffer[token1.end_mark.pointer: + token2.start_mark.pointer] + + line_no = token1.end_mark.line + 1 + column_no = token1.end_mark.column + 1 + pointer = token1.end_mark.pointer + + comment_before = None + for line in buf.split('\n'): + pos = line.find('#') + if pos != -1: + comment = Comment(line_no, column_no + pos, + token1.end_mark.buffer, pointer + pos, + token1, token2, comment_before) + yield comment + + comment_before = comment + + pointer += len(line) + 1 + line_no += 1 + column_no = 1 + + +def token_or_comment_generator(buffer): yaml_loader = yaml.BaseLoader(buffer) try: @@ -63,6 +129,9 @@ def token_generator(buffer): yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext) + for comment in comments_between_tokens(curr, next): + yield comment + prev = curr curr = next @@ -70,19 +139,19 @@ def token_generator(buffer): pass -def token_or_line_generator(buffer): +def token_or_comment_or_line_generator(buffer): """Generator that mixes tokens and lines, ordering them by line number""" - token_gen = token_generator(buffer) + tok_or_com_gen = token_or_comment_generator(buffer) line_gen = line_generator(buffer) - token = next(token_gen, None) + tok_or_com = next(tok_or_com_gen, None) line = next(line_gen, None) - while token is not None or line is not None: - if token is None or (line is not None and - token.line_no > line.line_no): + while 
tok_or_com is not None or line is not None: + if tok_or_com is None or (line is not None and + tok_or_com.line_no > line.line_no): yield line line = next(line_gen, None) else: - yield token - token = next(token_gen, None) + yield tok_or_com + tok_or_com = next(tok_or_com_gen, None) diff --git a/yamllint/rules/comments.py b/yamllint/rules/comments.py index af58de8..e848a4c 100644 --- a/yamllint/rules/comments.py +++ b/yamllint/rules/comments.py @@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments. """ -import yaml - from yamllint.linter import LintProblem -from yamllint.rules.common import get_comments_between_tokens ID = 'comments' -TYPE = 'token' +TYPE = 'comment' CONF = {'require-starting-space': bool, 'min-spaces-from-content': int} -def check(conf, token, prev, next, nextnext, context): - for comment in get_comments_between_tokens(token, next): - if (conf['min-spaces-from-content'] != -1 and - not isinstance(token, yaml.StreamStartToken) and - comment.line == token.end_mark.line + 1): - # Sometimes token end marks are on the next line - if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n': - if (comment.pointer - token.end_mark.pointer < - conf['min-spaces-from-content']): - yield LintProblem(comment.line, comment.column, - 'too few spaces before comment') - - if (conf['require-starting-space'] and - comment.pointer + 1 < len(comment.buffer) and - comment.buffer[comment.pointer + 1] != ' ' and - comment.buffer[comment.pointer + 1] != '\n'): - yield LintProblem(comment.line, comment.column + 1, - 'missing starting space in comment') +def check(conf, comment): + if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and + comment.pointer - comment.token_before.end_mark.pointer < + conf['min-spaces-from-content']): + yield LintProblem(comment.line_no, comment.column_no, + 'too few spaces before comment') + + if (conf['require-starting-space'] and + comment.pointer + 1 < len(comment.buffer) and + 
comment.buffer[comment.pointer + 1] != ' ' and + comment.buffer[comment.pointer + 1] != '\n'): + yield LintProblem(comment.line_no, comment.column_no + 1, + 'missing starting space in comment') diff --git a/yamllint/rules/comments_indentation.py b/yamllint/rules/comments_indentation.py index 52923dc..22ab55d 100644 --- a/yamllint/rules/comments_indentation.py +++ b/yamllint/rules/comments_indentation.py @@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content. import yaml from yamllint.linter import LintProblem -from yamllint.rules.common import get_line_indent, get_comments_between_tokens +from yamllint.rules.common import get_line_indent ID = 'comments-indentation' -TYPE = 'token' +TYPE = 'comment' # Case A: @@ -98,28 +98,42 @@ TYPE = 'token' # # commented line 2 # current: line -def check(conf, token, prev, next, nextnext, context): - if prev is None: +def check(conf, comment): + # Only check block comments + if (not isinstance(comment.token_before, yaml.StreamStartToken) and + comment.token_before.end_mark.line + 1 == comment.line_no): return - curr_line_indent = token.start_mark.column - if isinstance(token, yaml.StreamEndToken): - curr_line_indent = 0 + next_line_indent = comment.token_after.start_mark.column + if isinstance(comment.token_after, yaml.StreamEndToken): + next_line_indent = 0 - skip_first_line = True - if isinstance(prev, yaml.StreamStartToken): - skip_first_line = False + if isinstance(comment.token_before, yaml.StreamStartToken): prev_line_indent = 0 else: - prev_line_indent = get_line_indent(prev) - - if prev_line_indent <= curr_line_indent: - prev_line_indent = -1 # disable it - - for comment in get_comments_between_tokens( - prev, token, skip_first_line=skip_first_line): - if comment.column - 1 == curr_line_indent: - prev_line_indent = -1 # disable it - elif comment.column - 1 != prev_line_indent: - yield LintProblem(comment.line, comment.column, - 'comment not indented like content') + prev_line_indent = 
get_line_indent(comment.token_before) + + # In the following case only the next line indent is valid: + # list: + # # comment + # - 1 + # - 2 + if prev_line_indent <= next_line_indent: + prev_line_indent = next_line_indent + + # If two indents are valid but a previous comment went back to normal + # indent, force the next ones to do the same. In other words, avoid this: + # list: + # - 1 + # # comment on valid indent (0) + # # comment on valid indent (4) + # other-list: + # - 2 + if (comment.comment_before is not None and + not comment.comment_before.is_inline()): + prev_line_indent = comment.comment_before.column_no - 1 + + if (comment.column_no - 1 != prev_line_indent and + comment.column_no - 1 != next_line_indent): + yield LintProblem(comment.line_no, comment.column_no, + 'comment not indented like content') diff --git a/yamllint/rules/common.py b/yamllint/rules/common.py index e343e62..07baced 100644 --- a/yamllint/rules/common.py +++ b/yamllint/rules/common.py @@ -98,35 +98,6 @@ def get_real_end_line(token): return end_line -def get_comments_between_tokens(token1, token2, skip_first_line=False): - if token2 is None: - buf = token1.end_mark.buffer[token1.end_mark.pointer:] - elif (token1.end_mark.line == token2.start_mark.line and - not isinstance(token1, yaml.StreamStartToken) and - not isinstance(token2, yaml.StreamEndToken)): - return - else: - buf = token1.end_mark.buffer[token1.end_mark.pointer: - token2.start_mark.pointer] - - line_no = token1.end_mark.line + 1 - column_no = token1.end_mark.column + 1 - pointer = token1.end_mark.pointer - - for line in buf.split('\n'): - if skip_first_line: - skip_first_line = False - else: - pos = line.find('#') - if pos != -1: - yield Comment(line_no, column_no + pos, - token1.end_mark.buffer, pointer + pos) - - pointer += len(line) + 1 - line_no += 1 - column_no = 1 - - def is_explicit_key(token): # explicit key: # ? key