parser: Iterate over lines + tokens + comments

Instead of iterating over lines and tokens (and finding comments between tokens inside the comment rules), add a new `Comment` type to the parser and let comment rules declare `TYPE = 'comment'`.
2016-06-25 13:54:42 +02:00
parent 9f99f25db5
commit 7a7d98c96a
9 changed files with 197 additions and 148 deletions
@@ -58,7 +58,7 @@ class CommentsIndentationTestCase(RuleTestCase):
                   '# line 2\n', conf, problem=(2, 2))
        self.check('---\n'
                   '  # line 1\n'
-                   '  # line 2\n', conf, problem1=(2, 3), problem2=(3, 3))
+                   '  # line 2\n', conf, problem1=(2, 3))
        self.check('---\n'
                   'obj:\n'
                   '  # normal\n'
@@ -143,3 +143,15 @@ class CommentsIndentationTestCase(RuleTestCase):
                   '# hey\n'
                   '# normal\n'
                   ' #\n', conf, problem=(4, 2))
+
+    def test_inline_comment(self):
+        conf = 'comments-indentation: enable'
+        self.check('---\n'
+                   '- a  # inline\n'
+                   '# ok\n', conf)
+        self.check('---\n'
+                   '- a  # inline\n'
+                   ' # not ok\n', conf, problem=(3, 2))
+        self.check('---\n'
+                   ' # not ok\n'
+                   '- a  # inline\n', conf, problem=(2, 2))
@@ -18,8 +18,7 @@ import unittest

 import yaml

-from yamllint.rules.common import (Comment, get_line_indent,
-                                   get_comments_between_tokens)
+from yamllint.rules.common import get_line_indent


 class CommonTestCase(unittest.TestCase):
@@ -43,54 +42,3 @@ class CommonTestCase(unittest.TestCase):
            self.assertEqual(get_line_indent(tokens[i]), 0)
        for i in (13, 16, 18, 22, 24):
            self.assertEqual(get_line_indent(tokens[i]), 2)
-
-    def check_comments(self, buffer, *expected):
-        yaml_loader = yaml.BaseLoader(buffer)
-
-        comments = []
-
-        next = yaml_loader.peek_token()
-        while next is not None:
-            curr = yaml_loader.get_token()
-            next = yaml_loader.peek_token()
-            for comment in get_comments_between_tokens(curr, next):
-                comments.append(comment)
-
-        self.assertEqual(comments, list(expected))
-
-    def test_get_comments_between_tokens(self):
-        self.check_comments('# comment\n',
-                            Comment(1, 1, '# comment', 0))
-        self.check_comments('---\n'
-                            '# comment\n'
-                            '...\n',
-                            Comment(2, 1, '# comment', 0))
-        self.check_comments('---\n'
-                            '# no newline char',
-                            Comment(2, 1, '# no newline char', 0))
-        self.check_comments('# just comment',
-                            Comment(1, 1, '# just comment', 0))
-        self.check_comments('\n'
-                            '   # indented comment\n',
-                            Comment(2, 4, '# indented comment', 0))
-        self.check_comments('\n'
-                            '# trailing spaces    \n',
-                            Comment(2, 1, '# trailing spaces    ', 0))
-        self.check_comments('# comment one\n'
-                            '\n'
-                            'key: val  # key=val\n'
-                            '\n'
-                            '# this is\n'
-                            '# a block     \n'
-                            '# comment\n'
-                            '\n'
-                            'other:\n'
-                            '  - foo  # equals\n'
-                            '         # bar\n',
-                            Comment(1, 1, '# comment one', 0),
-                            Comment(3, 11, '# key=val', 0),
-                            Comment(5, 1, '# this is', 0),
-                            Comment(6, 1, '# a block     ', 0),
-                            Comment(7, 1, '# comment', 0),
-                            Comment(10, 10, '# equals', 0),
-                            Comment(11, 10, '# bar', 0))
@@ -15,7 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 from tests.common import RuleTestCase
-from yamllint.parser import token_generator
+from yamllint.parser import token_or_comment_generator, Comment
 from yamllint.rules.indentation import check


@@ -38,7 +38,8 @@ class IndentationStackTestCase(RuleTestCase):
                'check-multi-line-strings': False}
        context = {}
        output = ''
-        for elem in token_generator(source):
+        for elem in [t for t in token_or_comment_generator(source)
+                     if not isinstance(t, Comment)]:
            list(check(conf, elem.curr, elem.prev, elem.next, elem.nextnext,
                       context))

@@ -18,8 +18,9 @@ import unittest

 import yaml

-from yamllint.parser import (line_generator, token_generator,
-                             token_or_line_generator, Line, Token)
+from yamllint.parser import (line_generator, token_or_comment_generator,
+                             token_or_comment_or_line_generator,
+                             Line, Token, Comment)


 class ParserTestCase(unittest.TestCase):
@@ -61,8 +62,8 @@ class ParserTestCase(unittest.TestCase):
        self.assertEqual(e[2].line_no, 3)
        self.assertEqual(e[2].content, 'at the end')

-    def test_token_generator(self):
-        e = list(token_generator(''))
+    def test_token_or_comment_generator(self):
+        e = list(token_or_comment_generator(''))
        self.assertEqual(len(e), 2)
        self.assertEqual(e[0].prev, None)
        self.assertIsInstance(e[0].curr, yaml.Token)
@@ -71,16 +72,48 @@ class ParserTestCase(unittest.TestCase):
        self.assertEqual(e[1].curr, e[0].next)
        self.assertEqual(e[1].next, None)

-        e = list(token_generator('---\n'
+        e = list(token_or_comment_generator('---\n'
                                            'k: v\n'))
        self.assertEqual(len(e), 9)
        self.assertIsInstance(e[3].curr, yaml.KeyToken)
        self.assertIsInstance(e[5].curr, yaml.ValueToken)

-    def test_token_or_line_generator(self):
-        e = list(token_or_line_generator('---\n'
-                                         'k: v\n'))
-        self.assertEqual(len(e), 12)
+        e = list(token_or_comment_generator('# start comment\n'
+                                            '- a\n'
+                                            '- key: val  # key=val\n'
+                                            '# this is\n'
+                                            '# a block     \n'
+                                            '# comment\n'
+                                            '- c\n'
+                                            '# end comment\n'))
+        self.assertEqual(len(e), 21)
+        self.assertIsInstance(e[1], Comment)
+        self.assertEqual(e[1], Comment(1, 1, '# start comment', 0))
+        self.assertEqual(e[11], Comment(3, 13, '# key=val', 0))
+        self.assertEqual(e[12], Comment(4, 1, '# this is', 0))
+        self.assertEqual(e[13], Comment(5, 1, '# a block     ', 0))
+        self.assertEqual(e[14], Comment(6, 1, '# comment', 0))
+        self.assertEqual(e[18], Comment(8, 1, '# end comment', 0))
+
+        e = list(token_or_comment_generator('---\n'
+                                            '# no newline char'))
+        self.assertEqual(e[2], Comment(2, 1, '# no newline char', 0))
+
+        e = list(token_or_comment_generator('# just comment'))
+        self.assertEqual(e[1], Comment(1, 1, '# just comment', 0))
+
+        e = list(token_or_comment_generator('\n'
+                                            '   # indented comment\n'))
+        self.assertEqual(e[1], Comment(2, 4, '# indented comment', 0))
+
+        e = list(token_or_comment_generator('\n'
+                                            '# trailing spaces    \n'))
+        self.assertEqual(e[1], Comment(2, 1, '# trailing spaces    ', 0))
+
+    def test_token_or_comment_or_line_generator(self):
+        e = list(token_or_comment_or_line_generator('---\n'
+                                                    'k: v  # k=v\n'))
+        self.assertEqual(len(e), 13)
        self.assertIsInstance(e[0], Token)
        self.assertIsInstance(e[0].curr, yaml.StreamStartToken)
        self.assertIsInstance(e[1], Token)
@@ -89,5 +122,6 @@ class ParserTestCase(unittest.TestCase):
        self.assertIsInstance(e[3].curr, yaml.BlockMappingStartToken)
        self.assertIsInstance(e[4].curr, yaml.KeyToken)
        self.assertIsInstance(e[6].curr, yaml.ValueToken)
-        self.assertIsInstance(e[8], Line)
-        self.assertIsInstance(e[11], Line)
+        self.assertIsInstance(e[8], Comment)
+        self.assertIsInstance(e[9], Line)
+        self.assertIsInstance(e[12], Line)
@@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):

    # Split token rules from line rules
    token_rules = [r for r in rules if r.TYPE == 'token']
+    comment_rules = [r for r in rules if r.TYPE == 'comment']
    line_rules = [r for r in rules if r.TYPE == 'line']

    context = {}
    for rule in token_rules:
        context[rule.ID] = {}

-    for elem in parser.token_or_line_generator(buffer):
+    for elem in parser.token_or_comment_or_line_generator(buffer):
        if isinstance(elem, parser.Token):
            for rule in token_rules:
                rule_conf = conf.rules[rule.ID]
@@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
                    problem.rule = rule.ID
                    problem.level = rule_conf['level']
                    yield problem
+        elif isinstance(elem, parser.Comment):
+            for rule in comment_rules:
+                rule_conf = conf.rules[rule.ID]
+                for problem in rule.check(rule_conf, elem):
+                    problem.rule = rule.ID
+                    problem.level = rule_conf['level']
+                    yield problem
        elif isinstance(elem, parser.Line):
            for rule in line_rules:
                rule_conf = conf.rules[rule.ID]
@@ -38,6 +38,40 @@ class Token(object):
        self.nextnext = nextnext


+class Comment(object):
+    def __init__(self, line_no, column_no, buffer, pointer,
+                 token_before=None, token_after=None, comment_before=None):
+        self.line_no = line_no
+        self.column_no = column_no
+        self.buffer = buffer
+        self.pointer = pointer
+        self.token_before = token_before
+        self.token_after = token_after
+        self.comment_before = comment_before
+
+    def __repr__(self):
+        end = self.buffer.find('\n', self.pointer)
+        if end == -1:
+            end = self.buffer.find('\0', self.pointer)
+        if end != -1:
+            return self.buffer[self.pointer:end]
+        return self.buffer[self.pointer:]
+
+    def __eq__(self, other):
+        return (isinstance(other, Comment) and
+                self.line_no == other.line_no and
+                self.column_no == other.column_no and
+                str(self) == str(other))
+
+    def is_inline(self):
+        return (
+            not isinstance(self.token_before, yaml.StreamStartToken) and
+            self.line_no == self.token_before.end_mark.line + 1 and
+            # sometimes token end marks are on the next line
+            self.buffer[self.token_before.end_mark.pointer - 1] != '\n'
+        )
+
+
 def line_generator(buffer):
    line_no = 1
    cur = 0
@@ -51,7 +85,39 @@ def line_generator(buffer):
    yield Line(line_no, buffer, start=cur, end=len(buffer))


-def token_generator(buffer):
+def comments_between_tokens(token1, token2):
+    """Find all comments between two tokens"""
+    if token2 is None:
+        buf = token1.end_mark.buffer[token1.end_mark.pointer:]
+    elif (token1.end_mark.line == token2.start_mark.line and
+          not isinstance(token1, yaml.StreamStartToken) and
+          not isinstance(token2, yaml.StreamEndToken)):
+        return
+    else:
+        buf = token1.end_mark.buffer[token1.end_mark.pointer:
+                                     token2.start_mark.pointer]
+
+    line_no = token1.end_mark.line + 1
+    column_no = token1.end_mark.column + 1
+    pointer = token1.end_mark.pointer
+
+    comment_before = None
+    for line in buf.split('\n'):
+        pos = line.find('#')
+        if pos != -1:
+            comment = Comment(line_no, column_no + pos,
+                              token1.end_mark.buffer, pointer + pos,
+                              token1, token2, comment_before)
+            yield comment
+
+            comment_before = comment
+
+        pointer += len(line) + 1
+        line_no += 1
+        column_no = 1
+
+
+def token_or_comment_generator(buffer):
    yaml_loader = yaml.BaseLoader(buffer)

    try:
@@ -63,6 +129,9 @@ def token_generator(buffer):

            yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)

+            for comment in comments_between_tokens(curr, next):
+                yield comment
+
            prev = curr
            curr = next

@@ -70,19 +139,19 @@ def token_generator(buffer):
        pass


-def token_or_line_generator(buffer):
+def token_or_comment_or_line_generator(buffer):
    """Generator that mixes tokens and lines, ordering them by line number"""
-    token_gen = token_generator(buffer)
+    tok_or_com_gen = token_or_comment_generator(buffer)
    line_gen = line_generator(buffer)

-    token = next(token_gen, None)
+    tok_or_com = next(tok_or_com_gen, None)
    line = next(line_gen, None)

-    while token is not None or line is not None:
-        if token is None or (line is not None and
-                             token.line_no > line.line_no):
+    while tok_or_com is not None or line is not None:
+        if tok_or_com is None or (line is not None and
+                                  tok_or_com.line_no > line.line_no):
            yield line
            line = next(line_gen, None)
        else:
-            yield token
-            token = next(token_gen, None)
+            yield tok_or_com
+            tok_or_com = next(tok_or_com_gen, None)
@@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
 """


-import yaml
-
 from yamllint.linter import LintProblem
-from yamllint.rules.common import get_comments_between_tokens


 ID = 'comments'
-TYPE = 'token'
+TYPE = 'comment'
 CONF = {'require-starting-space': bool,
        'min-spaces-from-content': int}


-def check(conf, token, prev, next, nextnext, context):
-    for comment in get_comments_between_tokens(token, next):
-        if (conf['min-spaces-from-content'] != -1 and
-                not isinstance(token, yaml.StreamStartToken) and
-                comment.line == token.end_mark.line + 1):
-            # Sometimes token end marks are on the next line
-            if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
-                if (comment.pointer - token.end_mark.pointer <
+def check(conf, comment):
+    if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
+            comment.pointer - comment.token_before.end_mark.pointer <
            conf['min-spaces-from-content']):
-                    yield LintProblem(comment.line, comment.column,
+        yield LintProblem(comment.line_no, comment.column_no,
                          'too few spaces before comment')

    if (conf['require-starting-space'] and
            comment.pointer + 1 < len(comment.buffer) and
            comment.buffer[comment.pointer + 1] != ' ' and
            comment.buffer[comment.pointer + 1] != '\n'):
-            yield LintProblem(comment.line, comment.column + 1,
+        yield LintProblem(comment.line_no, comment.column_no + 1,
                          'missing starting space in comment')
@@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
 import yaml

 from yamllint.linter import LintProblem
-from yamllint.rules.common import get_line_indent, get_comments_between_tokens
+from yamllint.rules.common import get_line_indent


 ID = 'comments-indentation'
-TYPE = 'token'
+TYPE = 'comment'


 # Case A:
@@ -98,28 +98,42 @@ TYPE = 'token'
 #     # commented line 2
 #     current: line

-def check(conf, token, prev, next, nextnext, context):
-    if prev is None:
+def check(conf, comment):
+    # Only check block comments
+    if (not isinstance(comment.token_before, yaml.StreamStartToken) and
+            comment.token_before.end_mark.line + 1 == comment.line_no):
        return

-    curr_line_indent = token.start_mark.column
-    if isinstance(token, yaml.StreamEndToken):
-        curr_line_indent = 0
+    next_line_indent = comment.token_after.start_mark.column
+    if isinstance(comment.token_after, yaml.StreamEndToken):
+        next_line_indent = 0

-    skip_first_line = True
-    if isinstance(prev, yaml.StreamStartToken):
-        skip_first_line = False
+    if isinstance(comment.token_before, yaml.StreamStartToken):
        prev_line_indent = 0
    else:
-        prev_line_indent = get_line_indent(prev)
+        prev_line_indent = get_line_indent(comment.token_before)

-    if prev_line_indent <= curr_line_indent:
-        prev_line_indent = -1  # disable it
+    # In the following case only the next line indent is valid:
+    #     list:
+    #         # comment
+    #         - 1
+    #         - 2
+    if prev_line_indent <= next_line_indent:
+        prev_line_indent = next_line_indent

-    for comment in get_comments_between_tokens(
-            prev, token, skip_first_line=skip_first_line):
-        if comment.column - 1 == curr_line_indent:
-            prev_line_indent = -1  # disable it
-        elif comment.column - 1 != prev_line_indent:
-            yield LintProblem(comment.line, comment.column,
+    # If two indents are valid but a previous comment went back to normal
+    # indent, force the next ones to do the same. In other words, avoid this:
+    #     list:
+    #         - 1
+    #     # comment on valid indent (0)
+    #         # comment on valid indent (4)
+    #     other-list:
+    #         - 2
+    if (comment.comment_before is not None and
+            not comment.comment_before.is_inline()):
+        prev_line_indent = comment.comment_before.column_no - 1
+
+    if (comment.column_no - 1 != prev_line_indent and
+            comment.column_no - 1 != next_line_indent):
+        yield LintProblem(comment.line_no, comment.column_no,
                          'comment not indented like content')
@@ -98,35 +98,6 @@ def get_real_end_line(token):
    return end_line


-def get_comments_between_tokens(token1, token2, skip_first_line=False):
-    if token2 is None:
-        buf = token1.end_mark.buffer[token1.end_mark.pointer:]
-    elif (token1.end_mark.line == token2.start_mark.line and
-          not isinstance(token1, yaml.StreamStartToken) and
-          not isinstance(token2, yaml.StreamEndToken)):
-        return
-    else:
-        buf = token1.end_mark.buffer[token1.end_mark.pointer:
-                                     token2.start_mark.pointer]
-
-    line_no = token1.end_mark.line + 1
-    column_no = token1.end_mark.column + 1
-    pointer = token1.end_mark.pointer
-
-    for line in buf.split('\n'):
-        if skip_first_line:
-            skip_first_line = False
-        else:
-            pos = line.find('#')
-            if pos != -1:
-                yield Comment(line_no, column_no + pos,
-                              token1.end_mark.buffer, pointer + pos)
-
-        pointer += len(line) + 1
-        line_no += 1
-        column_no = 1
-
-
 def is_explicit_key(token):
    # explicit key:
    #   ? key