parser: Iterate over lines + tokens + comments

Instead of iterating over lines and tokens (and finding comments between tokens in the comment rules), add a new `Comment` type and mark comment rules with `TYPE = 'comment'`.
2016-06-25 13:54:42 +02:00
parent 9f99f25db5
commit 7a7d98c96a
9 changed files with 197 additions and 148 deletions
@@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):

    # Split token rules from line rules
    token_rules = [r for r in rules if r.TYPE == 'token']
+    comment_rules = [r for r in rules if r.TYPE == 'comment']
    line_rules = [r for r in rules if r.TYPE == 'line']

    context = {}
    for rule in token_rules:
        context[rule.ID] = {}

-    for elem in parser.token_or_line_generator(buffer):
+    for elem in parser.token_or_comment_or_line_generator(buffer):
        if isinstance(elem, parser.Token):
            for rule in token_rules:
                rule_conf = conf.rules[rule.ID]
@@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
                    problem.rule = rule.ID
                    problem.level = rule_conf['level']
                    yield problem
+        elif isinstance(elem, parser.Comment):
+            for rule in comment_rules:
+                rule_conf = conf.rules[rule.ID]
+                for problem in rule.check(rule_conf, elem):
+                    problem.rule = rule.ID
+                    problem.level = rule_conf['level']
+                    yield problem
        elif isinstance(elem, parser.Line):
            for rule in line_rules:
                rule_conf = conf.rules[rule.ID]
@@ -38,6 +38,40 @@ class Token(object):
        self.nextnext = nextnext


+class Comment(object):
+    def __init__(self, line_no, column_no, buffer, pointer,
+                 token_before=None, token_after=None, comment_before=None):
+        self.line_no = line_no
+        self.column_no = column_no
+        self.buffer = buffer
+        self.pointer = pointer
+        self.token_before = token_before
+        self.token_after = token_after
+        self.comment_before = comment_before
+
+    def __repr__(self):
+        end = self.buffer.find('\n', self.pointer)
+        if end == -1:
+            end = self.buffer.find('\0', self.pointer)
+        if end != -1:
+            return self.buffer[self.pointer:end]
+        return self.buffer[self.pointer:]
+
+    def __eq__(self, other):
+        return (isinstance(other, Comment) and
+                self.line_no == other.line_no and
+                self.column_no == other.column_no and
+                str(self) == str(other))
+
+    def is_inline(self):
+        return (
+            not isinstance(self.token_before, yaml.StreamStartToken) and
+            self.line_no == self.token_before.end_mark.line + 1 and
+            # sometimes token end marks are on the next line
+            self.buffer[self.token_before.end_mark.pointer - 1] != '\n'
+        )
+
+
 def line_generator(buffer):
    line_no = 1
    cur = 0
@@ -51,7 +85,39 @@ def line_generator(buffer):
    yield Line(line_no, buffer, start=cur, end=len(buffer))


-def token_generator(buffer):
+def comments_between_tokens(token1, token2):
+    """Find all comments between two tokens"""
+    if token2 is None:
+        buf = token1.end_mark.buffer[token1.end_mark.pointer:]
+    elif (token1.end_mark.line == token2.start_mark.line and
+          not isinstance(token1, yaml.StreamStartToken) and
+          not isinstance(token2, yaml.StreamEndToken)):
+        return
+    else:
+        buf = token1.end_mark.buffer[token1.end_mark.pointer:
+                                     token2.start_mark.pointer]
+
+    line_no = token1.end_mark.line + 1
+    column_no = token1.end_mark.column + 1
+    pointer = token1.end_mark.pointer
+
+    comment_before = None
+    for line in buf.split('\n'):
+        pos = line.find('#')
+        if pos != -1:
+            comment = Comment(line_no, column_no + pos,
+                              token1.end_mark.buffer, pointer + pos,
+                              token1, token2, comment_before)
+            yield comment
+
+            comment_before = comment
+
+        pointer += len(line) + 1
+        line_no += 1
+        column_no = 1
+
+
+def token_or_comment_generator(buffer):
    yaml_loader = yaml.BaseLoader(buffer)

    try:
@@ -63,6 +129,9 @@ def token_generator(buffer):

            yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)

+            for comment in comments_between_tokens(curr, next):
+                yield comment
+
            prev = curr
            curr = next

@@ -70,19 +139,19 @@ def token_generator(buffer):
        pass


-def token_or_line_generator(buffer):
+def token_or_comment_or_line_generator(buffer):
    """Generator that mixes tokens and lines, ordering them by line number"""
-    token_gen = token_generator(buffer)
+    tok_or_com_gen = token_or_comment_generator(buffer)
    line_gen = line_generator(buffer)

-    token = next(token_gen, None)
+    tok_or_com = next(tok_or_com_gen, None)
    line = next(line_gen, None)

-    while token is not None or line is not None:
-        if token is None or (line is not None and
-                             token.line_no > line.line_no):
+    while tok_or_com is not None or line is not None:
+        if tok_or_com is None or (line is not None and
+                                  tok_or_com.line_no > line.line_no):
            yield line
            line = next(line_gen, None)
        else:
-            yield token
-            token = next(token_gen, None)
+            yield tok_or_com
+            tok_or_com = next(tok_or_com_gen, None)
@@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
 """


-import yaml
-
 from yamllint.linter import LintProblem
-from yamllint.rules.common import get_comments_between_tokens


 ID = 'comments'
-TYPE = 'token'
+TYPE = 'comment'
 CONF = {'require-starting-space': bool,
        'min-spaces-from-content': int}


-def check(conf, token, prev, next, nextnext, context):
-    for comment in get_comments_between_tokens(token, next):
-        if (conf['min-spaces-from-content'] != -1 and
-                not isinstance(token, yaml.StreamStartToken) and
-                comment.line == token.end_mark.line + 1):
-            # Sometimes token end marks are on the next line
-            if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
-                if (comment.pointer - token.end_mark.pointer <
-                        conf['min-spaces-from-content']):
-                    yield LintProblem(comment.line, comment.column,
-                                      'too few spaces before comment')
+def check(conf, comment):
+    if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
+            comment.pointer - comment.token_before.end_mark.pointer <
+            conf['min-spaces-from-content']):
+        yield LintProblem(comment.line_no, comment.column_no,
+                          'too few spaces before comment')

-        if (conf['require-starting-space'] and
-                comment.pointer + 1 < len(comment.buffer) and
-                comment.buffer[comment.pointer + 1] != ' ' and
-                comment.buffer[comment.pointer + 1] != '\n'):
-            yield LintProblem(comment.line, comment.column + 1,
-                              'missing starting space in comment')
+    if (conf['require-starting-space'] and
+            comment.pointer + 1 < len(comment.buffer) and
+            comment.buffer[comment.pointer + 1] != ' ' and
+            comment.buffer[comment.pointer + 1] != '\n'):
+        yield LintProblem(comment.line_no, comment.column_no + 1,
+                          'missing starting space in comment')
@@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
 import yaml

 from yamllint.linter import LintProblem
-from yamllint.rules.common import get_line_indent, get_comments_between_tokens
+from yamllint.rules.common import get_line_indent


 ID = 'comments-indentation'
-TYPE = 'token'
+TYPE = 'comment'


 # Case A:
@@ -98,28 +98,42 @@ TYPE = 'token'
 #     # commented line 2
 #     current: line

-def check(conf, token, prev, next, nextnext, context):
-    if prev is None:
+def check(conf, comment):
+    # Only check block comments
+    if (not isinstance(comment.token_before, yaml.StreamStartToken) and
+            comment.token_before.end_mark.line + 1 == comment.line_no):
        return

-    curr_line_indent = token.start_mark.column
-    if isinstance(token, yaml.StreamEndToken):
-        curr_line_indent = 0
+    next_line_indent = comment.token_after.start_mark.column
+    if isinstance(comment.token_after, yaml.StreamEndToken):
+        next_line_indent = 0

-    skip_first_line = True
-    if isinstance(prev, yaml.StreamStartToken):
-        skip_first_line = False
+    if isinstance(comment.token_before, yaml.StreamStartToken):
        prev_line_indent = 0
    else:
-        prev_line_indent = get_line_indent(prev)
+        prev_line_indent = get_line_indent(comment.token_before)

-    if prev_line_indent <= curr_line_indent:
-        prev_line_indent = -1  # disable it
+    # In the following case only the next line indent is valid:
+    #     list:
+    #         # comment
+    #         - 1
+    #         - 2
+    if prev_line_indent <= next_line_indent:
+        prev_line_indent = next_line_indent

-    for comment in get_comments_between_tokens(
-            prev, token, skip_first_line=skip_first_line):
-        if comment.column - 1 == curr_line_indent:
-            prev_line_indent = -1  # disable it
-        elif comment.column - 1 != prev_line_indent:
-            yield LintProblem(comment.line, comment.column,
-                              'comment not indented like content')
+    # If two indents are valid but a previous comment went back to normal
+    # indent, force the next ones to do the same. In other words, avoid this:
+    #     list:
+    #         - 1
+    #     # comment on valid indent (0)
+    #         # comment on valid indent (4)
+    #     other-list:
+    #         - 2
+    if (comment.comment_before is not None and
+            not comment.comment_before.is_inline()):
+        prev_line_indent = comment.comment_before.column_no - 1
+
+    if (comment.column_no - 1 != prev_line_indent and
+            comment.column_no - 1 != next_line_indent):
+        yield LintProblem(comment.line_no, comment.column_no,
+                          'comment not indented like content')
@@ -98,35 +98,6 @@ def get_real_end_line(token):
    return end_line


-def get_comments_between_tokens(token1, token2, skip_first_line=False):
-    if token2 is None:
-        buf = token1.end_mark.buffer[token1.end_mark.pointer:]
-    elif (token1.end_mark.line == token2.start_mark.line and
-          not isinstance(token1, yaml.StreamStartToken) and
-          not isinstance(token2, yaml.StreamEndToken)):
-        return
-    else:
-        buf = token1.end_mark.buffer[token1.end_mark.pointer:
-                                     token2.start_mark.pointer]
-
-    line_no = token1.end_mark.line + 1
-    column_no = token1.end_mark.column + 1
-    pointer = token1.end_mark.pointer
-
-    for line in buf.split('\n'):
-        if skip_first_line:
-            skip_first_line = False
-        else:
-            pos = line.find('#')
-            if pos != -1:
-                yield Comment(line_no, column_no + pos,
-                              token1.end_mark.buffer, pointer + pos)
-
-        pointer += len(line) + 1
-        line_no += 1
-        column_no = 1
-
-
 def is_explicit_key(token):
    # explicit key:
    #   ? key