parser: Iterate over lines + tokens + comments

Instead of iterating over lines and tokens (and finding comments between
tokens inside the comment rules), add a new `Comment` type to the parser
and mark the comment rules with `TYPE = 'comment'`.
This commit is contained in:
Adrien Vergé
2016-06-25 13:54:42 +02:00
parent 9f99f25db5
commit 7a7d98c96a
9 changed files with 197 additions and 148 deletions

View File

@@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):
# Split token rules from line rules
token_rules = [r for r in rules if r.TYPE == 'token']
comment_rules = [r for r in rules if r.TYPE == 'comment']
line_rules = [r for r in rules if r.TYPE == 'line']
context = {}
for rule in token_rules:
context[rule.ID] = {}
for elem in parser.token_or_line_generator(buffer):
for elem in parser.token_or_comment_or_line_generator(buffer):
if isinstance(elem, parser.Token):
for rule in token_rules:
rule_conf = conf.rules[rule.ID]
@@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
problem.rule = rule.ID
problem.level = rule_conf['level']
yield problem
elif isinstance(elem, parser.Comment):
for rule in comment_rules:
rule_conf = conf.rules[rule.ID]
for problem in rule.check(rule_conf, elem):
problem.rule = rule.ID
problem.level = rule_conf['level']
yield problem
elif isinstance(elem, parser.Line):
for rule in line_rules:
rule_conf = conf.rules[rule.ID]

View File

@@ -38,6 +38,40 @@ class Token(object):
self.nextnext = nextnext
class Comment(object):
    """A comment found in a YAML buffer.

    A comment is described by its position -- ``line_no``, ``column_no``
    and ``pointer``, the index in ``buffer`` where the comment text
    starts -- together with links to its surroundings: the token before
    it, the token after it and the comment before it.  Each of these links
    may be ``None`` when the caller does not provide it.
    """

    def __init__(self, line_no, column_no, buffer, pointer,
                 token_before=None, token_after=None, comment_before=None):
        self.line_no = line_no
        self.column_no = column_no
        self.buffer = buffer
        self.pointer = pointer
        self.token_before = token_before
        self.token_after = token_after
        self.comment_before = comment_before

    def __repr__(self):
        """Return the comment text, from ``pointer`` to the end of its line.

        The text stops before the next newline; when there is none, before
        the next NUL character; when there is neither, it runs to the end
        of the buffer.
        """
        end = self.buffer.find('\n', self.pointer)
        if end == -1:
            end = self.buffer.find('\0', self.pointer)
        if end != -1:
            return self.buffer[self.pointer:end]
        return self.buffer[self.pointer:]

    def __eq__(self, other):
        """Two comments are equal when position and text both match."""
        return (isinstance(other, Comment) and
                self.line_no == other.line_no and
                self.column_no == other.column_no and
                str(self) == str(other))

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this
        # method two equal comments would also compare as different.
        return not self == other

    def is_inline(self):
        """Tell whether the comment sits on the same line as ``token_before``.

        Returns ``False`` when there is no preceding token, or when that
        token is the stream start.
        """
        if self.token_before is None:
            # Nothing precedes the comment, so it cannot share a line with
            # content.
            return False
        return (
            not isinstance(self.token_before, yaml.StreamStartToken) and
            self.line_no == self.token_before.end_mark.line + 1 and
            # sometimes token end marks are on the next line
            self.buffer[self.token_before.end_mark.pointer - 1] != '\n'
        )
def line_generator(buffer):
line_no = 1
cur = 0
@@ -51,7 +85,39 @@ def line_generator(buffer):
yield Line(line_no, buffer, start=cur, end=len(buffer))
def token_generator(buffer):
def comments_between_tokens(token1, token2):
    """Find all comments between two tokens.

    The text scanned starts where ``token1`` ends and stops where
    ``token2`` starts, or at the end of the buffer when ``token2`` is
    ``None``.  Nothing is yielded when ``token1`` ends on the line where
    ``token2`` starts, unless ``token1`` is the stream start or ``token2``
    is the stream end.

    Each yielded ``Comment`` records both tokens and the comment yielded
    just before it (``None`` for the first one).
    """
    origin = token1.end_mark

    if token2 is None:
        text = origin.buffer[origin.pointer:]
    else:
        if (origin.line == token2.start_mark.line and
                not isinstance(token1, yaml.StreamStartToken) and
                not isinstance(token2, yaml.StreamEndToken)):
            return
        text = origin.buffer[origin.pointer:token2.start_mark.pointer]

    first_line_no = origin.line + 1
    # On the first scanned line, columns are counted from the end of
    # token1; every following line starts again at column 1.
    column_base = origin.column + 1
    offset = origin.pointer

    previous = None
    for index, chunk in enumerate(text.split('\n')):
        hash_at = chunk.find('#')
        if hash_at >= 0:
            found = Comment(first_line_no + index, column_base + hash_at,
                            origin.buffer, offset + hash_at,
                            token1, token2, previous)
            yield found
            previous = found

        # +1 accounts for the newline removed by split()
        offset += len(chunk) + 1
        column_base = 1
def token_or_comment_generator(buffer):
yaml_loader = yaml.BaseLoader(buffer)
try:
@@ -63,6 +129,9 @@ def token_generator(buffer):
yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)
for comment in comments_between_tokens(curr, next):
yield comment
prev = curr
curr = next
@@ -70,19 +139,19 @@ def token_generator(buffer):
pass
def token_or_line_generator(buffer):
def token_or_comment_or_line_generator(buffer):
"""Generator that mixes tokens and lines, ordering them by line number"""
token_gen = token_generator(buffer)
tok_or_com_gen = token_or_comment_generator(buffer)
line_gen = line_generator(buffer)
token = next(token_gen, None)
tok_or_com = next(tok_or_com_gen, None)
line = next(line_gen, None)
while token is not None or line is not None:
if token is None or (line is not None and
token.line_no > line.line_no):
while tok_or_com is not None or line is not None:
if tok_or_com is None or (line is not None and
tok_or_com.line_no > line.line_no):
yield line
line = next(line_gen, None)
else:
yield token
token = next(token_gen, None)
yield tok_or_com
tok_or_com = next(tok_or_com_gen, None)

View File

@@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
"""
import yaml
from yamllint.linter import LintProblem
from yamllint.rules.common import get_comments_between_tokens
ID = 'comments'
TYPE = 'token'
TYPE = 'comment'
CONF = {'require-starting-space': bool,
'min-spaces-from-content': int}
def check(conf, token, prev, next, nextnext, context):
for comment in get_comments_between_tokens(token, next):
if (conf['min-spaces-from-content'] != -1 and
not isinstance(token, yaml.StreamStartToken) and
comment.line == token.end_mark.line + 1):
# Sometimes token end marks are on the next line
if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
if (comment.pointer - token.end_mark.pointer <
conf['min-spaces-from-content']):
yield LintProblem(comment.line, comment.column,
'too few spaces before comment')
def check(conf, comment):
if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
comment.pointer - comment.token_before.end_mark.pointer <
conf['min-spaces-from-content']):
yield LintProblem(comment.line_no, comment.column_no,
'too few spaces before comment')
if (conf['require-starting-space'] and
comment.pointer + 1 < len(comment.buffer) and
comment.buffer[comment.pointer + 1] != ' ' and
comment.buffer[comment.pointer + 1] != '\n'):
yield LintProblem(comment.line, comment.column + 1,
'missing starting space in comment')
if (conf['require-starting-space'] and
comment.pointer + 1 < len(comment.buffer) and
comment.buffer[comment.pointer + 1] != ' ' and
comment.buffer[comment.pointer + 1] != '\n'):
yield LintProblem(comment.line_no, comment.column_no + 1,
'missing starting space in comment')

View File

@@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
import yaml
from yamllint.linter import LintProblem
from yamllint.rules.common import get_line_indent, get_comments_between_tokens
from yamllint.rules.common import get_line_indent
ID = 'comments-indentation'
TYPE = 'token'
TYPE = 'comment'
# Case A:
@@ -98,28 +98,42 @@ TYPE = 'token'
# # commented line 2
# current: line
def check(conf, token, prev, next, nextnext, context):
if prev is None:
def check(conf, comment):
# Only check block comments
if (not isinstance(comment.token_before, yaml.StreamStartToken) and
comment.token_before.end_mark.line + 1 == comment.line_no):
return
curr_line_indent = token.start_mark.column
if isinstance(token, yaml.StreamEndToken):
curr_line_indent = 0
next_line_indent = comment.token_after.start_mark.column
if isinstance(comment.token_after, yaml.StreamEndToken):
next_line_indent = 0
skip_first_line = True
if isinstance(prev, yaml.StreamStartToken):
skip_first_line = False
if isinstance(comment.token_before, yaml.StreamStartToken):
prev_line_indent = 0
else:
prev_line_indent = get_line_indent(prev)
prev_line_indent = get_line_indent(comment.token_before)
if prev_line_indent <= curr_line_indent:
prev_line_indent = -1 # disable it
# In the following case only the next line indent is valid:
# list:
# # comment
# - 1
# - 2
if prev_line_indent <= next_line_indent:
prev_line_indent = next_line_indent
for comment in get_comments_between_tokens(
prev, token, skip_first_line=skip_first_line):
if comment.column - 1 == curr_line_indent:
prev_line_indent = -1 # disable it
elif comment.column - 1 != prev_line_indent:
yield LintProblem(comment.line, comment.column,
'comment not indented like content')
# If two indents are valid but a previous comment went back to normal
# indent, for the next ones to do the same. In other words, avoid this:
# list:
# - 1
# # comment on valid indent (0)
# # comment on valid indent (4)
# other-list:
# - 2
if (comment.comment_before is not None and
not comment.comment_before.is_inline()):
prev_line_indent = comment.comment_before.column_no - 1
if (comment.column_no - 1 != prev_line_indent and
comment.column_no - 1 != next_line_indent):
yield LintProblem(comment.line_no, comment.column_no,
'comment not indented like content')

View File

@@ -98,35 +98,6 @@ def get_real_end_line(token):
return end_line
def get_comments_between_tokens(token1, token2, skip_first_line=False):
    """Yield a ``Comment`` for each comment found between two tokens.

    The text scanned starts where ``token1`` ends and stops where
    ``token2`` starts, or at the end of the buffer when ``token2`` is
    ``None``.  Nothing is yielded when ``token1`` ends on the line where
    ``token2`` starts, unless ``token1`` is the stream start or ``token2``
    is the stream end.

    When ``skip_first_line`` is true, the first scanned line (the rest of
    the line on which ``token1`` ends) is not searched for a comment.
    """
    origin = token1.end_mark

    if token2 is None:
        text = origin.buffer[origin.pointer:]
    else:
        if (origin.line == token2.start_mark.line and
                not isinstance(token1, yaml.StreamStartToken) and
                not isinstance(token2, yaml.StreamEndToken)):
            return
        text = origin.buffer[origin.pointer:token2.start_mark.pointer]

    first_line_no = origin.line + 1
    # On the first scanned line, columns are counted from the end of
    # token1; every following line starts again at column 1.
    column_base = origin.column + 1
    offset = origin.pointer

    for index, chunk in enumerate(text.split('\n')):
        if not (skip_first_line and index == 0):
            hash_at = chunk.find('#')
            if hash_at >= 0:
                yield Comment(first_line_no + index, column_base + hash_at,
                              origin.buffer, offset + hash_at)

        # +1 accounts for the newline removed by split()
        offset += len(chunk) + 1
        column_base = 1
def is_explicit_key(token):
# explicit key:
# ? key