parser: Iterate over lines + tokens + comments
Instead of iterating over lines and tokens (and finding comments between tokens inside the comment rules), add a new `Comment` type to the parser and let rules declare `TYPE = 'comment'` to receive comments directly.
This commit is contained in:
@@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):
|
||||
|
||||
# Split token rules from line rules
|
||||
token_rules = [r for r in rules if r.TYPE == 'token']
|
||||
comment_rules = [r for r in rules if r.TYPE == 'comment']
|
||||
line_rules = [r for r in rules if r.TYPE == 'line']
|
||||
|
||||
context = {}
|
||||
for rule in token_rules:
|
||||
context[rule.ID] = {}
|
||||
|
||||
for elem in parser.token_or_line_generator(buffer):
|
||||
for elem in parser.token_or_comment_or_line_generator(buffer):
|
||||
if isinstance(elem, parser.Token):
|
||||
for rule in token_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
@@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
|
||||
problem.rule = rule.ID
|
||||
problem.level = rule_conf['level']
|
||||
yield problem
|
||||
elif isinstance(elem, parser.Comment):
|
||||
for rule in comment_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
for problem in rule.check(rule_conf, elem):
|
||||
problem.rule = rule.ID
|
||||
problem.level = rule_conf['level']
|
||||
yield problem
|
||||
elif isinstance(elem, parser.Line):
|
||||
for rule in line_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
|
||||
@@ -38,6 +38,40 @@ class Token(object):
|
||||
self.nextnext = nextnext
|
||||
|
||||
|
||||
class Comment(object):
    """A comment found between two tokens of the parsed buffer.

    Attributes:
        line_no: 1-based line number of the comment's '#'.
        column_no: 1-based column number of the comment's '#'.
        buffer: the full text in which the comment lives.
        pointer: index of the comment's '#' inside ``buffer``.
        token_before: token preceding the comment (may be None).
        token_after: token following the comment (may be None).
        comment_before: previous comment found in the same gap between
            tokens, or None.
    """

    def __init__(self, line_no, column_no, buffer, pointer,
                 token_before=None, token_after=None, comment_before=None):
        self.line_no = line_no
        self.column_no = column_no
        self.buffer = buffer
        self.pointer = pointer
        self.token_before = token_before
        self.token_after = token_after
        self.comment_before = comment_before

    def __str__(self):
        """Return the comment text, from its '#' up to the end of the line."""
        end = self.buffer.find('\n', self.pointer)
        if end == -1:
            # No newline after the comment: stop at a NUL character if the
            # buffer contains one, otherwise take everything that is left.
            end = self.buffer.find('\0', self.pointer)
        if end != -1:
            return self.buffer[self.pointer:end]
        return self.buffer[self.pointer:]

    def __repr__(self):
        # Kept identical to the text form so existing output is unchanged.
        return self.__str__()

    def __eq__(self, other):
        """Two comments are equal when position and text are the same."""
        return (isinstance(other, Comment) and
                self.line_no == other.line_no and
                self.column_no == other.column_no and
                str(self) == str(other))

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3; hash
        # exactly the fields that __eq__ compares.
        return hash((self.line_no, self.column_no, str(self)))

    def is_inline(self):
        """Return True when the comment shares its line with ``token_before``.

        A comment with no known preceding token is reported as not inline.
        """
        return (
            self.token_before is not None and
            not isinstance(self.token_before, yaml.StreamStartToken) and
            self.line_no == self.token_before.end_mark.line + 1 and
            # sometimes token end marks are on the next line
            self.buffer[self.token_before.end_mark.pointer - 1] != '\n'
        )
|
||||
|
||||
|
||||
def line_generator(buffer):
|
||||
line_no = 1
|
||||
cur = 0
|
||||
@@ -51,7 +85,39 @@ def line_generator(buffer):
|
||||
yield Line(line_no, buffer, start=cur, end=len(buffer))
|
||||
|
||||
|
||||
def token_generator(buffer):
|
||||
def comments_between_tokens(token1, token2):
    """Yield a Comment for every comment found between two tokens.

    The text scanned runs from the end of ``token1`` to the start of
    ``token2``, or to the end of the buffer when ``token2`` is None.
    Nothing is yielded when both tokens sit on the same line, unless
    ``token1`` is the stream start or ``token2`` is the stream end.
    Each yielded comment is linked to the one yielded just before it.
    """
    end_mark = token1.end_mark
    source = end_mark.buffer

    if token2 is None:
        gap = source[end_mark.pointer:]
    else:
        on_same_line = end_mark.line == token2.start_mark.line
        at_stream_edge = (isinstance(token1, yaml.StreamStartToken) or
                          isinstance(token2, yaml.StreamEndToken))
        if on_same_line and not at_stream_edge:
            return
        gap = source[end_mark.pointer:token2.start_mark.pointer]

    first_line_no = end_mark.line + 1
    first_column_no = end_mark.column + 1
    offset = end_mark.pointer

    previous = None
    for index, text in enumerate(gap.split('\n')):
        hash_pos = text.find('#')
        if hash_pos != -1:
            # Only the first chunk starts mid-line, right after token1;
            # every later chunk starts at column 1.
            base_column = first_column_no if index == 0 else 1
            found = Comment(first_line_no + index, base_column + hash_pos,
                            source, offset + hash_pos,
                            token1, token2, previous)
            yield found
            previous = found

        # +1 accounts for the newline removed by split().
        offset += len(text) + 1
|
||||
|
||||
|
||||
def token_or_comment_generator(buffer):
|
||||
yaml_loader = yaml.BaseLoader(buffer)
|
||||
|
||||
try:
|
||||
@@ -63,6 +129,9 @@ def token_generator(buffer):
|
||||
|
||||
yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)
|
||||
|
||||
for comment in comments_between_tokens(curr, next):
|
||||
yield comment
|
||||
|
||||
prev = curr
|
||||
curr = next
|
||||
|
||||
@@ -70,19 +139,19 @@ def token_generator(buffer):
|
||||
pass
|
||||
|
||||
|
||||
def token_or_line_generator(buffer):
|
||||
def token_or_comment_or_line_generator(buffer):
|
||||
"""Generator that mixes tokens and lines, ordering them by line number"""
|
||||
token_gen = token_generator(buffer)
|
||||
tok_or_com_gen = token_or_comment_generator(buffer)
|
||||
line_gen = line_generator(buffer)
|
||||
|
||||
token = next(token_gen, None)
|
||||
tok_or_com = next(tok_or_com_gen, None)
|
||||
line = next(line_gen, None)
|
||||
|
||||
while token is not None or line is not None:
|
||||
if token is None or (line is not None and
|
||||
token.line_no > line.line_no):
|
||||
while tok_or_com is not None or line is not None:
|
||||
if tok_or_com is None or (line is not None and
|
||||
tok_or_com.line_no > line.line_no):
|
||||
yield line
|
||||
line = next(line_gen, None)
|
||||
else:
|
||||
yield token
|
||||
token = next(token_gen, None)
|
||||
yield tok_or_com
|
||||
tok_or_com = next(tok_or_com_gen, None)
|
||||
|
||||
@@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
|
||||
"""
|
||||
|
||||
|
||||
import yaml
|
||||
|
||||
from yamllint.linter import LintProblem
|
||||
from yamllint.rules.common import get_comments_between_tokens
|
||||
|
||||
|
||||
ID = 'comments'
|
||||
TYPE = 'token'
|
||||
TYPE = 'comment'
|
||||
CONF = {'require-starting-space': bool,
|
||||
'min-spaces-from-content': int}
|
||||
|
||||
|
||||
def check(conf, token, prev, next, nextnext, context):
|
||||
for comment in get_comments_between_tokens(token, next):
|
||||
if (conf['min-spaces-from-content'] != -1 and
|
||||
not isinstance(token, yaml.StreamStartToken) and
|
||||
comment.line == token.end_mark.line + 1):
|
||||
# Sometimes token end marks are on the next line
|
||||
if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
|
||||
if (comment.pointer - token.end_mark.pointer <
|
||||
conf['min-spaces-from-content']):
|
||||
yield LintProblem(comment.line, comment.column,
|
||||
'too few spaces before comment')
|
||||
def check(conf, comment):
|
||||
if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
|
||||
comment.pointer - comment.token_before.end_mark.pointer <
|
||||
conf['min-spaces-from-content']):
|
||||
yield LintProblem(comment.line_no, comment.column_no,
|
||||
'too few spaces before comment')
|
||||
|
||||
if (conf['require-starting-space'] and
|
||||
comment.pointer + 1 < len(comment.buffer) and
|
||||
comment.buffer[comment.pointer + 1] != ' ' and
|
||||
comment.buffer[comment.pointer + 1] != '\n'):
|
||||
yield LintProblem(comment.line, comment.column + 1,
|
||||
'missing starting space in comment')
|
||||
if (conf['require-starting-space'] and
|
||||
comment.pointer + 1 < len(comment.buffer) and
|
||||
comment.buffer[comment.pointer + 1] != ' ' and
|
||||
comment.buffer[comment.pointer + 1] != '\n'):
|
||||
yield LintProblem(comment.line_no, comment.column_no + 1,
|
||||
'missing starting space in comment')
|
||||
|
||||
@@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
|
||||
import yaml
|
||||
|
||||
from yamllint.linter import LintProblem
|
||||
from yamllint.rules.common import get_line_indent, get_comments_between_tokens
|
||||
from yamllint.rules.common import get_line_indent
|
||||
|
||||
|
||||
ID = 'comments-indentation'
|
||||
TYPE = 'token'
|
||||
TYPE = 'comment'
|
||||
|
||||
|
||||
# Case A:
|
||||
@@ -98,28 +98,42 @@ TYPE = 'token'
|
||||
# # commented line 2
|
||||
# current: line
|
||||
|
||||
def check(conf, token, prev, next, nextnext, context):
|
||||
if prev is None:
|
||||
def check(conf, comment):
|
||||
# Only check block comments
|
||||
if (not isinstance(comment.token_before, yaml.StreamStartToken) and
|
||||
comment.token_before.end_mark.line + 1 == comment.line_no):
|
||||
return
|
||||
|
||||
curr_line_indent = token.start_mark.column
|
||||
if isinstance(token, yaml.StreamEndToken):
|
||||
curr_line_indent = 0
|
||||
next_line_indent = comment.token_after.start_mark.column
|
||||
if isinstance(comment.token_after, yaml.StreamEndToken):
|
||||
next_line_indent = 0
|
||||
|
||||
skip_first_line = True
|
||||
if isinstance(prev, yaml.StreamStartToken):
|
||||
skip_first_line = False
|
||||
if isinstance(comment.token_before, yaml.StreamStartToken):
|
||||
prev_line_indent = 0
|
||||
else:
|
||||
prev_line_indent = get_line_indent(prev)
|
||||
prev_line_indent = get_line_indent(comment.token_before)
|
||||
|
||||
if prev_line_indent <= curr_line_indent:
|
||||
prev_line_indent = -1 # disable it
|
||||
# In the following case only the next line indent is valid:
|
||||
# list:
|
||||
# # comment
|
||||
# - 1
|
||||
# - 2
|
||||
if prev_line_indent <= next_line_indent:
|
||||
prev_line_indent = next_line_indent
|
||||
|
||||
for comment in get_comments_between_tokens(
|
||||
prev, token, skip_first_line=skip_first_line):
|
||||
if comment.column - 1 == curr_line_indent:
|
||||
prev_line_indent = -1 # disable it
|
||||
elif comment.column - 1 != prev_line_indent:
|
||||
yield LintProblem(comment.line, comment.column,
|
||||
'comment not indented like content')
|
||||
# If two indents are valid but a previous comment went back to normal
|
||||
# indent, for the next ones to do the same. In other words, avoid this:
|
||||
# list:
|
||||
# - 1
|
||||
# # comment on valid indent (0)
|
||||
# # comment on valid indent (4)
|
||||
# other-list:
|
||||
# - 2
|
||||
if (comment.comment_before is not None and
|
||||
not comment.comment_before.is_inline()):
|
||||
prev_line_indent = comment.comment_before.column_no - 1
|
||||
|
||||
if (comment.column_no - 1 != prev_line_indent and
|
||||
comment.column_no - 1 != next_line_indent):
|
||||
yield LintProblem(comment.line_no, comment.column_no,
|
||||
'comment not indented like content')
|
||||
|
||||
@@ -98,35 +98,6 @@ def get_real_end_line(token):
|
||||
return end_line
|
||||
|
||||
|
||||
def get_comments_between_tokens(token1, token2, skip_first_line=False):
    """Yield a Comment for every comment found between two tokens.

    The text scanned runs from the end of ``token1`` to the start of
    ``token2``, or to the end of the buffer when ``token2`` is None.
    Nothing is yielded when both tokens sit on the same line, unless
    ``token1`` is the stream start or ``token2`` is the stream end.
    When ``skip_first_line`` is true, the remainder of the line on which
    ``token1`` ends is not searched.
    """
    end_mark = token1.end_mark
    source = end_mark.buffer

    if token2 is None:
        gap = source[end_mark.pointer:]
    else:
        on_same_line = end_mark.line == token2.start_mark.line
        at_stream_edge = (isinstance(token1, yaml.StreamStartToken) or
                          isinstance(token2, yaml.StreamEndToken))
        if on_same_line and not at_stream_edge:
            return
        gap = source[end_mark.pointer:token2.start_mark.pointer]

    first_line_no = end_mark.line + 1
    first_column_no = end_mark.column + 1
    offset = end_mark.pointer

    for index, text in enumerate(gap.split('\n')):
        ignored = index == 0 and skip_first_line
        if not ignored:
            hash_pos = text.find('#')
            if hash_pos != -1:
                # Only the first chunk starts mid-line, right after token1;
                # every later chunk starts at column 1.
                base_column = first_column_no if index == 0 else 1
                yield Comment(first_line_no + index, base_column + hash_pos,
                              source, offset + hash_pos)

        # +1 accounts for the newline removed by split().
        offset += len(text) + 1
|
||||
|
||||
|
||||
def is_explicit_key(token):
|
||||
# explicit key:
|
||||
# ? key
|
||||
|
||||
Reference in New Issue
Block a user