parser: Iterate over lines + tokens + comments
Instead of iterating over lines and tokens (and finding comments between tokens inside the comment rules), add a new `Comment` type to the parser and mark the comment rules with `TYPE = 'comment'`.
This commit is contained in:
@@ -58,7 +58,7 @@ class CommentsIndentationTestCase(RuleTestCase):
|
||||
'# line 2\n', conf, problem=(2, 2))
|
||||
self.check('---\n'
|
||||
' # line 1\n'
|
||||
' # line 2\n', conf, problem1=(2, 3), problem2=(3, 3))
|
||||
' # line 2\n', conf, problem1=(2, 3))
|
||||
self.check('---\n'
|
||||
'obj:\n'
|
||||
' # normal\n'
|
||||
@@ -143,3 +143,15 @@ class CommentsIndentationTestCase(RuleTestCase):
|
||||
'# hey\n'
|
||||
'# normal\n'
|
||||
' #\n', conf, problem=(4, 2))
|
||||
|
||||
def test_inline_comment(self):
|
||||
conf = 'comments-indentation: enable'
|
||||
self.check('---\n'
|
||||
'- a # inline\n'
|
||||
'# ok\n', conf)
|
||||
self.check('---\n'
|
||||
'- a # inline\n'
|
||||
' # not ok\n', conf, problem=(3, 2))
|
||||
self.check('---\n'
|
||||
' # not ok\n'
|
||||
'- a # inline\n', conf, problem=(2, 2))
|
||||
|
||||
@@ -18,8 +18,7 @@ import unittest
|
||||
|
||||
import yaml
|
||||
|
||||
from yamllint.rules.common import (Comment, get_line_indent,
|
||||
get_comments_between_tokens)
|
||||
from yamllint.rules.common import get_line_indent
|
||||
|
||||
|
||||
class CommonTestCase(unittest.TestCase):
|
||||
@@ -43,54 +42,3 @@ class CommonTestCase(unittest.TestCase):
|
||||
self.assertEqual(get_line_indent(tokens[i]), 0)
|
||||
for i in (13, 16, 18, 22, 24):
|
||||
self.assertEqual(get_line_indent(tokens[i]), 2)
|
||||
|
||||
def check_comments(self, buffer, *expected):
|
||||
yaml_loader = yaml.BaseLoader(buffer)
|
||||
|
||||
comments = []
|
||||
|
||||
next = yaml_loader.peek_token()
|
||||
while next is not None:
|
||||
curr = yaml_loader.get_token()
|
||||
next = yaml_loader.peek_token()
|
||||
for comment in get_comments_between_tokens(curr, next):
|
||||
comments.append(comment)
|
||||
|
||||
self.assertEqual(comments, list(expected))
|
||||
|
||||
def test_get_comments_between_tokens(self):
|
||||
self.check_comments('# comment\n',
|
||||
Comment(1, 1, '# comment', 0))
|
||||
self.check_comments('---\n'
|
||||
'# comment\n'
|
||||
'...\n',
|
||||
Comment(2, 1, '# comment', 0))
|
||||
self.check_comments('---\n'
|
||||
'# no newline char',
|
||||
Comment(2, 1, '# no newline char', 0))
|
||||
self.check_comments('# just comment',
|
||||
Comment(1, 1, '# just comment', 0))
|
||||
self.check_comments('\n'
|
||||
' # indented comment\n',
|
||||
Comment(2, 4, '# indented comment', 0))
|
||||
self.check_comments('\n'
|
||||
'# trailing spaces \n',
|
||||
Comment(2, 1, '# trailing spaces ', 0))
|
||||
self.check_comments('# comment one\n'
|
||||
'\n'
|
||||
'key: val # key=val\n'
|
||||
'\n'
|
||||
'# this is\n'
|
||||
'# a block \n'
|
||||
'# comment\n'
|
||||
'\n'
|
||||
'other:\n'
|
||||
' - foo # equals\n'
|
||||
' # bar\n',
|
||||
Comment(1, 1, '# comment one', 0),
|
||||
Comment(3, 11, '# key=val', 0),
|
||||
Comment(5, 1, '# this is', 0),
|
||||
Comment(6, 1, '# a block ', 0),
|
||||
Comment(7, 1, '# comment', 0),
|
||||
Comment(10, 10, '# equals', 0),
|
||||
Comment(11, 10, '# bar', 0))
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from tests.common import RuleTestCase
|
||||
from yamllint.parser import token_generator
|
||||
from yamllint.parser import token_or_comment_generator, Comment
|
||||
from yamllint.rules.indentation import check
|
||||
|
||||
|
||||
@@ -38,7 +38,8 @@ class IndentationStackTestCase(RuleTestCase):
|
||||
'check-multi-line-strings': False}
|
||||
context = {}
|
||||
output = ''
|
||||
for elem in token_generator(source):
|
||||
for elem in [t for t in token_or_comment_generator(source)
|
||||
if not isinstance(t, Comment)]:
|
||||
list(check(conf, elem.curr, elem.prev, elem.next, elem.nextnext,
|
||||
context))
|
||||
|
||||
|
||||
@@ -18,8 +18,9 @@ import unittest
|
||||
|
||||
import yaml
|
||||
|
||||
from yamllint.parser import (line_generator, token_generator,
|
||||
token_or_line_generator, Line, Token)
|
||||
from yamllint.parser import (line_generator, token_or_comment_generator,
|
||||
token_or_comment_or_line_generator,
|
||||
Line, Token, Comment)
|
||||
|
||||
|
||||
class ParserTestCase(unittest.TestCase):
|
||||
@@ -61,8 +62,8 @@ class ParserTestCase(unittest.TestCase):
|
||||
self.assertEqual(e[2].line_no, 3)
|
||||
self.assertEqual(e[2].content, 'at the end')
|
||||
|
||||
def test_token_generator(self):
|
||||
e = list(token_generator(''))
|
||||
def test_token_or_comment_generator(self):
|
||||
e = list(token_or_comment_generator(''))
|
||||
self.assertEqual(len(e), 2)
|
||||
self.assertEqual(e[0].prev, None)
|
||||
self.assertIsInstance(e[0].curr, yaml.Token)
|
||||
@@ -71,16 +72,48 @@ class ParserTestCase(unittest.TestCase):
|
||||
self.assertEqual(e[1].curr, e[0].next)
|
||||
self.assertEqual(e[1].next, None)
|
||||
|
||||
e = list(token_generator('---\n'
|
||||
e = list(token_or_comment_generator('---\n'
|
||||
'k: v\n'))
|
||||
self.assertEqual(len(e), 9)
|
||||
self.assertIsInstance(e[3].curr, yaml.KeyToken)
|
||||
self.assertIsInstance(e[5].curr, yaml.ValueToken)
|
||||
|
||||
def test_token_or_line_generator(self):
|
||||
e = list(token_or_line_generator('---\n'
|
||||
'k: v\n'))
|
||||
self.assertEqual(len(e), 12)
|
||||
e = list(token_or_comment_generator('# start comment\n'
|
||||
'- a\n'
|
||||
'- key: val # key=val\n'
|
||||
'# this is\n'
|
||||
'# a block \n'
|
||||
'# comment\n'
|
||||
'- c\n'
|
||||
'# end comment\n'))
|
||||
self.assertEqual(len(e), 21)
|
||||
self.assertIsInstance(e[1], Comment)
|
||||
self.assertEqual(e[1], Comment(1, 1, '# start comment', 0))
|
||||
self.assertEqual(e[11], Comment(3, 13, '# key=val', 0))
|
||||
self.assertEqual(e[12], Comment(4, 1, '# this is', 0))
|
||||
self.assertEqual(e[13], Comment(5, 1, '# a block ', 0))
|
||||
self.assertEqual(e[14], Comment(6, 1, '# comment', 0))
|
||||
self.assertEqual(e[18], Comment(8, 1, '# end comment', 0))
|
||||
|
||||
e = list(token_or_comment_generator('---\n'
|
||||
'# no newline char'))
|
||||
self.assertEqual(e[2], Comment(2, 1, '# no newline char', 0))
|
||||
|
||||
e = list(token_or_comment_generator('# just comment'))
|
||||
self.assertEqual(e[1], Comment(1, 1, '# just comment', 0))
|
||||
|
||||
e = list(token_or_comment_generator('\n'
|
||||
' # indented comment\n'))
|
||||
self.assertEqual(e[1], Comment(2, 4, '# indented comment', 0))
|
||||
|
||||
e = list(token_or_comment_generator('\n'
|
||||
'# trailing spaces \n'))
|
||||
self.assertEqual(e[1], Comment(2, 1, '# trailing spaces ', 0))
|
||||
|
||||
def test_token_or_comment_or_line_generator(self):
|
||||
e = list(token_or_comment_or_line_generator('---\n'
|
||||
'k: v # k=v\n'))
|
||||
self.assertEqual(len(e), 13)
|
||||
self.assertIsInstance(e[0], Token)
|
||||
self.assertIsInstance(e[0].curr, yaml.StreamStartToken)
|
||||
self.assertIsInstance(e[1], Token)
|
||||
@@ -89,5 +122,6 @@ class ParserTestCase(unittest.TestCase):
|
||||
self.assertIsInstance(e[3].curr, yaml.BlockMappingStartToken)
|
||||
self.assertIsInstance(e[4].curr, yaml.KeyToken)
|
||||
self.assertIsInstance(e[6].curr, yaml.ValueToken)
|
||||
self.assertIsInstance(e[8], Line)
|
||||
self.assertIsInstance(e[11], Line)
|
||||
self.assertIsInstance(e[8], Comment)
|
||||
self.assertIsInstance(e[9], Line)
|
||||
self.assertIsInstance(e[12], Line)
|
||||
|
||||
@@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):
|
||||
|
||||
# Split token rules from line rules
|
||||
token_rules = [r for r in rules if r.TYPE == 'token']
|
||||
comment_rules = [r for r in rules if r.TYPE == 'comment']
|
||||
line_rules = [r for r in rules if r.TYPE == 'line']
|
||||
|
||||
context = {}
|
||||
for rule in token_rules:
|
||||
context[rule.ID] = {}
|
||||
|
||||
for elem in parser.token_or_line_generator(buffer):
|
||||
for elem in parser.token_or_comment_or_line_generator(buffer):
|
||||
if isinstance(elem, parser.Token):
|
||||
for rule in token_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
@@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
|
||||
problem.rule = rule.ID
|
||||
problem.level = rule_conf['level']
|
||||
yield problem
|
||||
elif isinstance(elem, parser.Comment):
|
||||
for rule in comment_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
for problem in rule.check(rule_conf, elem):
|
||||
problem.rule = rule.ID
|
||||
problem.level = rule_conf['level']
|
||||
yield problem
|
||||
elif isinstance(elem, parser.Line):
|
||||
for rule in line_rules:
|
||||
rule_conf = conf.rules[rule.ID]
|
||||
|
||||
@@ -38,6 +38,40 @@ class Token(object):
|
||||
self.nextnext = nextnext
|
||||
|
||||
|
||||
class Comment(object):
    """A YAML comment located inside a source buffer.

    The comment text itself is not stored; it is read on demand from
    ``buffer``, starting at ``pointer`` (the index of the ``#`` character)
    and running to the end of that line.

    Attributes:
        line_no: line number of the comment (the generator in this module
            computes it as ``end_mark.line + 1``, i.e. 1-based).
        column_no: column number of the ``#`` character (likewise 1-based).
        buffer: the string containing the comment.
        pointer: index of the ``#`` character inside ``buffer``.
        token_before: token preceding the comment, or None if unknown.
        token_after: token following the comment, or None if unknown.
        comment_before: previous Comment found between the same two tokens,
            or None.
    """

    def __init__(self, line_no, column_no, buffer, pointer,
                 token_before=None, token_after=None, comment_before=None):
        self.line_no = line_no
        self.column_no = column_no
        self.buffer = buffer
        self.pointer = pointer
        self.token_before = token_before
        self.token_after = token_after
        self.comment_before = comment_before

    def __repr__(self):
        """Return the comment text, from ``#`` up to the end of its line."""
        end = self.buffer.find('\n', self.pointer)
        if end == -1:
            # No newline after the comment: fall back to a NUL terminator,
            # if the buffer has one.
            end = self.buffer.find('\0', self.pointer)
        if end != -1:
            return self.buffer[self.pointer:end]
        return self.buffer[self.pointer:]

    def __eq__(self, other):
        """Two comments are equal if position and text are the same."""
        return (isinstance(other, Comment) and
                self.line_no == other.line_no and
                self.column_no == other.column_no and
                str(self) == str(other))

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with ``__eq__`` (defining ``__eq__`` alone
        # makes instances unhashable on Python 3).
        return hash((self.line_no, self.column_no, str(self)))

    def is_inline(self):
        """Tell whether the comment shares its line with preceding content.

        Returns False when no preceding token is known, or when the
        preceding token is the stream start.
        """
        before = self.token_before
        # A comment built without token context (``token_before=None``)
        # cannot be inline; bail out instead of dereferencing None.
        if before is None or isinstance(before, yaml.StreamStartToken):
            return False
        return (
            self.line_no == before.end_mark.line + 1 and
            # sometimes token end marks are on the next line
            self.buffer[before.end_mark.pointer - 1] != '\n'
        )
|
||||
|
||||
|
||||
def line_generator(buffer):
|
||||
line_no = 1
|
||||
cur = 0
|
||||
@@ -51,7 +85,39 @@ def line_generator(buffer):
|
||||
yield Line(line_no, buffer, start=cur, end=len(buffer))
|
||||
|
||||
|
||||
def token_generator(buffer):
|
||||
def comments_between_tokens(token1, token2):
    """Yield a Comment for each commented line between two tokens.

    The text scanned runs from the end of ``token1`` to the start of
    ``token2`` (or to the end of the buffer when ``token2`` is None).
    Nothing is yielded when both tokens sit on the same line, unless
    ``token1`` is the stream start or ``token2`` is the stream end.

    Each yielded Comment carries ``token1``, ``token2`` and the Comment
    yielded just before it (None for the first one).
    """
    mark = token1.end_mark

    if token2 is None:
        gap = mark.buffer[mark.pointer:]
    else:
        if (mark.line == token2.start_mark.line and
                not isinstance(token1, yaml.StreamStartToken) and
                not isinstance(token2, yaml.StreamEndToken)):
            return
        gap = mark.buffer[mark.pointer:token2.start_mark.pointer]

    first_line_no = mark.line + 1
    first_column_no = mark.column + 1
    # Index in the buffer of the start of the chunk being examined.
    offset = mark.pointer
    previous = None

    for index, text in enumerate(gap.split('\n')):
        hash_at = text.find('#')
        if hash_at != -1:
            # Only the first chunk starts mid-line (right after token1);
            # every following chunk starts at column 1.
            base_column = first_column_no if index == 0 else 1
            previous = Comment(first_line_no + index, base_column + hash_at,
                               mark.buffer, offset + hash_at,
                               token1, token2, previous)
            yield previous

        # +1 accounts for the '\n' removed by split().
        offset += len(text) + 1
|
||||
|
||||
|
||||
def token_or_comment_generator(buffer):
|
||||
yaml_loader = yaml.BaseLoader(buffer)
|
||||
|
||||
try:
|
||||
@@ -63,6 +129,9 @@ def token_generator(buffer):
|
||||
|
||||
yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)
|
||||
|
||||
for comment in comments_between_tokens(curr, next):
|
||||
yield comment
|
||||
|
||||
prev = curr
|
||||
curr = next
|
||||
|
||||
@@ -70,19 +139,19 @@ def token_generator(buffer):
|
||||
pass
|
||||
|
||||
|
||||
def token_or_line_generator(buffer):
|
||||
def token_or_comment_or_line_generator(buffer):
|
||||
"""Generator that mixes tokens and lines, ordering them by line number"""
|
||||
token_gen = token_generator(buffer)
|
||||
tok_or_com_gen = token_or_comment_generator(buffer)
|
||||
line_gen = line_generator(buffer)
|
||||
|
||||
token = next(token_gen, None)
|
||||
tok_or_com = next(tok_or_com_gen, None)
|
||||
line = next(line_gen, None)
|
||||
|
||||
while token is not None or line is not None:
|
||||
if token is None or (line is not None and
|
||||
token.line_no > line.line_no):
|
||||
while tok_or_com is not None or line is not None:
|
||||
if tok_or_com is None or (line is not None and
|
||||
tok_or_com.line_no > line.line_no):
|
||||
yield line
|
||||
line = next(line_gen, None)
|
||||
else:
|
||||
yield token
|
||||
token = next(token_gen, None)
|
||||
yield tok_or_com
|
||||
tok_or_com = next(tok_or_com_gen, None)
|
||||
|
||||
@@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
|
||||
"""
|
||||
|
||||
|
||||
import yaml
|
||||
|
||||
from yamllint.linter import LintProblem
|
||||
from yamllint.rules.common import get_comments_between_tokens
|
||||
|
||||
|
||||
ID = 'comments'
|
||||
TYPE = 'token'
|
||||
TYPE = 'comment'
|
||||
CONF = {'require-starting-space': bool,
|
||||
'min-spaces-from-content': int}
|
||||
|
||||
|
||||
def check(conf, token, prev, next, nextnext, context):
|
||||
for comment in get_comments_between_tokens(token, next):
|
||||
if (conf['min-spaces-from-content'] != -1 and
|
||||
not isinstance(token, yaml.StreamStartToken) and
|
||||
comment.line == token.end_mark.line + 1):
|
||||
# Sometimes token end marks are on the next line
|
||||
if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
|
||||
if (comment.pointer - token.end_mark.pointer <
|
||||
def check(conf, comment):
|
||||
if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
|
||||
comment.pointer - comment.token_before.end_mark.pointer <
|
||||
conf['min-spaces-from-content']):
|
||||
yield LintProblem(comment.line, comment.column,
|
||||
yield LintProblem(comment.line_no, comment.column_no,
|
||||
'too few spaces before comment')
|
||||
|
||||
if (conf['require-starting-space'] and
|
||||
comment.pointer + 1 < len(comment.buffer) and
|
||||
comment.buffer[comment.pointer + 1] != ' ' and
|
||||
comment.buffer[comment.pointer + 1] != '\n'):
|
||||
yield LintProblem(comment.line, comment.column + 1,
|
||||
yield LintProblem(comment.line_no, comment.column_no + 1,
|
||||
'missing starting space in comment')
|
||||
|
||||
@@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
|
||||
import yaml
|
||||
|
||||
from yamllint.linter import LintProblem
|
||||
from yamllint.rules.common import get_line_indent, get_comments_between_tokens
|
||||
from yamllint.rules.common import get_line_indent
|
||||
|
||||
|
||||
ID = 'comments-indentation'
|
||||
TYPE = 'token'
|
||||
TYPE = 'comment'
|
||||
|
||||
|
||||
# Case A:
|
||||
@@ -98,28 +98,42 @@ TYPE = 'token'
|
||||
# # commented line 2
|
||||
# current: line
|
||||
|
||||
def check(conf, token, prev, next, nextnext, context):
|
||||
if prev is None:
|
||||
def check(conf, comment):
|
||||
# Only check block comments
|
||||
if (not isinstance(comment.token_before, yaml.StreamStartToken) and
|
||||
comment.token_before.end_mark.line + 1 == comment.line_no):
|
||||
return
|
||||
|
||||
curr_line_indent = token.start_mark.column
|
||||
if isinstance(token, yaml.StreamEndToken):
|
||||
curr_line_indent = 0
|
||||
next_line_indent = comment.token_after.start_mark.column
|
||||
if isinstance(comment.token_after, yaml.StreamEndToken):
|
||||
next_line_indent = 0
|
||||
|
||||
skip_first_line = True
|
||||
if isinstance(prev, yaml.StreamStartToken):
|
||||
skip_first_line = False
|
||||
if isinstance(comment.token_before, yaml.StreamStartToken):
|
||||
prev_line_indent = 0
|
||||
else:
|
||||
prev_line_indent = get_line_indent(prev)
|
||||
prev_line_indent = get_line_indent(comment.token_before)
|
||||
|
||||
if prev_line_indent <= curr_line_indent:
|
||||
prev_line_indent = -1 # disable it
|
||||
# In the following case only the next line indent is valid:
|
||||
# list:
|
||||
# # comment
|
||||
# - 1
|
||||
# - 2
|
||||
if prev_line_indent <= next_line_indent:
|
||||
prev_line_indent = next_line_indent
|
||||
|
||||
for comment in get_comments_between_tokens(
|
||||
prev, token, skip_first_line=skip_first_line):
|
||||
if comment.column - 1 == curr_line_indent:
|
||||
prev_line_indent = -1 # disable it
|
||||
elif comment.column - 1 != prev_line_indent:
|
||||
yield LintProblem(comment.line, comment.column,
|
||||
# If two indents are valid but a previous comment went back to normal
|
||||
# indent, force the next ones to do the same. In other words, avoid this:
|
||||
# list:
|
||||
# - 1
|
||||
# # comment on valid indent (0)
|
||||
# # comment on valid indent (4)
|
||||
# other-list:
|
||||
# - 2
|
||||
if (comment.comment_before is not None and
|
||||
not comment.comment_before.is_inline()):
|
||||
prev_line_indent = comment.comment_before.column_no - 1
|
||||
|
||||
if (comment.column_no - 1 != prev_line_indent and
|
||||
comment.column_no - 1 != next_line_indent):
|
||||
yield LintProblem(comment.line_no, comment.column_no,
|
||||
'comment not indented like content')
|
||||
|
||||
@@ -98,35 +98,6 @@ def get_real_end_line(token):
|
||||
return end_line
|
||||
|
||||
|
||||
def get_comments_between_tokens(token1, token2, skip_first_line=False):
    """Yield a Comment for each commented line between two tokens.

    The text scanned runs from the end of ``token1`` to the start of
    ``token2`` (or to the end of the buffer when ``token2`` is None).
    Nothing is yielded when both tokens sit on the same line, unless
    ``token1`` is the stream start or ``token2`` is the stream end.

    When ``skip_first_line`` is true, a comment on the first scanned line
    (the remainder of the line ``token1`` ends on) is not reported.
    """
    mark = token1.end_mark

    if token2 is None:
        gap = mark.buffer[mark.pointer:]
    else:
        if (mark.line == token2.start_mark.line and
                not isinstance(token1, yaml.StreamStartToken) and
                not isinstance(token2, yaml.StreamEndToken)):
            return
        gap = mark.buffer[mark.pointer:token2.start_mark.pointer]

    first_line_no = mark.line + 1
    first_column_no = mark.column + 1
    # Index in the buffer of the start of the chunk being examined.
    offset = mark.pointer

    for index, text in enumerate(gap.split('\n')):
        ignored = index == 0 and skip_first_line
        if not ignored:
            hash_at = text.find('#')
            if hash_at != -1:
                # Only the first chunk starts mid-line; later ones start
                # at column 1.
                base_column = first_column_no if index == 0 else 1
                yield Comment(first_line_no + index, base_column + hash_at,
                              mark.buffer, offset + hash_at)

        # +1 accounts for the '\n' removed by split().
        offset += len(text) + 1
|
||||
|
||||
|
||||
def is_explicit_key(token):
|
||||
# explicit key:
|
||||
# ? key
|
||||
|
||||
Reference in New Issue
Block a user