parser: Iterate over lines + tokens + comments

Instead of iterating over lines and tokens (and finding comments between
tokens inside the comment rules), add a new `Comment` type and let rules
declare `TYPE = 'comment'` to be run on each comment.
pull/12/head
Adrien Vergé 9 years ago
parent 9f99f25db5
commit 7a7d98c96a

@ -58,7 +58,7 @@ class CommentsIndentationTestCase(RuleTestCase):
'# line 2\n', conf, problem=(2, 2))
self.check('---\n'
' # line 1\n'
' # line 2\n', conf, problem1=(2, 3), problem2=(3, 3))
' # line 2\n', conf, problem1=(2, 3))
self.check('---\n'
'obj:\n'
' # normal\n'
@ -143,3 +143,15 @@ class CommentsIndentationTestCase(RuleTestCase):
'# hey\n'
'# normal\n'
' #\n', conf, problem=(4, 2))
# Checks how the comments-indentation rule treats a line that carries an
# inline comment ('- a # inline'): no problem is expected for the inline
# comment itself, while a standalone comment line next to it is still checked
# (accepted in the first case, reported at the given (line, column) in the
# other two).
def test_inline_comment(self):
conf = 'comments-indentation: enable'
self.check('---\n'
'- a # inline\n'
'# ok\n', conf)
self.check('---\n'
'- a # inline\n'
' # not ok\n', conf, problem=(3, 2))
self.check('---\n'
' # not ok\n'
'- a # inline\n', conf, problem=(2, 2))

@ -18,8 +18,7 @@ import unittest
import yaml
from yamllint.rules.common import (Comment, get_line_indent,
get_comments_between_tokens)
from yamllint.rules.common import get_line_indent
class CommonTestCase(unittest.TestCase):
@ -43,54 +42,3 @@ class CommonTestCase(unittest.TestCase):
self.assertEqual(get_line_indent(tokens[i]), 0)
for i in (13, 16, 18, 22, 24):
self.assertEqual(get_line_indent(tokens[i]), 2)
def check_comments(self, buffer, *expected):
yaml_loader = yaml.BaseLoader(buffer)
comments = []
next = yaml_loader.peek_token()
while next is not None:
curr = yaml_loader.get_token()
next = yaml_loader.peek_token()
for comment in get_comments_between_tokens(curr, next):
comments.append(comment)
self.assertEqual(comments, list(expected))
def test_get_comments_between_tokens(self):
self.check_comments('# comment\n',
Comment(1, 1, '# comment', 0))
self.check_comments('---\n'
'# comment\n'
'...\n',
Comment(2, 1, '# comment', 0))
self.check_comments('---\n'
'# no newline char',
Comment(2, 1, '# no newline char', 0))
self.check_comments('# just comment',
Comment(1, 1, '# just comment', 0))
self.check_comments('\n'
' # indented comment\n',
Comment(2, 4, '# indented comment', 0))
self.check_comments('\n'
'# trailing spaces \n',
Comment(2, 1, '# trailing spaces ', 0))
self.check_comments('# comment one\n'
'\n'
'key: val # key=val\n'
'\n'
'# this is\n'
'# a block \n'
'# comment\n'
'\n'
'other:\n'
' - foo # equals\n'
' # bar\n',
Comment(1, 1, '# comment one', 0),
Comment(3, 11, '# key=val', 0),
Comment(5, 1, '# this is', 0),
Comment(6, 1, '# a block ', 0),
Comment(7, 1, '# comment', 0),
Comment(10, 10, '# equals', 0),
Comment(11, 10, '# bar', 0))

@ -15,7 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from tests.common import RuleTestCase
from yamllint.parser import token_generator
from yamllint.parser import token_or_comment_generator, Comment
from yamllint.rules.indentation import check
@ -38,7 +38,8 @@ class IndentationStackTestCase(RuleTestCase):
'check-multi-line-strings': False}
context = {}
output = ''
for elem in token_generator(source):
for elem in [t for t in token_or_comment_generator(source)
if not isinstance(t, Comment)]:
list(check(conf, elem.curr, elem.prev, elem.next, elem.nextnext,
context))

@ -18,8 +18,9 @@ import unittest
import yaml
from yamllint.parser import (line_generator, token_generator,
token_or_line_generator, Line, Token)
from yamllint.parser import (line_generator, token_or_comment_generator,
token_or_comment_or_line_generator,
Line, Token, Comment)
class ParserTestCase(unittest.TestCase):
@ -61,8 +62,8 @@ class ParserTestCase(unittest.TestCase):
self.assertEqual(e[2].line_no, 3)
self.assertEqual(e[2].content, 'at the end')
def test_token_generator(self):
e = list(token_generator(''))
def test_token_or_comment_generator(self):
e = list(token_or_comment_generator(''))
self.assertEqual(len(e), 2)
self.assertEqual(e[0].prev, None)
self.assertIsInstance(e[0].curr, yaml.Token)
@ -71,16 +72,48 @@ class ParserTestCase(unittest.TestCase):
self.assertEqual(e[1].curr, e[0].next)
self.assertEqual(e[1].next, None)
e = list(token_generator('---\n'
e = list(token_or_comment_generator('---\n'
'k: v\n'))
self.assertEqual(len(e), 9)
self.assertIsInstance(e[3].curr, yaml.KeyToken)
self.assertIsInstance(e[5].curr, yaml.ValueToken)
def test_token_or_line_generator(self):
e = list(token_or_line_generator('---\n'
'k: v\n'))
self.assertEqual(len(e), 12)
e = list(token_or_comment_generator('# start comment\n'
'- a\n'
'- key: val # key=val\n'
'# this is\n'
'# a block \n'
'# comment\n'
'- c\n'
'# end comment\n'))
self.assertEqual(len(e), 21)
self.assertIsInstance(e[1], Comment)
self.assertEqual(e[1], Comment(1, 1, '# start comment', 0))
self.assertEqual(e[11], Comment(3, 13, '# key=val', 0))
self.assertEqual(e[12], Comment(4, 1, '# this is', 0))
self.assertEqual(e[13], Comment(5, 1, '# a block ', 0))
self.assertEqual(e[14], Comment(6, 1, '# comment', 0))
self.assertEqual(e[18], Comment(8, 1, '# end comment', 0))
e = list(token_or_comment_generator('---\n'
'# no newline char'))
self.assertEqual(e[2], Comment(2, 1, '# no newline char', 0))
e = list(token_or_comment_generator('# just comment'))
self.assertEqual(e[1], Comment(1, 1, '# just comment', 0))
e = list(token_or_comment_generator('\n'
' # indented comment\n'))
self.assertEqual(e[1], Comment(2, 4, '# indented comment', 0))
e = list(token_or_comment_generator('\n'
'# trailing spaces \n'))
self.assertEqual(e[1], Comment(2, 1, '# trailing spaces ', 0))
def test_token_or_comment_or_line_generator(self):
e = list(token_or_comment_or_line_generator('---\n'
'k: v # k=v\n'))
self.assertEqual(len(e), 13)
self.assertIsInstance(e[0], Token)
self.assertIsInstance(e[0].curr, yaml.StreamStartToken)
self.assertIsInstance(e[1], Token)
@ -89,5 +122,6 @@ class ParserTestCase(unittest.TestCase):
self.assertIsInstance(e[3].curr, yaml.BlockMappingStartToken)
self.assertIsInstance(e[4].curr, yaml.KeyToken)
self.assertIsInstance(e[6].curr, yaml.ValueToken)
self.assertIsInstance(e[8], Line)
self.assertIsInstance(e[11], Line)
self.assertIsInstance(e[8], Comment)
self.assertIsInstance(e[9], Line)
self.assertIsInstance(e[12], Line)

@ -56,13 +56,14 @@ def get_costemic_problems(buffer, conf):
# Split token rules from line rules
token_rules = [r for r in rules if r.TYPE == 'token']
comment_rules = [r for r in rules if r.TYPE == 'comment']
line_rules = [r for r in rules if r.TYPE == 'line']
context = {}
for rule in token_rules:
context[rule.ID] = {}
for elem in parser.token_or_line_generator(buffer):
for elem in parser.token_or_comment_or_line_generator(buffer):
if isinstance(elem, parser.Token):
for rule in token_rules:
rule_conf = conf.rules[rule.ID]
@ -73,6 +74,13 @@ def get_costemic_problems(buffer, conf):
problem.rule = rule.ID
problem.level = rule_conf['level']
yield problem
elif isinstance(elem, parser.Comment):
for rule in comment_rules:
rule_conf = conf.rules[rule.ID]
for problem in rule.check(rule_conf, elem):
problem.rule = rule.ID
problem.level = rule_conf['level']
yield problem
elif isinstance(elem, parser.Line):
for rule in line_rules:
rule_conf = conf.rules[rule.ID]

@ -38,6 +38,40 @@ class Token(object):
self.nextnext = nextnext
class Comment(object):
    """A comment found in a YAML source buffer.

    The comment text itself is not stored: it is read on demand from
    ``buffer``, starting at ``pointer`` (the position of the ``#``) and
    running up to the end of that line.

    Attributes:
        line_no: line number of the comment, as given by the caller.
        column_no: column number of the comment, as given by the caller.
        buffer: the string the comment text is read from.
        pointer: index in ``buffer`` where the comment starts.
        token_before: token preceding the comment, or None.
        token_after: token following the comment, or None.
        comment_before: previous comment found between the same two
            tokens, or None.
    """

    def __init__(self, line_no, column_no, buffer, pointer,
                 token_before=None, token_after=None, comment_before=None):
        self.line_no = line_no
        self.column_no = column_no
        self.buffer = buffer
        self.pointer = pointer
        self.token_before = token_before
        self.token_after = token_after
        self.comment_before = comment_before

    def __repr__(self):
        """Return the comment text, from ``pointer`` to the end of its line.

        The text stops at the first newline; failing that, at the first NUL
        character; failing that, at the end of the buffer.  ``str()`` relies
        on this method as well, since no ``__str__`` is defined.
        """
        end = self.buffer.find('\n', self.pointer)
        if end == -1:
            end = self.buffer.find('\0', self.pointer)
        if end != -1:
            return self.buffer[self.pointer:end]
        return self.buffer[self.pointer:]

    def __eq__(self, other):
        """Compare position and text; surrounding tokens are ignored."""
        return (isinstance(other, Comment) and
                self.line_no == other.line_no and
                self.column_no == other.column_no and
                str(self) == str(other))

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; keep both
        # operators consistent on every interpreter.
        return not self == other

    def is_inline(self):
        """Tell whether the comment shares its line with preceding content.

        Returns False when there is no preceding token (for instance on a
        comment built without context) instead of failing on ``None``.
        """
        before = self.token_before
        if before is None:
            return False
        return (
            not isinstance(before, yaml.StreamStartToken) and
            self.line_no == before.end_mark.line + 1 and
            # sometimes token end marks are on the next line
            self.buffer[before.end_mark.pointer - 1] != '\n'
        )
def line_generator(buffer):
line_no = 1
cur = 0
@ -51,7 +85,39 @@ def line_generator(buffer):
yield Line(line_no, buffer, start=cur, end=len(buffer))
def token_generator(buffer):
def comments_between_tokens(token1, token2):
    """Yield a Comment for each comment located between two tokens.

    The text scanned runs from the end of ``token1`` to the start of
    ``token2``, or to the end of the buffer when ``token2`` is None.
    Nothing is yielded when ``token1`` ends on the line where ``token2``
    starts, unless ``token1`` is the stream start or ``token2`` the stream
    end.  Each yielded comment references both tokens and the comment
    yielded just before it (None for the first one).
    """
    tail = token1.end_mark
    source = tail.buffer

    if token2 is None:
        gap = source[tail.pointer:]
    else:
        if (tail.line == token2.start_mark.line and
                not isinstance(token1, yaml.StreamStartToken) and
                not isinstance(token2, yaml.StreamEndToken)):
            return
        gap = source[tail.pointer:token2.start_mark.pointer]

    offset = tail.pointer
    previous = None
    for index, text in enumerate(gap.split('\n')):
        hash_at = text.find('#')
        if hash_at >= 0:
            # Only the first chunk starts mid-line, right after token1;
            # every following chunk starts at column 1.
            first_column = tail.column + 1 if index == 0 else 1
            previous = Comment(tail.line + 1 + index, first_column + hash_at,
                               source, offset + hash_at,
                               token1, token2, previous)
            yield previous
        # Skip the chunk and the newline that terminated it.
        offset += len(text) + 1
def token_or_comment_generator(buffer):
yaml_loader = yaml.BaseLoader(buffer)
try:
@ -63,6 +129,9 @@ def token_generator(buffer):
yield Token(curr.start_mark.line + 1, curr, prev, next, nextnext)
for comment in comments_between_tokens(curr, next):
yield comment
prev = curr
curr = next
@ -70,19 +139,19 @@ def token_generator(buffer):
pass
def token_or_line_generator(buffer):
def token_or_comment_or_line_generator(buffer):
"""Generator that mixes tokens and lines, ordering them by line number"""
token_gen = token_generator(buffer)
tok_or_com_gen = token_or_comment_generator(buffer)
line_gen = line_generator(buffer)
token = next(token_gen, None)
tok_or_com = next(tok_or_com_gen, None)
line = next(line_gen, None)
while token is not None or line is not None:
if token is None or (line is not None and
token.line_no > line.line_no):
while tok_or_com is not None or line is not None:
if tok_or_com is None or (line is not None and
tok_or_com.line_no > line.line_no):
yield line
line = next(line_gen, None)
else:
yield token
token = next(token_gen, None)
yield tok_or_com
tok_or_com = next(tok_or_com_gen, None)

@ -55,33 +55,25 @@ Use this rule to control the position and formatting of comments.
"""
import yaml
from yamllint.linter import LintProblem
from yamllint.rules.common import get_comments_between_tokens
ID = 'comments'
TYPE = 'token'
TYPE = 'comment'
CONF = {'require-starting-space': bool,
'min-spaces-from-content': int}
def check(conf, token, prev, next, nextnext, context):
for comment in get_comments_between_tokens(token, next):
if (conf['min-spaces-from-content'] != -1 and
not isinstance(token, yaml.StreamStartToken) and
comment.line == token.end_mark.line + 1):
# Sometimes token end marks are on the next line
if token.end_mark.buffer[token.end_mark.pointer - 1] != '\n':
if (comment.pointer - token.end_mark.pointer <
def check(conf, comment):
if (conf['min-spaces-from-content'] != -1 and comment.is_inline() and
comment.pointer - comment.token_before.end_mark.pointer <
conf['min-spaces-from-content']):
yield LintProblem(comment.line, comment.column,
yield LintProblem(comment.line_no, comment.column_no,
'too few spaces before comment')
if (conf['require-starting-space'] and
comment.pointer + 1 < len(comment.buffer) and
comment.buffer[comment.pointer + 1] != ' ' and
comment.buffer[comment.pointer + 1] != '\n'):
yield LintProblem(comment.line, comment.column + 1,
yield LintProblem(comment.line_no, comment.column_no + 1,
'missing starting space in comment')

@ -78,11 +78,11 @@ Use this rule to force comments to be indented like content.
import yaml
from yamllint.linter import LintProblem
from yamllint.rules.common import get_line_indent, get_comments_between_tokens
from yamllint.rules.common import get_line_indent
ID = 'comments-indentation'
TYPE = 'token'
TYPE = 'comment'
# Case A:
@ -98,28 +98,42 @@ TYPE = 'token'
# # commented line 2
# current: line
def check(conf, token, prev, next, nextnext, context):
if prev is None:
def check(conf, comment):
# Only check block comments
if (not isinstance(comment.token_before, yaml.StreamStartToken) and
comment.token_before.end_mark.line + 1 == comment.line_no):
return
curr_line_indent = token.start_mark.column
if isinstance(token, yaml.StreamEndToken):
curr_line_indent = 0
next_line_indent = comment.token_after.start_mark.column
if isinstance(comment.token_after, yaml.StreamEndToken):
next_line_indent = 0
skip_first_line = True
if isinstance(prev, yaml.StreamStartToken):
skip_first_line = False
if isinstance(comment.token_before, yaml.StreamStartToken):
prev_line_indent = 0
else:
prev_line_indent = get_line_indent(prev)
if prev_line_indent <= curr_line_indent:
prev_line_indent = -1 # disable it
for comment in get_comments_between_tokens(
prev, token, skip_first_line=skip_first_line):
if comment.column - 1 == curr_line_indent:
prev_line_indent = -1 # disable it
elif comment.column - 1 != prev_line_indent:
yield LintProblem(comment.line, comment.column,
prev_line_indent = get_line_indent(comment.token_before)
# In the following case only the next line indent is valid:
# list:
# # comment
# - 1
# - 2
if prev_line_indent <= next_line_indent:
prev_line_indent = next_line_indent
# If two indents are valid but a previous comment went back to normal
# indent, force the next ones to do the same. In other words, avoid this:
# list:
# - 1
# # comment on valid indent (0)
# # comment on valid indent (4)
# other-list:
# - 2
if (comment.comment_before is not None and
not comment.comment_before.is_inline()):
prev_line_indent = comment.comment_before.column_no - 1
if (comment.column_no - 1 != prev_line_indent and
comment.column_no - 1 != next_line_indent):
yield LintProblem(comment.line_no, comment.column_no,
'comment not indented like content')

@ -98,35 +98,6 @@ def get_real_end_line(token):
return end_line
def get_comments_between_tokens(token1, token2, skip_first_line=False):
if token2 is None:
buf = token1.end_mark.buffer[token1.end_mark.pointer:]
elif (token1.end_mark.line == token2.start_mark.line and
not isinstance(token1, yaml.StreamStartToken) and
not isinstance(token2, yaml.StreamEndToken)):
return
else:
buf = token1.end_mark.buffer[token1.end_mark.pointer:
token2.start_mark.pointer]
line_no = token1.end_mark.line + 1
column_no = token1.end_mark.column + 1
pointer = token1.end_mark.pointer
for line in buf.split('\n'):
if skip_first_line:
skip_first_line = False
else:
pos = line.find('#')
if pos != -1:
yield Comment(line_no, column_no + pos,
token1.end_mark.buffer, pointer + pos)
pointer += len(line) + 1
line_no += 1
column_no = 1
def is_explicit_key(token):
# explicit key:
# ? key

Loading…
Cancel
Save