From 39c878c819fee4b636f653d81560a668e5f33044 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Verg=C3=A9?= <adrienverge@gmail.com>
Date: Mon, 18 Jan 2016 21:36:32 +0100
Subject: [PATCH] Rules: indentation: Rewrite the algorithm (again)

Use a new, better thought-out algorithm that keeps a history stack of all
the parents' indentations.
---
 tests/rules/test_indentation.py  |  66 +++++++++--
 tests/rules/test_syntax_error.py |  18 +++
 yamllint/conf/default.yml        |   4 +-
 yamllint/rules/indentation.py    | 194 +++++++++++++++++++++++++------
 4 files changed, 235 insertions(+), 47 deletions(-)

diff --git a/tests/rules/test_indentation.py b/tests/rules/test_indentation.py
index e71fdc9..8eb7174 100644
--- a/tests/rules/test_indentation.py
+++ b/tests/rules/test_indentation.py
@@ -55,6 +55,11 @@ class IndentationTestCase(RuleTestCase):
                    '  - a\n'
                    '  - b\n'
                    ' k2: v2\n'
+                   ' k3:\n'
+                   '  - name: Unix\n'
+                   '    date: 1969\n'
+                   '  - name: Linux\n'
+                   '    date: 1991\n'
                    '...\n', conf)
 
     def test_two_spaces(self):
@@ -65,6 +70,11 @@ class IndentationTestCase(RuleTestCase):
                    '    - a\n'
                    '    - b\n'
                    '  k2: v2\n'
+                   '  k3:\n'
+                   '    - name: Unix\n'
+                   '      date: 1969\n'
+                   '    - name: Linux\n'
+                   '      date: 1991\n'
                    '...\n', conf)
 
     def test_three_spaces(self):
@@ -75,6 +85,11 @@ class IndentationTestCase(RuleTestCase):
                    '      - a\n'
                    '      - b\n'
                    '   k2: v2\n'
+                   '   k3:\n'
+                   '      - name: Unix\n'
+                   '        date: 1969\n'
+                   '      - name: Linux\n'
+                   '        date: 1991\n'
                    '...\n', conf)
 
     def test_under_indented(self):
@@ -88,6 +103,12 @@ class IndentationTestCase(RuleTestCase):
                    '  k1:\n'
                    '   - a\n'
                    '...\n', conf, problem=(4, 4))
+        self.check('---\n'
+                   'object:\n'
+                   '  k3:\n'
+                   '    - name: Unix\n'
+                   '     date: 1969\n'
+                   '...\n', conf, problem=(5, 6, 'syntax'))
         conf = 'indentation: {spaces: 4}'
         self.check('---\n'
                    'object:\n'
@@ -98,6 +119,12 @@ class IndentationTestCase(RuleTestCase):
                    '- el2:\n'
                    '   - subel\n'
                    '...\n', conf, problem=(4, 4))
+        self.check('---\n'
+                   'object:\n'
+                   '    k3:\n'
+                   '        - name: Linux\n'
+                   '         date: 1991\n'
+                   '...\n', conf, problem=(5, 10, 'syntax'))
 
     def test_over_indented(self):
         conf = 'indentation: {spaces: 2}'
@@ -110,6 +137,12 @@ class IndentationTestCase(RuleTestCase):
                    '  k1:\n'
                    '     - a\n'
                    '...\n', conf, problem=(4, 6))
+        self.check('---\n'
+                   'object:\n'
+                   '  k3:\n'
+                   '    - name: Unix\n'
+                   '       date: 1969\n'
+                   '...\n', conf, problem=(5, 12, 'syntax'))
         conf = 'indentation: {spaces: 4}'
         self.check('---\n'
                    'object:\n'
@@ -118,7 +151,7 @@ class IndentationTestCase(RuleTestCase):
         self.check('---\n'
                    ' object:\n'
                    '     val: 1\n'
-                   '...\n', conf, problem1=(2, 2), problem2=(3, 6))
+                   '...\n', conf, problem=(2, 2))
         self.check('---\n'
                    '- el1\n'
                    '- el2:\n'
@@ -134,7 +167,13 @@ class IndentationTestCase(RuleTestCase):
                    '  - el2:\n'
                    '    - subel\n'
                    '...\n', conf,
-                   problem1=(2, 3), problem2=(3, 3), problem3=(4, 5))
+                   problem=(2, 3))
+        self.check('---\n'
+                   'object:\n'
+                   '    k3:\n'
+                   '        - name: Linux\n'
+                   '           date: 1991\n'
+                   '...\n', conf, problem=(5, 16, 'syntax'))
 
     def test_multi_lines(self):
         self.check('---\n'
@@ -154,6 +193,19 @@ class IndentationTestCase(RuleTestCase):
                    '      blah bla bla\n'
                    '...\n', None)
 
+    def test_empty_value(self):
+        conf = 'indentation: {spaces: 2}'
+        self.check('---\n'
+                   'key1:\n'
+                   'key2: not empty\n'
+                   'key3:\n'
+                   '...\n', conf)
+        self.check('---\n'
+                   '-\n'
+                   '- item 2\n'
+                   '-\n'
+                   '...\n', conf)
+
     def test_nested_collections(self):
         conf = 'indentation: {spaces: 2}'
         self.check('---\n'
@@ -171,16 +223,16 @@ class IndentationTestCase(RuleTestCase):
         conf = 'indentation: {spaces: 4}'
         self.check('---\n'
                    '- o:\n'
-                   '    k1: v1\n'
+                   '      k1: v1\n'
                    '...\n', conf)
-        self.check('---\n'
-                   '- o:\n'
-                   '   k1: v1\n'
-                   '...\n', conf, problem=(3, 4))
         self.check('---\n'
                    '- o:\n'
                    '     k1: v1\n'
                    '...\n', conf, problem=(3, 6))
+        self.check('---\n'
+                   '- o:\n'
+                   '       k1: v1\n'
+                   '...\n', conf, problem=(3, 8))
 
     def test_return(self):
         self.check('---\n'
diff --git a/tests/rules/test_syntax_error.py b/tests/rules/test_syntax_error.py
index db88539..94faf0b 100644
--- a/tests/rules/test_syntax_error.py
+++ b/tests/rules/test_syntax_error.py
@@ -36,6 +36,24 @@ class YamlLintTestCase(RuleTestCase):
                    'doc: ument\n'
                    '...\n', None, problem=(3, 1))
 
+    def test_explicit_mapping(self):
+        self.check('---\n'
+                   '? key\n'
+                   ': - value 1\n'
+                   '  - value 2\n'
+                   '...\n', None)
+        self.check('---\n'
+                   '?\n'
+                   '  key\n'
+                   ': {a: 1}\n'
+                   '...\n', None)
+        self.check('---\n'
+                   '?\n'
+                   '  key\n'
+                   ':\n'
+                   '  val\n'
+                   '...\n', None)
+
     def test_mapping_between_sequences(self):
         # This is valid YAML. See http://www.yaml.org/spec/1.2/spec.html,
         # example 2.11
diff --git a/yamllint/conf/default.yml b/yamllint/conf/default.yml
index 5a30434..bca683f 100644
--- a/yamllint/conf/default.yml
+++ b/yamllint/conf/default.yml
@@ -31,12 +31,10 @@ rules:
     max-spaces-after: 1
   indentation:
     spaces: 2
+    # indent-sequences: no
   line-length:
     max: 80
   new-line-at-end-of-file: {level: error}
   new-lines:
     type: unix
-  #sequences-indentation:
-  #  level: warning
-  #  present: yes
   trailing-spaces: {}
diff --git a/yamllint/rules/indentation.py b/yamllint/rules/indentation.py
index 320742c..eab3a40 100644
--- a/yamllint/rules/indentation.py
+++ b/yamllint/rules/indentation.py
@@ -23,48 +23,168 @@ ID = 'indentation'
 TYPE = 'token'
 CONF = {'spaces': int}
 
+ROOT, MAP, B_SEQ, F_SEQ, KEY, VAL = range(6)
+
+
+class Parent(object):
+    def __init__(self, type, indent):
+        self.type = type
+        self.indent = indent
+        self.explicit_key = False
+
 
 def check(conf, token, prev, next, context):
-    if isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)):
-        return
+    if 'stack' not in context:
+        context['stack'] = [Parent(ROOT, 0)]
+        context['cur_line'] = -1
 
-    # Check if first token in line
-    if (not isinstance(prev, (yaml.StreamStartToken, yaml.DirectiveToken)) and
-            token.start_mark.line == prev.end_mark.line):
-        return
+    # Step 1: Lint
 
-    if token.start_mark.column % conf['spaces'] != 0:
-        yield LintProblem(
-            token.end_mark.line + 1, token.start_mark.column + 1,
-            'indentation is not a multiple of %d' % conf['spaces'])
-        return
+    if (not isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)) and
+            not isinstance(token, yaml.BlockEndToken) and
+            not (isinstance(token, yaml.ScalarToken) and token.value == '') and
+            token.start_mark.line + 1 > context['cur_line']):
 
-    if isinstance(prev, (yaml.StreamStartToken,
-                         yaml.DirectiveToken,
-                         yaml.DocumentStartToken,
-                         yaml.DocumentEndToken)):
-        indent = 0
-    else:
-        buffer = prev.end_mark.buffer
-        start = buffer.rfind('\n', 0, prev.end_mark.pointer) + 1
+        found_indentation = token.start_mark.column
+        expected = context['stack'][-1].indent
 
-        indent = 0
-        while buffer[start + indent] == ' ':
-            indent += 1
+        if isinstance(token, (yaml.FlowMappingEndToken,
+                              yaml.FlowSequenceEndToken)):
+            expected = 0
+        elif (context['stack'][-1].type == KEY and
+                context['stack'][-1].explicit_key and
+                not isinstance(token, yaml.ValueToken)):
+            expected += conf['spaces']
 
-    if token.start_mark.column > indent:
-        if not isinstance(prev, (yaml.BlockSequenceStartToken,
-                                 yaml.BlockMappingStartToken,
-                                 yaml.FlowSequenceStartToken,
-                                 yaml.FlowMappingStartToken,
-                                 yaml.KeyToken,
-                                 yaml.ValueToken)):
-            yield LintProblem(
-                token.end_mark.line + 1, token.start_mark.column + 1,
-                'unexpected indentation')
+        if found_indentation != expected:
+            yield LintProblem(token.start_mark.line + 1, found_indentation + 1,
+                              'wrong indentation: expected %d but found %d' %
+                              (expected, found_indentation))
 
-        elif token.start_mark.column != indent + conf['spaces']:
-            yield LintProblem(
-                token.end_mark.line + 1, token.start_mark.column + 1,
-                'found indentation of %d instead of %d' %
-                (token.start_mark.column, indent + conf['spaces']))
+        context['cur_line_indent'] = found_indentation
+        context['cur_line'] = token.end_mark.line + 1
+
+    # Step 2: Update state
+
+    if isinstance(token, yaml.BlockMappingStartToken):
+        assert isinstance(next, yaml.KeyToken)
+        if next.start_mark.line == token.start_mark.line:
+            #   - a: 1
+            #     b: 2
+            # or
+            #   - ? a
+            #     : 1
+            indent = token.start_mark.column
+        else:
+            #   - ?
+            #       a
+            #     : 1
+            indent = token.start_mark.column + conf['spaces']
+
+        context['stack'].append(Parent(MAP, indent))
+
+    elif isinstance(token, yaml.FlowMappingStartToken):
+        if next.start_mark.line == token.start_mark.line:
+            #   - {a: 1, b: 2}
+            indent = next.start_mark.column
+        else:
+            #   - {
+            #   a: 1, b: 2
+            # }
+            indent = context['cur_line_indent'] + conf['spaces']
+
+        context['stack'].append(Parent(MAP, indent))
+
+    elif isinstance(token, yaml.BlockSequenceStartToken):
+        #   - - a
+        #     - b
+        assert next.start_mark.line == token.start_mark.line
+        assert isinstance(next, yaml.BlockEntryToken)
+
+        indent = token.start_mark.column
+
+        context['stack'].append(Parent(B_SEQ, indent))
+
+    elif isinstance(token, yaml.FlowSequenceStartToken):
+        if next.start_mark.line == token.start_mark.line:
+            #   - [a, b]
+            indent = next.start_mark.column
+        else:
+            #   - [
+            #   a, b
+            # ]
+            indent = context['cur_line_indent'] + conf['spaces']
+
+        context['stack'].append(Parent(F_SEQ, indent))
+
+    elif isinstance(token, (yaml.BlockEndToken,
+                            yaml.FlowMappingEndToken,
+                            yaml.FlowSequenceEndToken)):
+        assert context['stack'][-1].type in (MAP, B_SEQ, F_SEQ)
+        context['stack'].pop()
+
+    elif isinstance(token, yaml.KeyToken):
+        indent = context['stack'][-1].indent
+
+        context['stack'].append(Parent(KEY, indent))
+
+        # explicit key:
+        #   ? key
+        #   : v
+        # or
+        #   ?
+        #     key
+        #   : v
+        context['stack'][-1].explicit_key = (
+           token.start_mark.pointer < token.end_mark.pointer and
+           token.start_mark.buffer[token.start_mark.pointer] == '?')
+
+    if context['stack'][-1].type == VAL:
+        context['stack'].pop()
+        assert context['stack'][-1].type == KEY
+        context['stack'].pop()
+
+    elif isinstance(token, yaml.ValueToken):
+        assert context['stack'][-1].type == KEY
+
+        # Discard empty values
+        if isinstance(next, (yaml.BlockEndToken,
+                             yaml.FlowMappingEndToken,
+                             yaml.FlowSequenceEndToken,
+                             yaml.KeyToken)):
+            context['stack'].pop()
+        else:
+            if context['stack'][-1].explicit_key:
+                #   ? k
+                #   : value
+                # or
+                #   ? k
+                #   :
+                #     value
+                indent = context['stack'][-1].indent + conf['spaces']
+            elif next.start_mark.line == prev.start_mark.line:
+                #   k: value
+                indent = next.start_mark.column
+            elif isinstance(next, (yaml.BlockSequenceStartToken,
+                                   yaml.BlockEntryToken)):
+                # NOTE: We add BlockEntryToken in the test above because
+                # sometimes no BlockSequenceStartToken is issued. Try
+                # running yaml.scan() on this:
+                #     '- lib:\n'
+                #     '  - var\n'
+                if next.start_mark.column == context['stack'][-1].indent:
+                    #   key:
+                    #   - e1
+                    #   - e2
+                    indent = context['stack'][-1].indent
+                else:
+                    #   key:
+                    #     - e1
+                    #     - e2
+                    indent = context['stack'][-1].indent + conf['spaces']
+            else:
+                #   k:
+                #     value
+                indent = context['stack'][-1].indent + conf['spaces']
+
+            context['stack'].append(Parent(VAL, indent))