diff --git a/README.rst b/README.rst index ff72959..2b0a7a6 100644 --- a/README.rst +++ b/README.rst @@ -273,3 +273,17 @@ The behavior of Fypp can be influenced with various command line options. A summary of all command line options can be obtained by:: fypp -h + + +Source mapping +-------------- + +``--source-map FILE`` writes a JSON file mapping output byte ranges back to +source byte ranges, enabling tools to remap diagnostics to the original +template:: + + fypp --source-map source.fypp.map source.fpp source.f90 + +Each mapping entry has a ``kind`` (``verbatim``, ``expanded``, or +``generated``) and byte-offset fields. The format is versioned via the ``version`` +field (currently ``1``). diff --git a/src/fypp.py b/src/fypp.py index c6d0154..a5cd479 100755 --- a/src/fypp.py +++ b/src/fypp.py @@ -64,6 +64,7 @@ import time import optparse import io +import json import platform import builtins @@ -237,6 +238,8 @@ def __init__(self, includedirs=None, encoding='utf-8'): # Directory of current file self._curdir = None + self._cur_char_span, self._file_contents = None, {} + self._file_char_to_byte, self._file_line_to_char = {}, {} def parsefile(self, fobj): '''Parses file or a file like object. 
@@ -572,6 +575,11 @@ def _log_event(event, span=(-1, -1), **params): def _parse_txt(self, includespan, fname, txt): + self._file_contents[fname], c2b = txt, [0] + for ch in txt: c2b.append(c2b[-1] + len(ch.encode(self._encoding))) + l2c = [0] + [i + 1 for i, ch in enumerate(txt) if ch == '\n'] + self._file_char_to_byte[fname] = c2b + self._file_line_to_char[fname] = l2c if l2c[-1] == len(txt) else l2c + [len(txt)] self.handle_include(includespan, fname) self._parse(txt) self.handle_endinclude(includespan, fname) @@ -583,10 +591,12 @@ def _parse(self, txt, linenr=0, directcall=False): start, end = match.span() if start > pos: endlinenr = linenr + txt.count('\n', pos, start) + self._cur_char_span = (pos, start) self._process_text(txt[pos:start], (linenr, endlinenr)) linenr = endlinenr endlinenr = linenr + txt.count('\n', start, end) span = (linenr, endlinenr) + self._cur_char_span = (start, end) ldirtype, ldir, idirtype, idir = match.groups() if directcall and (idirtype is None or idirtype != '$'): msg = 'only inline eval directives allowed in direct calls' @@ -618,6 +628,7 @@ def _parse(self, txt, linenr=0, directcall=False): linenr = endlinenr if pos < len(txt): endlinenr = linenr + txt.count('\n', pos) + self._cur_char_span = (pos, len(txt)) self._process_text(txt[pos:], (linenr, endlinenr)) @@ -721,6 +732,7 @@ def _process_direct_call(self, callexpr, span): argval = argval[1:-1] keyword = match.group('kwname') self.handle_nextarg(span, keyword, False) + self._cur_char_span = None self._parse(argval, linenr=span[0], directcall=True) self.handle_endcall(span, callname, False) @@ -886,6 +898,7 @@ def __init__(self): # Current file self._curfile = None + self._parser_ref = None def reset(self): @@ -1208,7 +1221,8 @@ def handle_eval(self, span, expr): expr (str): String representation of the Python expression to be evaluated. 
''' - self._curnode.append(('eval', self._curfile, span, expr)) + cs = getattr(self._parser_ref, '_cur_char_span', None) + self._curnode.append(('eval', self._curfile, span, expr, cs)) def handle_comment(self, span): @@ -1230,7 +1244,8 @@ def handle_text(self, span, txt): span (tuple of int): Start and end line of the text. txt (str): Text. ''' - self._curnode.append(('txt', self._curfile, span, txt)) + cs = getattr(self._parser_ref, '_cur_char_span', None) + self._curnode.append(('txt', self._curfile, span, txt, cs)) def handle_mute(self, span): @@ -1409,6 +1424,7 @@ def _render(self, tree): for node in tree: cmd = node[0] if cmd == 'txt': + self._on_txt(node) output.append(node[3]) elif cmd == 'if': out, ieval, peval = self._get_conditional_content(*node[1:5]) @@ -1416,7 +1432,7 @@ def _render(self, tree): eval_pos += peval output += out elif cmd == 'eval': - out, ieval, peval = self._get_eval(*node[1:4]) + out, ieval, peval = self._get_eval(*node[1:5]) eval_inds += _shiftinds(ieval, len(output)) eval_pos += peval output += out @@ -1460,7 +1476,8 @@ def _render(self, tree): return output, eval_inds, eval_pos - def _get_eval(self, fname, span, expr): + def _on_txt(self, node): pass + def _get_eval(self, fname, span, expr, char_span=None): try: result = self._evaluate(expr, fname, span[0]) except Exception as exc: @@ -1869,6 +1886,160 @@ def _foldline(self, line): return [line] +class SourceMapRenderer(Renderer): + '''Renderer subclass that records byte-level source mappings.''' + + def __init__(self, char_to_byte_tables, line_to_char_tables, + file_contents=None, encoding='utf-8', **kwargs): + super().__init__(**kwargs) + self._char_to_byte, self._line_to_char = char_to_byte_tables, line_to_char_tables + self._file_contents = file_contents if file_contents is not None else {} + self._encoding = encoding + self._mappings, self._out_byte_offset = [], 0 + _orig = self._linenumdir + def _w(*a, **kw): r = _orig(*a, **kw); self._record('generated', text=r); return r + 
self._linenumdir = _w + + def render(self, tree, divert=False, fixposition=False): + saved = (self._mappings, self._out_byte_offset) + self._mappings, self._out_byte_offset = [], 0 + result = super().render(tree, divert=divert, fixposition=fixposition) + if divert: self._mappings, self._out_byte_offset = saved + else: self._fixup_out_byte_offsets(result); self._merge_adjacent_verbatim() + return result + def _on_txt(self, n): self._record('verbatim', *n[1:5]) + def _span_to_byte_range(self, fname, span, char_span=None): + t = self._char_to_byte.get(fname) + c2b = lambda co: (co if t is None else t[co] if co < len(t) else (t[-1] if t else co)) + if char_span is not None: return c2b(char_span[0]), c2b(char_span[1]) + lt = self._line_to_char.get(fname) + if lt is None: + return (0, 0) + return (c2b(lt[min(span[0], len(lt)-1)]), + c2b(lt[min(span[1], len(lt)-1)])) + + def _record(self, kind, fname=None, span=None, text=None, char_span=None): + if not text: return + n = len(text.encode(self._encoding)); o = self._out_byte_offset + e = {'out_byte_start': o, 'out_byte_end': o + n, 'kind': kind} + if fname is not None: + sb, se = self._span_to_byte_range(fname, span, char_span) + e.update(src_file=fname, src_byte_start=sb, src_byte_end=se) + if kind == 'verbatim': e['_text'] = text.encode(self._encoding) + self._mappings.append(e); self._out_byte_offset += n + def _get_eval(self, fname, span, expr, char_span=None): + out, ieval, peval = super()._get_eval(fname, span, expr) + self._record('expanded', fname, span, ''.join(out), char_span); return out, ieval, peval + def _get_called_content(self, fname, spans, name, argexpr, contents, argnames): + r = super()._get_called_content(fname, spans, name, argexpr, contents, argnames) + self._record('expanded', fname, (spans[0][0], spans[-1][1]), ''.join(r[0])) + return r + def _get_muted_content(self, fname, spans, content): + saved = (len(self._mappings), self._out_byte_offset) + result = super()._get_muted_content(fname, spans, 
content) + del self._mappings[saved[0]:]; self._out_byte_offset = saved[1] + self._record('generated', text=result); return result + + def _fixup_out_byte_offsets(self, final_text): + if not self._mappings: return + out_bytes = final_text.encode(self._encoding) + insertion_pats = self._build_insertion_patterns() + src_cache, cursor, fixed = {}, 0, [] + def gen(s, e): return {'out_byte_start': s, 'out_byte_end': e, 'kind': 'generated'} + for e in self._mappings: + if e['kind'] == 'verbatim': + fname = e['src_file'] + if fname not in src_cache: + c = self._file_contents.get(fname) + src_cache[fname] = c.encode(self._encoding) if c is not None else None + if src_cache[fname] is not None: + exp = e.pop('_text', None) + if exp is None: + exp = src_cache[fname][e['src_byte_start']:e['src_byte_end']] + for pat in insertion_pats: + while True: + m = pat.match(out_bytes, cursor) + if m is None: break + fixed.append(gen(cursor, m.end())); cursor = m.end() + pos = out_bytes.find(exp, cursor) + if pos >= 0: + ne = {**e, 'out_byte_start': pos, 'out_byte_end': pos + len(exp)} + fixed.append(ne); cursor = ne['out_byte_end']; continue + elif insertion_pats: + rng = self._find_with_insertions(out_bytes, exp, cursor, insertion_pats) + if rng is not None: + mstart, mend, segs = rng + src0, outpos = e['src_byte_start'], mstart + for so, eo, se, ee in segs: + if outpos < so: fixed.append(gen(outpos, so)) + fixed.append({**gen(so, eo), 'src_file': fname, + 'kind': 'verbatim', 'src_byte_start': src0 + se, + 'src_byte_end': src0 + ee}) + outpos = eo + if outpos < mend: fixed.append(gen(outpos, mend)) + cursor = mend; continue + n = e['out_byte_end'] - e['out_byte_start'] + end = min(cursor + n, len(out_bytes)) + fixed.append(gen(cursor, end) if e['kind'] == 'verbatim' + else {**e, 'out_byte_start': cursor, 'out_byte_end': end}) + cursor = end + if cursor < len(out_bytes): + fixed.append(gen(cursor, len(out_bytes))) + self._mappings = mappings = fixed + for i, e in enumerate(mappings): + 
if e['kind'] != 'verbatim': + e['out_byte_end'] = (mappings[i+1]['out_byte_start'] + if i+1 < len(mappings) else len(out_bytes)) + self._mappings = [e for e in mappings + if e['out_byte_start'] < e['out_byte_end']] + for e in self._mappings: e.pop('_text', None) + + def _build_insertion_patterns(self): + lnpat = br'#(?:line)? \d+ "[^\n]*"[^\n]*\n' + pats = [re.compile(lnpat)] + if isinstance(self._linefolder, FortranLineFolder): + sfx, pfx = self._linefolder._suffix, self._linefolder._prefix.lstrip() + if pfx: + ind = str(max(0, self._linefolder._indent)).encode('ascii') + bs = re.escape(sfx.encode(self._encoding)) + bp = re.escape(pfx.encode(self._encoding)) + pats += [re.compile(bs + b'\n[ \t]{' + ind + b',}' + bp), + re.compile(bs + b'\n' + lnpat + b'[ \t]{' + ind + b',}' + bp)] + return pats + def _find_with_insertions(self, out_bytes, expected, start, insertion_pats): + if not expected: return None + for cand in range(start, len(out_bytes)): + if out_bytes[cand] != expected[0]: continue + i, j, segs, segout, segexp = cand, 0, [], cand, 0 + while j < len(expected): + for pat in insertion_pats: + m = pat.match(out_bytes, i) + if m is not None: + if j > segexp: segs.append((segout, i, segexp, j)) + i = m.end(); segout, segexp = i, j; break + else: + if i < len(out_bytes) and out_bytes[i] == expected[j]: i += 1; j += 1; continue + break + if j == len(expected): + if j > segexp: segs.append((segout, i, segexp, j)) + return cand, i, segs + return None + def _merge_adjacent_verbatim(self): + if not self._mappings: return + merged = [self._mappings[0]] + for e in self._mappings[1:]: + p = merged[-1] + if (p['kind'] == 'verbatim' == e.get('kind') and p.get('src_file') == e.get('src_file') + and p['out_byte_end'] == e['out_byte_start'] + and p['src_byte_end'] == e['src_byte_start']): + p['out_byte_end'], p['src_byte_end'] = e['out_byte_end'], e['src_byte_end'] + else: merged.append(e) + self._mappings = merged + + def get_source_map(self, source_file=None): + return 
{'version': 1, 'source_file': source_file or '', 'mappings': list(self._mappings)} + + class Evaluator: '''Provides an isolated environment for evaluating Python expressions. @@ -2360,6 +2531,7 @@ def __init__(self, parser=None, builder=None, renderer=None, evaluator=None): self._parser = Parser() if parser is None else parser self._builder = Builder() if builder is None else builder + self._builder._parser_ref = self._parser if renderer is None: evaluator = Evaluator() if evaluator is None else evaluator self._renderer = Renderer(evaluator) @@ -2544,7 +2716,16 @@ def __init__(self, options=None, evaluator_factory=Evaluator, linenums = options.line_numbering contlinenums = (options.line_numbering_mode != 'nocontlines') self._create_parent_folder = options.create_parent_folder - if inspect.signature(renderer_factory) == inspect.signature(Renderer): + self._source_map_file = getattr(options, 'source_map', None) + if self._source_map_file: + renderer = SourceMapRenderer( + parser._file_char_to_byte, parser._file_line_to_char, + file_contents=parser._file_contents, encoding=self._encoding, + evaluator=evaluator, linenums=linenums, + contlinenums=contlinenums, + linenumformat=options.line_marker_format, + linefolder=linefolder, filevarroot=options.file_var_root) + elif inspect.signature(renderer_factory) == inspect.signature(Renderer): renderer = renderer_factory( evaluator, linenums=linenums, contlinenums=contlinenums, linenumformat=options.line_marker_format, linefolder=linefolder, @@ -2570,6 +2751,16 @@ def process_file(self, infile, outfile=None): ''' infile = STDIN if infile == '-' else infile output = self._preprocessor.process_file(infile) + if self._source_map_file: + try: + with io.open(self._source_map_file, 'w', encoding='utf-8') as f: + json.dump(self._preprocessor._renderer.get_source_map( + infile), f, indent=2) + f.write('\n') + except IOError as exc: + raise FyppFatalError( + "failed to write source map '{}': {}".format( + self._source_map_file, exc)) if 
outfile is None: return output if outfile == '-': @@ -2596,6 +2787,11 @@ def process_text(self, txt): return self._preprocessor.process_text(txt) + def process_text_with_map(self, txt): + output = self._preprocessor.process_text(txt); r = self._preprocessor._renderer + return output, (r.get_source_map(STRING) if isinstance(r, SourceMapRenderer) else None) + + @staticmethod def _apply_definitions(defines, evaluator, evaluate): for define in defines: @@ -2700,6 +2896,7 @@ def __init__(self): self.encoding = 'utf-8' self.create_parent_folder = False self.file_var_root = None + self.source_map = None class FortranLineFolder: @@ -2943,6 +3140,9 @@ def get_option_parser(): parser.add_option('--file-var-root', metavar='DIR', dest='file_var_root', default=defs.file_var_root, help=msg) + parser.add_option('--source-map', metavar='FILE', dest='source_map', + default=defs.source_map, + help='write JSON source-map of output byte offsets') return parser diff --git a/test/test_source_map.py b/test/test_source_map.py new file mode 100644 index 0000000..4585997 --- /dev/null +++ b/test/test_source_map.py @@ -0,0 +1,468 @@ +'''Unit tests for Fypp's --source-map feature.''' +import json, os, sys, tempfile, unittest +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src")) +import fypp + + +def _make_tool(args=None, source_map_file=None): + opts, _ = fypp.get_option_parser().parse_args(args or []) + opts.source_map = source_map_file or '__dummy__' + return fypp.Fypp(opts) + + +def _get_map(txt, args=None): + return _make_tool(args).process_text_with_map(txt) + + +def _kinds(smap): return [m['kind'] for m in smap['mappings']] + +_SRC_FIELDS = ('out_byte_start', 'out_byte_end', 'src_byte_start', + 'src_byte_end', 'src_file') + + +class TestMappingKinds(unittest.TestCase): + + def test_expanded_inline_eval(self): + output, smap = _get_map("x = ${1 + 2}$\n") + self.assertIn('3', output) + self.assertIn('expanded', _kinds(smap)) + + def 
test_expanded_line_eval(self): + output, smap = _get_map("$:str(42)\n") + self.assertIn('42', output) + self.assertIn('expanded', _kinds(smap)) + + def test_generated_content(self): + _, smap = _get_map("line1\n", args=['-n']) + self.assertIn('generated', _kinds(smap)) + + def test_if_true_branch(self): + output, smap = _get_map("#:if True\nkept\n#:endif\n") + self.assertIn('kept', output) + self.assertIn('verbatim', _kinds(smap)) + + def test_for_loop(self): + output, smap = _get_map("#:for i in range(3)\nval${i}$\n#:endfor\n") + for v in ('val0', 'val1', 'val2'): + self.assertIn(v, output) + self.assertTrue(any(k in ('expanded', 'verbatim') for k in _kinds(smap))) + + def test_nested_if(self): + output, smap = _get_map( + "#:if True\nouter\n#:if True\ninner\n#:endif\n#:endif\n") + self.assertIn('outer', output) + self.assertIn('inner', output) + self.assertIn('verbatim', _kinds(smap)) + + +class TestIncludes(unittest.TestCase): + + def test_include_mapping(self): + with tempfile.TemporaryDirectory() as d: + with open(os.path.join(d, 'inc.fypp'), 'w') as f: + f.write("! 
included line\n") + inp = os.path.join(d, 'main.fypp') + with open(inp, 'w') as f: + f.write('#:include "inc.fypp"\n') + smap_path = os.path.join(d, 'out.map') + tool = _make_tool(args=['-I', d], source_map_file=smap_path) + output = tool.process_file(inp) + self.assertIn('included line', output) + with open(smap_path) as f: + smap = json.load(f) + src_files = {m.get('src_file', '') for m in smap['mappings']} + self.assertTrue(any('inc.fypp' in s for s in src_files)) + + +class TestEdgeCases(unittest.TestCase): + + def test_no_directives(self): + inp = "program hello\n print *, 'hi'\nend program hello\n" + output, smap = _get_map(inp) + self.assertEqual(output, inp) + vb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertEqual(len(vb), 1) + self.assertEqual(vb[0]['out_byte_start'], 0) + self.assertEqual(vb[0]['out_byte_end'], len(inp.encode('utf-8'))) + + def test_multiline_continuation(self): + output, smap = _get_map("#:if Tr&\n &ue\nkept\n#:endif\n") + self.assertIn('kept', output) + self.assertTrue(len(_kinds(smap)) >= 1) + + def test_verbatim_byte_accuracy(self): + inp = "before\n#:if True\nmiddle\n#:endif\nafter\n" + output, smap = _get_map(inp) + sb, ob = inp.encode('utf-8'), output.encode('utf-8') + for m in smap['mappings']: + if m['kind'] == 'verbatim': + self.assertEqual(sb[m['src_byte_start']:m['src_byte_end']], + ob[m['out_byte_start']:m['out_byte_end']]) + + def test_verbatim_literal_continuation_markers(self): + inp = "a&\n&b\n" + output, smap = _get_map(inp) + self.assertEqual(output, inp) + ob = output.encode('utf-8') + vb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertEqual(len(vb), 1) + self.assertEqual(vb[0]['out_byte_start'], 0) + self.assertEqual(vb[0]['out_byte_end'], len(ob)) + self.assertEqual(inp.encode('utf-8')[vb[0]['src_byte_start']:vb[0]['src_byte_end']], + ob[vb[0]['out_byte_start']:vb[0]['out_byte_end']]) + + def test_expanded_literal_continuation_markers_inline(self): + output, smap = 
_get_map("${'a&\\n&b'}$\n") + ob = output.encode('utf-8') + self.assertEqual(output, "a&\n&b\n") + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + self.assertEqual(es[0]['out_byte_start'], 0) + self.assertEqual(es[-1]['out_byte_end'], len(ob)) + for m in es: + self.assertLessEqual(m['out_byte_start'], m['out_byte_end']) + self.assertLessEqual(m['out_byte_end'], len(ob)) + self.assertIn('expanded', _kinds(smap)) + + def test_expanded_literal_continuation_markers_set_var(self): + output, smap = _get_map("#:set x='a&\\n&b'\n${x}$\n") + ob = output.encode('utf-8') + self.assertEqual(output, "a&\n&b\n") + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + self.assertEqual(es[0]['out_byte_start'], 0) + self.assertEqual(es[-1]['out_byte_end'], len(ob)) + for m in es: + self.assertLessEqual(m['out_byte_start'], m['out_byte_end']) + self.assertLessEqual(m['out_byte_end'], len(ob)) + self.assertIn('expanded', _kinds(smap)) + + def test_folding_splits_verbatim_into_precise_pieces(self): + inp = "abcdefghij${1}$\n" + output, smap = _get_map(inp, args=['-l', '8']) + sb, ob = inp.encode('utf-8'), output.encode('utf-8') + self.assertIn('generated', _kinds(smap)) + for m in smap['mappings']: + if m['kind'] == 'verbatim': + self.assertEqual( + sb[m['src_byte_start']:m['src_byte_end']], + ob[m['out_byte_start']:m['out_byte_end']]) + + def test_folding_with_linenums_keeps_verbatim_byte_accuracy(self): + inp = "abc ${1}$ def\n" + output, smap = _get_map(inp, args=['-l', '8', '-n']) + sb, ob = inp.encode('utf-8'), output.encode('utf-8') + self.assertIn('generated', _kinds(smap)) + for m in smap['mappings']: + if m['kind'] == 'verbatim': + self.assertEqual( + sb[m['src_byte_start']:m['src_byte_end']], + ob[m['out_byte_start']:m['out_byte_end']]) + + def test_folding_with_linenums_loop_keeps_verbatim_byte_accuracy(self): + inp = "#:for i in range(2)\naaaaaaaa ${i}$ bbbbbbbb\n#:endfor\n" + output, smap = _get_map(inp, args=['-l', '8', '-n']) + sb, ob = 
inp.encode('utf-8'), output.encode('utf-8') + self.assertIn('generated', _kinds(smap)) + verb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertTrue(len(verb) > 0) + for m in verb: + if m['kind'] == 'verbatim': + self.assertEqual( + sb[m['src_byte_start']:m['src_byte_end']], + ob[m['out_byte_start']:m['out_byte_end']]) + + def test_muted_with_line_numbering(self): + output, smap = _get_map( + "#:mute\nhidden\n#:endmute\nvisible\n", args=['-n']) + self.assertNotIn('hidden', output) + self.assertIn('visible', output) + n = len(output.encode('utf-8')) + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + self.assertTrue(len(es) >= 1) + self.assertEqual(es[0]['out_byte_start'], 0) + self.assertEqual(es[-1]['out_byte_end'], n) + for i in range(len(es) - 1): + self.assertEqual(es[i]['out_byte_end'], es[i+1]['out_byte_start']) + + def test_unicode_byte_accuracy(self): + inp = "! café résumé\n" + output, smap = _get_map(inp) + self.assertEqual(output, inp) + vb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertEqual(len(vb), 1) + self.assertEqual(vb[0]['out_byte_end'], len(inp.encode('utf-8'))) + self.assertEqual(vb[0]['src_byte_end'], len(inp.encode('utf-8'))) + + def test_macro_call_mapping(self): + inp = "#:def greet(name)\nHello ${name}$!\n#:enddef\n$:greet('World')\n" + output, smap = _get_map(inp) + self.assertIn('Hello World!', output) + self.assertIn('expanded', _kinds(smap)) + + def test_escape_sequences_stay_verbatim(self): + for inp in ["cost = $\\{100\\}\n", + "! 
comment #\\: not a directive\n"]: + with self.subTest(inp=inp): + output, smap = _get_map(inp) + verb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertTrue(len(verb) > 0, + "escaped text should remain verbatim, not degrade to generated") + + def test_hash_in_verbatim_with_folding(self): + inp = "aaa#bbb ${1}$\n" + output, smap = _get_map(inp, args=['-l', '8']) + verb = [m for m in smap['mappings'] if m['kind'] == 'verbatim'] + self.assertTrue(len(verb) > 0, + "verbatim text containing '#' should not be consumed as insertion") + + +class TestAPI(unittest.TestCase): + + def test_process_text_returns_str(self): + self.assertIsInstance(_make_tool().process_text("hello\n"), str) + + def test_process_text_with_map_returns_tuple(self): + result = _make_tool().process_text_with_map("hello\n") + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], str) + self.assertIsInstance(result[1], dict) + + def test_process_text_with_map_no_sourcemap(self): + opts, _ = fypp.get_option_parser().parse_args([]) + opts.source_map = None + output, smap = fypp.Fypp(opts).process_text_with_map("hello\n") + self.assertIsInstance(output, str) + self.assertIsNone(smap) + + def test_cli_source_map_writes_file(self): + with tempfile.TemporaryDirectory() as d: + inp = os.path.join(d, 'input.fypp') + smap_path = os.path.join(d, 'output.map') + with open(inp, 'w') as f: + f.write("program test\nend program test\n") + _make_tool(source_map_file=smap_path).process_file( + inp, os.path.join(d, 'output.f90')) + with open(smap_path) as f: + smap = json.load(f) + self.assertIn('version', smap) + self.assertIn('mappings', smap) + + +class TestJSONFormat(unittest.TestCase): + + def test_source_map_version(self): + _, smap = _get_map("hello\n") + self.assertEqual(smap['version'], 1) + self.assertIn('source_file', smap) + + def test_mapping_fields_verbatim(self): + _, smap = _get_map("hello\n") + for m in smap['mappings']: + 
self.assertIn('kind', m) + if m['kind'] == 'verbatim': + for f in _SRC_FIELDS: + self.assertIn(f, m) + + def test_mapping_fields_expanded(self): + _, smap = _get_map("${1 + 1}$\n") + expanded = [m for m in smap['mappings'] if m['kind'] == 'expanded'] + self.assertTrue(len(expanded) >= 1) + for m in expanded: + for f in _SRC_FIELDS: + self.assertIn(f, m) + + def test_muted_produces_no_mappings(self): + _, smap = _get_map("#:if False\ngone\n#:endif\n") + self.assertEqual(smap['mappings'], []) + + def test_generated_fields(self): + _, smap = _get_map("hello\n", args=['-n']) + gen = [m for m in smap['mappings'] if m['kind'] == 'generated'] + self.assertTrue(len(gen) >= 1) + for m in gen: + self.assertIn('out_byte_start', m) + self.assertIn('out_byte_end', m) + + def test_no_overlapping_output_ranges(self): + _, smap = _get_map("before\n#:if True\nkept\n#:endif\nafter\n") + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + for i in range(len(es) - 1): + self.assertLessEqual(es[i]['out_byte_end'], es[i+1]['out_byte_start']) + + def test_continuous_coverage(self): + for inp, args in [("alpha\nbeta\n", []), + ("before\n$:str(99)\nafter\n", []), + ("line1\nline2\n", ['-n']), + ("#:mute\nx\n#:endmute\nafter\n", ['-n']), + ("A${str(7)}$X&YaZXa& &a \nbY Y${2}$\n", + ['-l', '8']), + ("AAAAAAAAAA${1}$Z\n", ['-l', '8', '-n'])]: + with self.subTest(inp=inp, args=args): + output, smap = _get_map(inp, args=args) + n = len(output.encode('utf-8')) + if not n: continue + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + self.assertTrue(len(es) >= 1) + self.assertEqual(es[0]['out_byte_start'], 0) + self.assertEqual(es[-1]['out_byte_end'], n) + for i in range(len(es) - 1): + self.assertEqual(es[i]['out_byte_end'], es[i+1]['out_byte_start']) + + +def _assert_valid_map(tc, output, smap, inp=None): + '''No crash, continuous coverage, no zero-width, verbatim byte accuracy.''' + ob = output.encode('utf-8') + n = len(ob) + es = sorted(smap['mappings'], 
key=lambda e: e['out_byte_start']) + for i, e in enumerate(es): + tc.assertGreater(e['out_byte_end'], e['out_byte_start'], + f"zero-width entry #{i}: {e}") + if n == 0: + tc.assertEqual(es, []) + return + tc.assertTrue(len(es) >= 1) + tc.assertEqual(es[0]['out_byte_start'], 0) + tc.assertEqual(es[-1]['out_byte_end'], n) + for i in range(len(es) - 1): + tc.assertEqual(es[i]['out_byte_end'], es[i + 1]['out_byte_start'], + f"gap/overlap between {i} and {i+1}") + if inp is not None: + sb = inp.encode('utf-8') + for e in es: + if e['kind'] == 'verbatim': + tc.assertEqual( + sb[e['src_byte_start']:e['src_byte_end']], + ob[e['out_byte_start']:e['out_byte_end']]) + + +class TestStressEdgeCases(unittest.TestCase): + + def test_empty_lines_between_directives(self): + inp = "#:if True\n\n\nkept\n\n#:endif\n" + output, smap = _get_map(inp) + self.assertIn('kept', output) + _assert_valid_map(self, output, smap, inp) + + def test_empty_lines_with_linenums(self): + inp = "#:if True\n\n\nkept\n\n#:endif\n" + output, smap = _get_map(inp, args=['-n']) + _assert_valid_map(self, output, smap, inp) + + def test_backslash_in_verbatim(self): + inp = "path = C:\\users\n" + output, smap = _get_map(inp) + self.assertEqual(output, inp) + _assert_valid_map(self, output, smap, inp) + + def test_backslash_with_folding(self): + inp = "path = C:\\users\\longdirname\n" + output, smap = _get_map(inp, args=['-l', '15']) + _assert_valid_map(self, output, smap, inp) + + def test_eval_producing_line_directive_pattern(self): + inp = '${\"# 1 \\\"fake.f90\\\"\"}$\n' + output, smap = _get_map(inp, args=['-n']) + self.assertIn('fake.f90', output) + _assert_valid_map(self, output, smap) + + def test_fold_7_eval(self): + inp = "ab${1}$cd\n" + output, smap = _get_map(inp, args=['-l', '7']) + _assert_valid_map(self, output, smap, inp) + + def test_fold_7_with_linenums(self): + inp = "ab${1}$cd\n" + output, smap = _get_map(inp, args=['-l', '7', '-n']) + _assert_valid_map(self, output, smap, inp) + + def 
test_fold_7_indent_0(self): + inp = "ABCDEFGHIJ${99}$XYZ\n" + output, smap = _get_map(inp, args=['-l', '7', '--indentation', '0']) + _assert_valid_map(self, output, smap, inp) + + def test_tab_with_eval_and_fold(self): + inp = "\tab${1}$cdefgh\n" + output, smap = _get_map(inp, args=['-l', '10']) + _assert_valid_map(self, output, smap, inp) + + def test_none_eval_then_verbatim(self): + inp = "${None}$hello\n" + output, smap = _get_map(inp) + self.assertIn('hello', output) + _assert_valid_map(self, output, smap) + + def test_empty_eval_between_verbatim(self): + inp = "before${None}$after\n" + output, smap = _get_map(inp) + self.assertIn('beforeafter', output) + _assert_valid_map(self, output, smap) + + def test_mute_between_visible_linenums(self): + inp = "before\n#:mute\nhidden\n#:endmute\nafter\n" + output, smap = _get_map(inp, args=['-n']) + self.assertNotIn('hidden', output) + _assert_valid_map(self, output, smap, inp) + + def test_multiple_mute_blocks(self): + inp = "#:mute\na\n#:endmute\nvis1\n#:mute\nb\n#:endmute\nvis2\n" + output, smap = _get_map(inp, args=['-n']) + self.assertIn('vis1', output) + _assert_valid_map(self, output, smap, inp) + + def test_call_endcall_basic(self): + inp = ("#:def wrap(body)\nBEGIN\n${body}$\nEND\n#:enddef\n" + "#:call wrap\nhello\n#:endcall\n") + output, smap = _get_map(inp) + self.assertIn('BEGIN', output) + self.assertIn('hello', output) + _assert_valid_map(self, output, smap) + + def test_call_endcall_with_linenums(self): + inp = ("#:def wrap(body)\nBEGIN\n${body}$\nEND\n#:enddef\n" + "#:call wrap\nhello\n#:endcall\n") + output, smap = _get_map(inp, args=['-n']) + _assert_valid_map(self, output, smap) + + def test_fold_empty_eval_verbatim_gap(self): + '''Fold insertion between verbatim pieces must not cause a gap.''' + inp = ("#:for i in range(2)\n" + "path\\dir ${i}$ ${None}$ end\n" + "#:endfor\n") + output, smap = _get_map(inp, args=['-l', '12']) + self.assertIn('path', output) + _assert_valid_map(self, output, smap) + + 
def test_include_chain(self): + with tempfile.TemporaryDirectory() as d: + with open(os.path.join(d, 'C.fypp'), 'w') as f: + f.write("! from C\n") + with open(os.path.join(d, 'B.fypp'), 'w') as f: + f.write("! from B\n#:include 'C.fypp'\n") + inp = os.path.join(d, 'A.fypp') + with open(inp, 'w') as f: + f.write("! from A\n#:include 'B.fypp'\n! back in A\n") + smap_path = os.path.join(d, 'out.map') + tool = _make_tool(args=['-I', d], source_map_file=smap_path) + output = tool.process_file(inp) + self.assertIn('from C', output) + with open(smap_path) as f: + smap = json.load(f) + files = {e.get('src_file', '') for e in smap['mappings']} + self.assertTrue(any('C.fypp' in s for s in files)) + ob = output.encode('utf-8') + es = sorted(smap['mappings'], key=lambda e: e['out_byte_start']) + self.assertEqual(es[0]['out_byte_start'], 0) + self.assertEqual(es[-1]['out_byte_end'], len(ob)) + for i in range(len(es) - 1): + self.assertEqual(es[i]['out_byte_end'], + es[i + 1]['out_byte_start']) + + def test_crlf_verbatim(self): + inp = "hello\r\nworld\r\n" + output, smap = _get_map(inp) + _assert_valid_map(self, output, smap, inp) + + +if __name__ == '__main__': + unittest.main()