Skip to content

Commit 73a4220

Browse files
committed
Feature: Add CodeLineIterator for filtered code line iteration (including multi-line comment removal).
1 parent 84bdfa2 commit 73a4220

File tree

3 files changed

+116
-50
lines changed

3 files changed

+116
-50
lines changed

CodeIngestLib/CodeLineIterator.cs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Code authored by Dean Edis (DeanTheCoder).
2+
// Anyone is free to copy, modify, use, compile, or distribute this software,
3+
// either in source code form or as a compiled binary, for any non-commercial
4+
// purpose.
5+
//
6+
// If you modify the code, please retain this copyright header,
7+
// and consider contributing back to the repository or letting us know
8+
// about your modifications. Your contributions are valued!
9+
//
10+
// THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND.
11+
using System;
12+
using System.Collections.Generic;
13+
using System.IO;
14+
using JetBrains.Annotations;
15+
16+
namespace CodeIngestLib;
17+
18+
/// <summary>
/// Iterates through code lines from a StreamReader, providing filtered content based on specified options.
/// Can optionally strip comments and import statements while processing each line.
/// </summary>
/// <remarks>
/// Comment detection is purely textual: "//" and "/*" are recognised wherever they appear in a line,
/// including inside string literals (e.g. "http://host"), so such lines are truncated at the marker.
/// Import detection is a simple prefix match, so any line starting with one of the prefixes
/// (e.g. a "using var x = ..." statement) is also treated as an import.
/// </remarks>
internal class CodeLineIterator
{
    private const string LineCommentMarker = "//";
    private const string HashCommentMarker = "# ";
    private const string BlockCommentOpen = "/*";
    private const string BlockCommentClose = "*/";

    // Line prefixes treated as import/namespace statements when stripping imports.
    private static readonly string[] ImportPrefixes =
    {
        "using", "#include", "#pragma", "namespace", "import", "from "
    };

    private readonly StreamReader m_reader;
    private readonly bool m_stripComments;
    private readonly bool m_stripImports;

    /// <summary>
    /// Creates an iterator over the lines supplied by <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">Source of the lines to filter. Not disposed by this class.</param>
    /// <param name="stripComments">When true, line and block comments are removed from the output.</param>
    /// <param name="stripImports">When true, lines starting with an import-style prefix are omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public CodeLineIterator([NotNull] StreamReader reader, bool stripComments, bool stripImports)
    {
        m_reader = reader ?? throw new ArgumentNullException(nameof(reader));
        m_stripComments = stripComments;
        m_stripImports = stripImports;
    }

    /// <summary>
    /// Lazily reads the reader to its end, yielding each remaining line trimmed of surrounding whitespace.
    /// </summary>
    /// <remarks>
    /// Blank lines, and lines that become empty once comments are removed, are never yielded, so an
    /// item's position in the sequence is not its line number in the source.
    /// </remarks>
    /// <returns>The filtered, trimmed, non-empty lines in source order.</returns>
    public IEnumerable<string> GetLines()
    {
        var inBlockComment = false;
        while (m_reader.ReadLine() is { } line)
        {
            if (string.IsNullOrWhiteSpace(line))
                continue;
            line = line.Trim();

            // Comments are removed before the import check so that the check sees only real code
            // (e.g. "/* note */ using System;" is still recognised as an import).
            if (m_stripComments)
            {
                (line, inBlockComment) = StripComments(line, inBlockComment);
                if (line.Length == 0)
                    continue; // Nothing but comment text on this line.
            }

            if (m_stripImports && IsImportLine(line))
                continue;

            yield return line;
        }
    }

    /// <summary>
    /// Reports whether the (trimmed) line starts with one of the import-style prefixes.
    /// </summary>
    private static bool IsImportLine(string line) =>
        Array.Exists(ImportPrefixes, prefix => line.StartsWith(prefix, StringComparison.Ordinal));

    /// <summary>
    /// Removes comment text from a single trimmed line, tracking block comments that span lines.
    /// </summary>
    /// <param name="line">The trimmed source line.</param>
    /// <param name="inBlockComment">True if a block comment opened on an earlier line is still open.</param>
    /// <returns>
    /// The code left on the line (trimmed, possibly empty) and whether a block comment is still open
    /// at the end of the line.
    /// </returns>
    private static (string Line, bool InBlockComment) StripComments(string line, bool inBlockComment)
    {
        // Hash-style whole-line comment. Only meaningful when we are not already inside a block comment.
        if (!inBlockComment && line.StartsWith(HashCommentMarker, StringComparison.Ordinal))
            return (string.Empty, false);

        var code = string.Empty;
        var pos = 0;
        while (pos < line.Length)
        {
            if (inBlockComment)
            {
                // Everything up to the closing marker is comment text; "//" in here is not a line comment.
                var close = line.IndexOf(BlockCommentClose, pos, StringComparison.Ordinal);
                if (close < 0)
                    return (code.Trim(), true);

                inBlockComment = false;
                pos = close + BlockCommentClose.Length;
                continue;
            }

            var blockStart = line.IndexOf(BlockCommentOpen, pos, StringComparison.Ordinal);
            var lineComment = line.IndexOf(LineCommentMarker, pos, StringComparison.Ordinal);

            // A line comment that starts before any block comment discards the rest of the line.
            if (lineComment >= 0 && (blockStart < 0 || lineComment < blockStart))
            {
                code += line[pos..lineComment];
                break;
            }

            if (blockStart < 0)
            {
                code += line[pos..];
                break;
            }

            // Keep the code preceding the block comment, then skip past its opening marker so that
            // the closing marker is only searched for after it ("/*/" does not close itself).
            code += line[pos..blockStart];
            inBlockComment = true;
            pos = blockStart + BlockCommentOpen.Length;
        }

        return (code.Trim(), inBlockComment);
    }
}

CodeIngestLib/Ingester.cs

Lines changed: 24 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,15 @@ public class Ingester
2222
{
2323
private readonly IngestOptions m_options;
2424

25-
private static string[] SymbolsToCollapse { get; } = new[]
26-
{
25+
private static string[] SymbolsToCollapse { get; } =
26+
[
2727
"<", "<=", "=", "==", "=>", ">", "!=", "(", ")", "{", "}", "[", "]", "-", "+", "*", "&", "%", "/", "<<", ">>", ";", ",", "||", "|", ":", "?", "|"
28-
};
28+
];
29+
30+
private static string[] FilesToSkip { get; } =
31+
[
32+
"resx", ".g.", ".designer.", "\\obj\\", "/obj/", "\\bin\\", "/bin/", "assemblyinfo.cs", "/.", "\\."
33+
];
2934

3035
public Ingester(IngestOptions options)
3136
{
@@ -70,7 +75,7 @@ public Ingester(IngestOptions options)
7075
using var tempOutputFile = new TempFile();
7176
outputFile ??= tempOutputFile;
7277

73-
using (var outputStream = (outputFile).Open(FileMode.Create))
78+
using (var outputStream = outputFile.Open(FileMode.Create))
7479
using (var writer = new StreamWriter(outputStream, Encoding.UTF8))
7580
{
7681
writer.NewLine = "\n";
@@ -86,64 +91,34 @@ public Ingester(IngestOptions options)
8691
break; // Caller requested cancellation.
8792
progress.Progress = (int)(100.0 * (i + 1.0) / sourceFiles.Length);
8893
}
89-
90-
using var reader = new StreamReader(sourceFile.FullName, Encoding.UTF8);
9194

92-
writer.WriteLine($"// File: {(m_options.UseFullPaths ? sourceFile.FullName : sourceFile.Name)}");
93-
94-
var lineNumber = 1;
95-
string line;
96-
while ((line = reader.ReadLine()) != null)
97-
{
98-
if (ShouldIncludeSourceLine(line, m_options))
99-
writer.WriteLine($"{lineNumber.ToString()}|{GetCodeLine(line).Trim()}");
95+
if (m_options.Verbose)
96+
Logger.Instance.Info($"Processing: {sourceFile.FullName}");
10097

101-
lineNumber++;
102-
}
98+
using var reader = new StreamReader(sourceFile.FullName, Encoding.UTF8);
99+
writer.WriteLine($"// File: {(m_options.UseFullPaths ? sourceFile.FullName : sourceFile.Name)}");
103100

104-
if (m_options.Verbose)
105-
Logger.Instance.Warn($"{sourceFile.FullName} processed ({lineNumber - 1:N0} lines)");
101+
var iterator = new CodeLineIterator(reader, m_options.StripComments, m_options.ExcludeImports);
102+
iterator
103+
.GetLines()
104+
.ForEach((line, lineIndex) =>
105+
{
106+
var s = GetCodeLine(line);
107+
if (!string.IsNullOrWhiteSpace(s))
108+
writer.WriteLine($"{lineIndex + 1}|{s}");
109+
});
106110
}
107111
}
108112

109113
return (sourceFiles.Length, outputFile.Length);
110114
}
111115

112116
private static bool ShouldSkipFile(FileInfo f) =>
113-
new[]
114-
{
115-
"resx", ".g.", ".designer.", "\\obj\\", "/obj/", "\\bin\\", "/bin/", "assemblyinfo.cs", "/.", "\\."
116-
}.Any(o => f.FullName.Contains(o, StringComparison.OrdinalIgnoreCase));
117-
118-
private static bool ShouldIncludeSourceLine(string s, IngestOptions options)
119-
{
120-
if (string.IsNullOrWhiteSpace(s))
121-
return false;
122-
var trimmed = s.Trim();
123-
124-
if (options.ExcludeImports)
125-
{
126-
if (trimmed.StartsWith("using") || trimmed.StartsWith("#include") || trimmed.StartsWith("#pragma") || trimmed.StartsWith("namespace") || trimmed.StartsWith("import") || trimmed.StartsWith("from "))
127-
return false;
128-
}
129-
130-
if (options.StripComments)
131-
{
132-
if (trimmed.StartsWith("//") || trimmed.StartsWith("# "))
133-
return false;
134-
if (trimmed.StartsWith("/*") && trimmed.EndsWith("*/"))
135-
return false;
136-
}
137-
138-
return true;
139-
}
117+
FilesToSkip.Any(o => f.FullName.Contains(o, StringComparison.OrdinalIgnoreCase));
140118

141119
private static string GetCodeLine(string line)
142120
{
143-
var commentIndex = line.IndexOf("//", StringComparison.Ordinal);
144-
if (commentIndex >= 0)
145-
line = line[..commentIndex];
146-
121+
// De-tab.
147122
if (line.Contains('\t'))
148123
line = line.Replace('\t', ' ');
149124
if (!line.Contains(' '))

DTC.Core

0 commit comments

Comments
 (0)