|
2 | 2 | * Exercise 1-23. Write a program to remove all comments from a C program.
|
3 | 3 | * Don't forget to handle quoted strings and character constants properly. C
|
4 | 4 | * comments don't nest.
|
5 |
| - * Known issue: when more than one full comment is present in a single line, only |
6 |
| - * the last one is removed. |
| 5 | + * |
7 | 6 | * By Faisal Saadatmand
|
8 | 7 | */
|
9 | 8 |
|
10 |
| -#include <stdio.h> |
11 |
| - |
12 |
| -#define MAXLINE 1000 |
13 |
| -#define YES 1 |
14 |
| -#define NO 0 |
15 |
| -#define SLASH_ASTERISK 1 |
16 |
| -#define ASTERISK_SLASH 0 |
17 |
| - |
18 |
| -/* functions */ |
19 |
| -int getLine(char [], int); |
20 |
| -int findComment(char [], int ); |
21 |
| -int delComment(char [], char [], int , int); |
22 |
| -int delBlankLine(char []); |
23 |
| - |
24 |
| -/* getLine function: read a line into s, return length */ |
25 |
| -int getLine(char s[], int lim) |
26 |
| -{ |
27 |
| - int c, i; |
28 |
| - |
29 |
| - for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; ++i) |
30 |
| - s[i] = c; |
31 |
| - |
32 |
| - if (c == '\n') { |
33 |
| - s[i] = c; |
34 |
| - ++i; |
35 |
| - } |
36 |
| - |
37 |
| - s[i] = '\0'; |
38 |
| - |
39 |
| - return i; |
40 |
| -} |
41 |
| - |
42 |
| -/* findComment function: searches line[] for the requested C comment notation |
43 |
| - * and returns the location of its last occurrence, or -1 if none is found or |
44 |
| - * the notation lies inside double quotation marks */ |
45 |
| -int findComment(char line[], int notation) |
46 |
| -{ |
47 |
| - int i, j; |
48 |
| - int quoteStart; /* location of the start of the quotation mark */ |
49 |
| - int quoteEnd; /* location of the end of quotation mark */ |
50 |
| - int location; /* location of C comment notation */ |
51 |
| - int comment[2]; /* notation type: start or end */ |
52 |
| - int lookForQuote; /* flag variable */ |
53 |
| - |
54 |
| - location = quoteStart = quoteEnd = -1; |
55 |
| - /* set the appropriate notation */ |
56 |
| - if (notation == SLASH_ASTERISK) { |
57 |
| - comment[0] = '/'; |
58 |
| - comment[1] = '*'; |
59 |
| - } else if (notation == ASTERISK_SLASH) { |
60 |
| - comment[0] = '*'; |
61 |
| - comment[1] = '/'; |
62 |
| - } |
63 |
| - |
64 |
| - lookForQuote = YES; |
65 |
| - /* line[x - 1] check handles escape sequences. It is unnecessary for the |
66 |
| - * start of the quote but is added for the sake of correctness. */ |
67 |
| - for (i = 0; line[i] != '\0'; ++i) { |
68 |
| - if (line[i] == comment[0] && line[i + 1] == comment[1]) { |
69 |
| - if (notation == ASTERISK_SLASH) |
70 |
| - location = i + 1; /* end of comment including notation */ |
71 |
| - else |
72 |
| - location = i; /* start of comment including notation */ |
73 |
| - } |
74 |
| - if (line[i] == '\"' && line[i - 1] != '\\' && lookForQuote == YES) { |
75 |
| - quoteStart = i; |
76 |
| - for (j = i + 1; line[j] != '\0'; ++j) |
77 |
| - if (line[j] == '\"' && line[j - 1] != '\\') |
78 |
| - quoteEnd = j; |
79 |
| - lookForQuote = NO; |
80 |
| - } |
81 |
| - } |
82 |
| - |
83 |
| - /* check if notation is inside double quotation marks */ |
84 |
| - if (location >= 0 && quoteStart >= 0) |
85 |
| - if (location > quoteStart && location < quoteEnd) |
86 |
| - location = -1; /* not a C comment */ |
87 |
| - |
88 |
| - /* check if notation is inside a multi-line double-quoted string */ |
89 |
| - if (location >= 0 && quoteStart >= 0 && quoteEnd < 0) |
90 |
| -// if (location < quoteStart) |
91 |
| - location = -1; /* not a C comment */ |
92 |
| - |
93 |
| - return location; |
94 |
| -} |
95 |
| - |
96 |
| -/* delComment function: deletes C comments from the line string and stores the |
97 |
| - * result in modLine */ |
98 |
| -int delComment(char line[], char modLine[], int start, int end) |
99 |
| -{ |
100 |
| - int i, j; |
101 |
| - int status; |
102 |
| - |
103 |
| - i = j = 0; |
104 |
| - |
105 |
| - /* no notation - delete entire line */ |
106 |
| - if (start < 0 && end < 0) |
107 |
| - for (i = 0; line[i] != '\0'; ++i) |
108 |
| - modLine[i] = '\0'; |
109 |
| - /* start but no end - delete rest of line */ |
110 |
| - else if (start >= 0 && end < 0) |
111 |
| - for (i = 0; i < start; ++i) |
112 |
| - modLine[i] = line[i]; |
113 |
| - /* end but no start - move text after comment to the beginning of line */ |
114 |
| - else if (start < 0 && end >= 0) |
115 |
| - for (j = end + 1; line[j] != '\0'; ++j) { |
116 |
| - modLine[i] = line[j]; |
117 |
| - ++i; |
118 |
| - } |
119 |
| - /* full comment embedded - move text after comment to start location */ |
120 |
| - else if (start >= 0 && end >= 0) { |
121 |
| - for (i = 0; i < start; ++i) |
122 |
| - modLine[i] = line[i]; |
123 |
| - for (j = end + 1; line[j] != '\0'; ++j) { |
124 |
| - modLine[i] = line[j]; |
125 |
| - ++i; |
126 |
| - } |
127 |
| - } |
| 9 | +/* |
| 10 | + * NOTE: In keeping with the C standard the book follows, this program does not
| 11 | + * delete single-line comments, i.e. comments introduced by //, because they
| 12 | + * were added to the language after ANSI C (C89).
| 13 | + */ |
128 | 14 |
|
129 |
| - /* end of line formatting */ |
130 |
| - if (start < 0 && end < 0) |
131 |
| - modLine[0] = '\n'; |
132 |
| - else if (start >= 0 && end < 0) { |
133 |
| - modLine[i] = '\n'; |
134 |
| - modLine[i + 1] = '\0'; |
135 |
| - } else |
136 |
| - modLine[i] = '\0'; |
| 15 | +#include <stdio.h> |
137 | 16 |
|
138 |
| - /* status of the current deleted comment: single or multi-line */ |
139 |
| - status = 0; |
140 |
| - if ((start >= 0 && end < 0) || (start < 0 && end < 0)) |
141 |
| - status = 1; |
142 |
| - else if (start < 0 && end >= 0) |
143 |
| - status = 0; |
| 17 | +#define IN 1 |
| 18 | +#define OUT 0 |
144 | 19 |
|
145 |
| - return status; |
146 |
| -} |
147 |
| - |
148 |
| -/* delBlankLine function: clears s if it contains only blanks, tabs and |
149 |
| - * newlines. Returns 0 if the line was blank (and cleared), 1 otherwise */ |
150 |
| -int delBlankLine(char s[]) |
| 20 | +/* isQuotationMark: return 1 if c is a double quote that is not preceded by a
| 21 | + * backslash or a single quote (i.e. it delimits a string literal), else 0 */
| 22 | +int isQuotationMark(char prev, char c) |
151 | 23 | {
|
152 |
| - int i, notBlankLine; |
153 |
| - |
154 |
| - i = notBlankLine = 0; |
155 |
| - while (notBlankLine != 1 && s[i] != '\0') { |
156 |
| - if (s[i] != ' ' && s[i] != '\t' && s[i] != '\n') |
157 |
| - notBlankLine = 1; |
158 |
| - ++i; |
159 |
| - } |
160 |
| - |
161 |
| - if (notBlankLine != 1) |
162 |
| - for (i = 0; s[i] != '\0'; ++i) |
163 |
| - s[i] = '\0'; |
164 |
| - |
165 |
| - return notBlankLine; |
| 24 | + return prev != '\\' && prev != '\'' && c == '\"'; |
166 | 25 | }
|
167 | 26 |
|
168 | 27 | int main(void)
|
169 | 28 | {
|
170 |
| - int len; /* current line length */ |
171 |
| - int start; /* comment's beginning */ |
172 |
| - int end; /* comment's end */ |
173 |
| - char line[MAXLINE]; /* current input line */ |
174 |
| - char modLine[MAXLINE]; /* modified output line */ |
175 |
| - int status; /* multi-line comments flag */ |
176 |
| - |
177 |
| - status = 0; |
178 |
| - |
179 |
| - while ((len = getLine(line, MAXLINE)) > 0) { |
180 |
| - |
181 |
| - start = findComment(line, SLASH_ASTERISK); |
182 |
| - end = findComment(line, ASTERISK_SLASH); |
183 |
| - |
184 |
| - if (start < 0 && end < 0 && status == 0) /* no comment found */ |
185 |
| - printf("%s", line); |
186 |
| - else { |
187 |
| - status = delComment(line, modLine, start, end); |
188 |
| - delBlankLine(modLine); /* optional */ |
189 |
| - printf("%s", modLine); |
| 29 | + int prevC; /* previously read character from input */ |
| 30 | + int c; /* currently read character from input */ |
| 31 | + int comment, quote; /* flag variables */ |
| 32 | + |
| 33 | + comment = quote = OUT; |
| 34 | + prevC = getchar(); /* get the first character */ |
| 35 | + while ((c = getchar()) != EOF) { /* get the next character */ |
| 36 | + if (isQuotationMark(prevC, c)) { |
| 37 | + if (quote == IN) |
| 38 | + quote = OUT; /* the end of quote */ |
| 39 | + else if (comment == OUT) |
| 40 | + quote = IN; /* the beginning of a quote */ |
| 41 | + } |
| 42 | + if (quote == OUT && prevC == '/' && c == '*') |
| 43 | + comment = IN; /* the beginning of a comment */ |
| 44 | + if (comment == OUT) |
| 45 | + putchar(prevC); /* print previously read character */ |
| 46 | + if (comment == IN && prevC == '*' && c == '/') { |
| 47 | + c = getchar(); /* skip '/' character */ |
| 48 | + comment = OUT; /* the end of a comment */ |
190 | 49 | }
|
| 50 | + prevC = c; /* store c */ |
191 | 51 | }
|
| 52 | + putchar(prevC); /* print the last previously read character */ |
192 | 53 | return 0;
|
193 | 54 | }
|
0 commit comments