CompilationPrincipleHomework1/toNFA.cpp at main · zhuzil/CompilationPrincipleHomework1 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
#include "pch.h"
#include "transcode.h"
#include<fstream>
#include<sstream>

/*
获得新的状态节点，统一产生，便于管理，不能产生重复的状态
并添加到state_set[]数组中
*/
state thompson::new_StateNode()
{
	state newState;
	newState.StateName = STATE_NUM + 65;//转换成大写字母
	STATE_NUM++;
	return newState;
}


//处理 a|b
cell thompson::do_Unite(cell Left, cell Right)
{
	cell NewCell;
	NewCell.EdgeCount = 0;
	edge Edge1, Edge2, Edge3, Edge4;
	//获得新的新状态节点
	state StartState = new_StateNode();
	state EndState = new_StateNode();
	//构建边
	Edge1.StartState = StartState;
	Edge1.EndState = Left.StartState;
	Edge1.TransSymbol = '#';  //代表空串

	Edge2.StartState = StartState;
	Edge2.EndState = Right.StartState;
	Edge2.TransSymbol = '#';

	Edge3.StartState = Left.EdgeSet[Left.EdgeCount - 1].EndState;
	Edge3.EndState = EndState;
	Edge3.TransSymbol = '#';

	Edge4.StartState = Right.EdgeSet[Right.EdgeCount - 1].EndState;
	Edge4.EndState = EndState;
	Edge4.TransSymbol = '#';

	//构建单元
	//先将Left和Right的EdgeSet复制到NewCell
	cell_EdgeSet_Link(NewCell, Left);
	cell_EdgeSet_Link(NewCell, Right);

	//将新构建的四条边加入EdgeSet
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge1;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge2;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge3;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge4;

	//构建NewCell的启示状态和结束状态
	NewCell.StartState = StartState;
	NewCell.EndState = EndState;

	return NewCell;
}


//处理 ab
cell thompson::do_Join(cell Left, cell Right)
{
	//将Left的结束状态和Right的开始状态合并，将Right的边复制给Left，将Left返回
	//将Right中所有以StartState开头的边全部修改
	for (int i = 0; i < Right.EdgeCount; i++)
	{
		if (Right.EdgeSet[i].StartState.StateName.compare(Right.StartState.StateName) == 0)
		{
			Right.EdgeSet[i].StartState = Left.EndState;
			//STATE_NUM--;//感觉有一点点问题呢
		}
		else if (Right.EdgeSet[i].EndState.StateName.compare(Right.StartState.StateName) == 0)
		{
			Right.EdgeSet[i].EndState = Left.EndState;
			//STATE_NUM--;
		}
	}
	Right.StartState = Left.EndState;

	cell_EdgeSet_Link(Left, Right);
	//将Left的结束状态更新为Right的结束状态
	Left.EndState = Right.EndState;
	return Left;
}


//处理 a*
cell thompson::do_Star(cell Cell)
{
	cell NewCell;
	NewCell.EdgeCount = 0;
	edge Edge1, Edge2, Edge3, Edge4;//增加了四条边
	//获得新的新状态节点
	state StartState = new_StateNode();
	state EndState = new_StateNode();
	//构建边
	Edge1.StartState = StartState;
	Edge1.EndState = EndState;
	Edge1.TransSymbol = '#';  //闭包取空串

	Edge2.StartState = Cell.EndState;
	Edge2.EndState = Cell.StartState;
	Edge2.TransSymbol = '#';  //取字符，自连接

	Edge3.StartState = StartState;
	Edge3.EndState = Cell.StartState;
	Edge3.TransSymbol = '#';

	Edge4.StartState = Cell.EndState;
	Edge4.EndState = EndState;
	Edge4.TransSymbol = '#';
	//构建单元
	//先将原来的Cell的EdgeSet添加到NewCell
	cell_EdgeSet_Link(NewCell, Cell);
	//将新构建的四条边加入EdgeSet
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge1;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge2;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge3;
	NewCell.EdgeSet[NewCell.EdgeCount++] = Edge4;
	//构建NewCell的启示状态和结束状态
	NewCell.StartState = StartState;
	NewCell.EndState = EndState;

	return NewCell;
}


//处理 a
cell thompson::do_Cell(char element)
{
	cell NewCell;
	NewCell.EdgeCount = 0;
	edge NewEdge;
	//获得新的新状态节点
	state StartState = new_StateNode();
	state EndState = new_StateNode();
	//构建边
	NewEdge.StartState = StartState;
	NewEdge.EndState = EndState;
	NewEdge.TransSymbol = element;
	//构建单元
	NewCell.EdgeSet[NewCell.EdgeCount] = NewEdge;
	NewCell.EdgeCount++;
	NewCell.StartState = NewCell.EdgeSet[0].StartState;
	NewCell.EndState = NewCell.EdgeSet[0].EndState;//EdgeCount此时为1
	return NewCell;
}


//将新增的cell添加到原来的cell中
void thompson::cell_EdgeSet_Link(cell& Destination, cell Source)
{
	int D_count = Destination.EdgeCount;
	int S_count = Source.EdgeCount;
	for (int i = 0; i < S_count; i++)
	{
		Destination.EdgeSet[D_count + i] = Source.EdgeSet[i];
	}
	Destination.EdgeCount = D_count + S_count;
}


//接收输入正规表达式，RegularExpression作为回传函数
void thompson::input(string& RegularExpression,string in)
{
	stringstream If(in);
	//cout << "请输入正则表达式：  （操作符：() * |;字符集：a~z A~Z）" << endl;
	do
	{
		If >> RegularExpression;
	} while (!check_Islegal(RegularExpression));

}


//检测输入的正则表达式是否合法
bool thompson::check_Islegal(string check_string)
{
	if (check_Ischaracter(check_string) && check_Ismatching(check_string))
	{
		return true;
	}
	return false;
}


/*
检查输入的字符是否合适 () * | a~z A~Z
合法返回true,非法返回false
*/
bool thompson::check_Ischaracter(string check_string)
{
	int length = check_string.size();
	for (int i = 0; i < length; i++)
	{
		char check = check_string.at(i);
		//原型： const char& at(size_t pos) const;
		//功能： 返回源字符串下标为n处的字符的引用。
		//说明： 不能被修改。
		if (is_letter(check))//小写和大写之间有5个字符，故不能连续判断
		{
			//cout<<"字母 合法";
		}
		else if (check == '(' || check == ')' || check == '*' || check == '|')
		{
			//cout<<"操作符 合法";
		}
		else
		{
			cout << "含有不合法的字符!" << endl;
			cout << "请重新输入:" << endl;
			return false;
		}
	}
	return true;
}


/**先检查括号是否匹配
*合法返回true,非法返回false
*/
bool thompson::check_Ismatching(string check_string)
{
	int length = check_string.size();
	//char * check = new char[length+1];
	//wcscpy(check, length+1, check_string.c_str());
	string check = check_string;
	stack<int> STACK;
	for (int i = 0; i < length; i++)
	{
		if (check[i] == '(')
			STACK.push(i);
		else if (check[i] == ')')
		{
			if (STACK.empty())
			{
				cerr << "有多余的右括号" << endl;//暂时不记录匹配位置location
				cout << "请重新输入:" << endl;
				return false;
			}
			else
				STACK.pop();
		}
	}
	if (!STACK.empty())
	{
		//暂时不记录匹配位置location
		cerr << "有多余的左括号" << endl;
		cout << "请重新输入:" << endl;
		return false;
	}

	return true;
}


/**检测是否是字母
是返回true,否则false
*/
bool thompson::is_letter(char check)
{
	if (check >= 'a' && check <= 'z' || check >= 'A' && check <= 'Z')
		return true;
	return false;
}


/**添加交操作符“+”，便于中缀转后缀表达式
例如 abb->a+b+b
*/
string thompson::add_plus_symbol(string add_string)
{
	int length = add_string.size();
	int return_string_length = 0;
	char* return_string = new char[2 * length + 2];//最多是两倍
	char first, second;
	if (add_string.size() == 1)return add_string;//只输入一个字符的情况
	for (int i = 0; i < length - 1; i++)
	{
		first = add_string.at(i);//读取字符串的第i个字符
		second = add_string.at(i + 1);
		return_string[return_string_length++] = first;
		//要加的可能性如ab 、 *b 、 a( 、 )b 等情况
		//若第二个是字母、第一个不是'('、'|'都要添加
		if (first != '(' && first != '|' && is_letter(second))
		{
			return_string[return_string_length++] = '+';
		}
		//若第二个是'(',第一个不是'|'、'(',也要加
		else if (second == '(' && first != '|' && first != '(')
		{
			return_string[return_string_length++] = '+';
		}
	}
	//将最后一个字符写入
	return_string[return_string_length++] = second;
	return_string[return_string_length] = '\0';
	string STRING(return_string);//
	return STRING;
}


/*
构造优先级表规则：（1）先括号内，再括号外；（2）优先级由高到低：闭包、|、+；（3）同级别，先左后右。
优先级表：
	 #	(	*	|	+	)
isp  0	1	7	5	3	8
icp	 0	8	6	4	2	1
*/

//in stack priority  栈内优先级，栈顶的字符的优先级
int thompson::isp(char c)
{
	switch (c)
	{
	case '#': return 0;
	case '(': return 1;
	case '*': return 7;
	case '|': return 5;
	case '+': return 3;
	case ')': return 8;
	}
	//若走到这一步，说明出错了
	cerr << "ERROR!" << endl;
	return false;
}


//in coming priority 栈外优先级，当前扫描到的字符的优先级
int thompson::icp(char c)
{
	switch (c)
	{
	case '#': return 0;
	case '(': return 8;
	case '*': return 6;
	case '|': return 4;
	case '+': return 2;
	case ')': return 1;
	}
	//若走到这一步，说明出错了
	cerr << "ERROR!" << endl;
	return false;
}


/*中缀表达式转后缀表达式*/
string thompson::turn_postfix(string e)//输入的是一个中缀表达式
{
	//设定e的最后一个符号式“#”，而其“#”一开始先放在栈s的栈底
	e = e + "#";

	stack<char> s;
	char ch = '#', ch1, op;
	s.push(ch);
	//读一个字符
	string Postfix_Expression_String = "";
	int read_location = 0;
	ch = e.at(read_location++);//逐个读取字符
	while (!s.empty())
	{
		if (is_letter(ch))
		{
			Postfix_Expression_String = Postfix_Expression_String + ch;
			//cout<<ch;
			ch = e.at(read_location++);
		}
		else
		{
			//cout<<"输出操作符："<<ch<<endl;
			ch1 = s.top();
			if (isp(ch1) < icp(ch))
			{
				s.push(ch);
				//cout<<"压栈"<<ch<<"  读取下一个"<<endl;
				ch = e.at(read_location++);
			}
			else if (isp(ch1) > icp(ch))
			{
				op = s.top();
				s.pop();
				//cout<<"退栈"<<op<<" 添加到输出字符串"<<endl;
				Postfix_Expression_String = Postfix_Expression_String + op;
				//cout<<op;
			}
			else  //考虑优先级相等的情况
			{
				op = s.top();
				s.pop();
				//cout<<"退栈"<<op<<"  但不添加到输入字符串"<<endl;

				if (op == '(')
					ch = e.at(read_location++);
			}
		}
	}
	return Postfix_Expression_String;
}


/*表达式转NFA处理函数,返回最终的NFA结合*/
cell thompson::Expression_to_NFA(string expression)
{
	int length = expression.size();
	char element;
	cell Cell, Left, Right;
	stack<cell> STACK;
	for (int i = 0; i < length; i++)
	{
		element = expression.at(i);
		switch (element)
		{
		case '|':
			Right = STACK.top();
			STACK.pop();
			Left = STACK.top();
			STACK.pop();
			Cell = do_Unite(Left, Right);
			STACK.push(Cell);
			break;
		case '*':
			Left = STACK.top();
			STACK.pop();
			Cell = do_Star(Left);
			STACK.push(Cell);
			break;
		case '+':
			Right = STACK.top();
			STACK.pop();
			Left = STACK.top();
			STACK.pop();
			Cell = do_Join(Left, Right);
			STACK.push(Cell);
			break;
		default:
			Cell = do_Cell(element);
			STACK.push(Cell);
		}
	}
	cout << "处理完毕!" << endl;
	Cell = STACK.top();
	STACK.pop();

	return Cell;
}


//显示NFA

void thompson::Display(cell Cell)
{
	ofstream of("toNFA.txt");
	of << "NFA 的边数：" << Cell.EdgeCount << endl;
	of << "NFA 的起始状态：" << Cell.StartState.StateName << endl;
	of << "NFA 的结束状态：" << Cell.EndState.StateName << endl;
	for (int i = 0; i < Cell.EdgeCount; i++)
	{
		of << "第" << i + 1 << "条边的起始状态：" << Cell.EdgeSet[i].StartState.StateName
			<< "  结束状态：" << Cell.EdgeSet[i].EndState.StateName
			<< "  转换符：" << Cell.EdgeSet[i].TransSymbol << endl;
	}
	of << "结束" << endl;
	of.close();
}