-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathctangle.w
More file actions
1558 lines (1373 loc) · 54.4 KB
/
ctangle.w
File metadata and controls
1558 lines (1373 loc) · 54.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% This file is part of CWEB.
% This program by Silvio Levy and Donald E. Knuth
% is based on a program by Knuth.
% It is distributed WITHOUT ANY WARRANTY, express or implied.
% Version 4.12.2 --- July 2025
% Copyright (C) 1987,1990,1993,2000 Silvio Levy and Donald E. Knuth
% Permission is granted to make and distribute verbatim copies of this
% document provided that the copyright notice and this permission notice
% are preserved on all copies.
% Permission is granted to copy and distribute modified versions of this
% document under the conditions for verbatim copying, provided that the
% entire resulting derived work is given a different name and distributed
% under the terms of a permission notice identical to this one.
% Amendments to 'ctangle.w' resulting in this updated version were created
% by numerous collaborators over the course of many years.
% Please send comments, suggestions, etc. to tex-k@@tug.org.
% Here is TeX material that gets inserted after \input cwebmac
\def\hang{\hangindent 3em\indent\ignorespaces}
\def\pb{$\.|\ldots\.|$} % C brackets (|...|)
\def\v{\char'174} % vertical (|) in typewriter font
\mathchardef\RA="3221 % right arrow
\mathchardef\BA="3224 % double arrow
\def\title{CTANGLE (Version 4.12.2)}
\def\topofcontents{\null\vfill
\centerline{\titlefont The {\ttitlefont CTANGLE} processor}
\vskip 15pt
\centerline{(Version 4.12.2)}
\vfill}
\def\botofcontents{\vfill
\noindent
Copyright \copyright\ 1987, 1990, 1993, 2000 Silvio Levy and Donald E. Knuth
\bigskip\noindent
Permission is granted to make and distribute verbatim copies of this
document provided that the copyright notice and this permission notice
are preserved on all copies.
\smallskip\noindent
Permission is granted to copy and distribute modified versions of this
document under the conditions for verbatim copying, provided that the
entire resulting derived work is given a different name and distributed
under the terms of a permission notice identical to this one.
}
\pageno=\contentspagenumber \advance\pageno by 1
\let\maybe=\iftrue
@** Introduction.
This is the \.{CTANGLE} program by Silvio Levy and Donald E. Knuth,
based on \.{TANGLE} by Knuth.
We are thankful to
Nelson Beebe, Hans-Hermann Bode (to whom the \CPLUSPLUS/ adaptation is due),
Klaus Guntermann, Norman Ramsey, Tomas Rokicki, Joachim Schnitter,
Joachim Schrod, Lee Wittenberg, and others who have contributed improvements.
The ``banner line'' defined here should be changed whenever \.{CTANGLE}
is modified.
@d banner "This is CTANGLE (Version 4.12.2)"
@c
@<Include files@>@/
@h
@<Common code for \.{CWEAVE} and \.{CTANGLE}@>@/
@<Typedef declarations@>@/
@<Private variables@>@/
@<Predeclaration of procedures@>
@ \.{CTANGLE} has a fairly straightforward outline. It operates in
two phases: First it reads the source file, saving the \CEE/ code in
compressed form; then it shuffles and outputs the code.
Please read the documentation for \.{COMMON}, the set of routines common
to \.{CTANGLE} and \.{CWEAVE}, before proceeding further.
@c
/* Program entry: remember the command line, initialize, run both phases,
   and return whatever |wrap_up| reports */
int main (
int ac,
char **av)
{
argc=ac; argv=av; /* keep the command line in |argc| and |argv| */
program=ctangle;
@<Set initial values@>@;
common_init();
if (show_banner) puts(banner); /* print a ``banner line'' */
phase_one(); /* read all the user's text and compress it into |tok_mem| */
phase_two(); /* output the contents of the compressed tables */
return wrap_up(); /* and exit gracefully */
}
@ The next few sections contain stuff from the file |"common.w"| that must
be included in both |"ctangle.w"| and |"cweave.w"|. It appears in
file |"common.h"|, which is also included in |"common.w"| to propagate
possible changes from this \.{COMMON} interface consistently.
@i common.h
@* Data structures exclusive to {\tt CTANGLE}.
We've already seen that the |byte_mem| array holds the names of identifiers,
strings, and sections;
the |tok_mem| array holds the replacement texts
for sections. Allocation is sequential, since things are deleted only
during Phase II, and only in a last-in-first-out manner.
A \&{text} variable is a structure containing a pointer into
|tok_mem|, which tells where the corresponding text starts, and an
integer |text_link|, which, as we shall see later, is used to connect
pieces of text that have the same name. All the \&{text}s are stored in
the array |text_info|, and we use a \&{text\_pointer} variable to refer
to them.
The first position of |tok_mem| that is unoccupied by
replacement text is called |tok_ptr|, and the first unused location of
|text_info| is called |text_ptr|. Thus we usually have the identity
|text_ptr->tok_start==tok_ptr|.
@<Typed...@>=
typedef struct {
eight_bits *tok_start; /* pointer into |tok_mem| */
sixteen_bits text_link; /* relates replacement texts: |macro|, |section_flag|,
  or the index in |text_info| of the continuation */
} text;
typedef text *text_pointer;
@ @d max_texts 4000 /* number of replacement texts, must be less than 10240 */
@d max_toks 270000 /* number of bytes in compressed \CEE/ code */
@<Private...@>=
static text text_info[max_texts];
static text_pointer text_info_end=text_info+max_texts-1;
static text_pointer text_ptr; /* first unused position in |text_info| */
static eight_bits tok_mem[max_toks];
static eight_bits *tok_mem_end=tok_mem+max_toks-1;
static eight_bits *tok_ptr; /* first unused position in |tok_mem| */
@ @<Set init...@>=
text_info->tok_start=tok_ptr=tok_mem;
text_ptr=text_info+1; text_ptr->tok_start=tok_mem;
/* this makes replacement text 0 of length zero */
@ If |p| is a pointer to a section name, |p->equiv| is a pointer to its
replacement text, an element of the array |text_info|.
@d equiv equiv_or_xref /* info corresponding to names */
@ @<Set init...@>=
init_node(name_dir); /* the undefined section has no replacement text */
@ Here's the procedure that decides whether a name of length |l|
starting at position |first| equals the identifier pointed to by |p|:
@c
bool names_match(
  name_pointer p, /* candidate entry that might be the same name */
  const char *first, /* where the string being looked up begins */
  size_t l, /* how many characters that string has */
  eight_bits t) /* ignored; \.{CTANGLE} has no use for it */
{
  (void)t; /* mark the parameter as deliberately unused */
  if (length(p)!=l) return false; /* different lengths cannot match */
  return strncmp(first,p->byte_start,l)==0;
}
@ The common lookup routine |id_lookup| refers to a separate routine
|init_node| when the data structure grows.
@c
void
init_node(
  name_pointer node) /* the entry to be initialized */
{
  node->equiv=(void *)&text_info[0]; /* text 0 stands for ``no replacement text'' */
}
@* Tokens.
Replacement texts, which represent \CEE/ code in a compressed format,
appear in |tok_mem| as mentioned above. The codes in
these texts are called `tokens'; some tokens occupy two consecutive
eight-bit byte positions, and the others take just one byte.
If |p| points to a replacement text, |p->tok_start| is the |tok_mem| position
of the first eight-bit code of that text. If |p->text_link==0|,
this is the replacement text for a macro, otherwise it is the replacement
text for a section. In the latter case |p->text_link| is either equal to
|section_flag|, which means that there is no further text for this section, or
|p->text_link| points to a continuation of this replacement text; such
links are created when several sections have \CEE/ texts with the same
name, and they also tie together all the \CEE/ texts of unnamed sections.
The replacement text pointer for the first unnamed section appears in
|text_info->text_link|, and the most recent such pointer is |last_unnamed|.
@d macro 0
@d section_flag max_texts /* final |text_link| in section replacement texts */
@<Private...@>=
static text_pointer last_unnamed; /* most recent replacement text of unnamed section */
@ @<Set init...@>= last_unnamed=text_info; text_info->text_link=macro;
@ If the first byte of a token is less than |0200|, the token occupies a
single byte. Otherwise we make a sixteen-bit token by combining two consecutive
bytes |a| and |b|. If |0200<=a<0250|, then |(a-0200)@t${}\times2^8$@>+b|
points to an identifier; if |0250<=a<0320|, then
|(a-0250)@t${}\times2^8$@>+b| points to a section name
(or, if it has the special value |output_defs_flag|,
to the area where the preprocessor definitions are stored); and if
|0320<=a<0400|, then |(a-0320)@t${}\times2^8$@>+b| is the number of the section
in which the current replacement text appears.
Codes less than |0200| are 7-bit |char| codes that represent themselves.
Some of the 7-bit codes will not be present, however, so we can
use them for special purposes. The following symbolic names are used:
\yskip \hang |string| denotes the beginning or end of a string
or a verbatim construction.
\hang |constant| denotes a numerical constant.
\hang |join| denotes the concatenation of adjacent items with no space
or line breaks allowed between them (the \.{@@\&} operation of \.{CWEB}).
@^ASCII code dependencies@>
@d string 02 /* takes the place of ASCII \.{STX} */
@d constant 03 /* takes the place of ASCII \.{ETX} */
@d join 0177 /* takes the place of ASCII \.{DEL} */
@d output_defs_flag (2*024000-1) /* |024000==(0250-0200)*0400| */
@ The following procedure is used to enter a two-byte value into
|tok_mem| when a replacement text is being generated.
@c
static void
store_two_bytes(
  sixteen_bits x) /* value to append to |tok_mem|, high-order byte first */
{
  if (tok_ptr+2>tok_mem_end) overflow("token"); /* |tok_mem| is full */
  tok_ptr[0]=(eight_bits)(x>>8); /* high byte */
  tok_ptr[1]=(eight_bits)(x&0377); /* low byte */
  tok_ptr+=2; /* both positions are now occupied */
}
@ @<Predecl...@>=@+static void store_two_bytes(sixteen_bits);
@** Stacks for output. The output process uses a stack to keep track
of what is going on at different ``levels'' as the sections are being
written out. Entries on this stack have four parts:
\yskip\hang |byte_field| is the |tok_mem| location from which the next
token on a particular level will be read;
\hang |name_field| points to the name corresponding to a particular level;
\hang |repl_field| points to the replacement text currently being read
at a particular level;
\hang |section_field| is the section number, or zero if this is a macro.
\yskip\noindent The current values of these four quantities are referred to
quite frequently, so they are stored in an extra slot at the very end of the
|stack| array. We call the current values |cur_byte|, |cur_name|, |cur_repl|,
and |cur_section|.
The global variable |stack_ptr| tells how many levels of output are
currently in progress. The end of all output occurs when the stack is
empty, i.e., when |stack_ptr==stack|.
@<Typed...@>=
/* One level of the output stack; |push_level| saves such a record
   and |pop_level| restores it */
typedef struct {
eight_bits *byte_field; /* present location within replacement text */
name_pointer name_field; /* |byte_start| index for text being output */
text_pointer repl_field; /* |tok_start| index for text being output */
sixteen_bits section_field; /* section number or zero if not a section */
} output_state;
typedef output_state *stack_pointer;
@ @d stack_size 50 /* number of simultaneous levels of macro expansion */
@d cur_state stack[stack_size+1] /* |cur_byte|, |cur_name|, |cur_repl|,
and |cur_section| */
@d cur_byte cur_state.byte_field /* location of next output byte in |tok_mem|*/
@d cur_name cur_state.name_field /* pointer to current name being expanded */
@d cur_repl cur_state.repl_field /* pointer to current replacement text */
@d cur_section cur_state.section_field /* current section number being expanded */
@d cur_end (cur_repl+1)->tok_start /* current ending location in |tok_mem| */
@<Private...@>=
static output_state stack[stack_size+2]; /* info for non-current levels */
static stack_pointer stack_end=stack+stack_size; /* end of |stack| */
static stack_pointer stack_ptr; /* first unused location in the output state stack */
@ To get the output process started, we will perform the following
initialization steps. We may assume that |text_info->text_link| is nonzero,
since it points to the \CEE/ text in the first unnamed section that generates
code; if there are no such sections, there is nothing to output, and an
error message will have been generated before we do any of the initialization.
@<Initialize the output stacks@>=
stack_ptr=stack+1; cur_name=name_dir;
cur_repl=text_info->text_link+text_info;
cur_byte=cur_repl->tok_start; cur_section=0;
@ Similar settings are used for secondary output files.
@<Initialize the secondary output@>=
stack_ptr=stack+1; cur_name=*an_output_file;
cur_repl=(text_pointer)cur_name->equiv;
cur_byte=cur_repl->tok_start;
@ When the replacement text for name |p| is to be inserted into the output,
the following subroutine is called to save the old level of output and get
the new one going.
We assume that the \CEE/ compiler can copy structures.
@^system dependencies@>
@c
static void
push_level( /* suspends the current level */
  name_pointer p) /* name whose replacement text becomes current, or |NULL| */
{
  if (stack_ptr==stack_end) overflow("stack"); /* too many nested levels */
  *stack_ptr++=cur_state; /* structure copy of the suspended state */
  if (p==NULL) return; /* we are in |output_defs|, which sets |cur_byte| itself */
  cur_name=p;
  cur_repl=(text_pointer)p->equiv;
  cur_byte=cur_repl->tok_start;
  cur_section=0;
}
@ When we come to the end of a replacement text, the |pop_level| subroutine
does the right thing: It either moves to the continuation of this replacement
text or returns the state to the most recently stacked level.
@c
static void
pop_level( /* do this when |cur_byte| reaches |cur_end| */
  bool flag) /* |false| when the call comes from |output_defs| */
{
  if (!flag || cur_repl->text_link>=section_flag) {
    /* no continuation to follow: go down to the previous level */
    if (--stack_ptr>stack) cur_state=*stack_ptr;@^system dependencies@>
    return;
  }
  cur_repl=&text_info[cur_repl->text_link]; /* continuation; stay on the same level */
  cur_byte=cur_repl->tok_start;
}
@ @<Predecl...@>=
static void push_level(name_pointer);@/
static void pop_level(bool);@/
static void get_output(void);
@ The heart of the output procedure is the function |get_output|,
which produces the next token of output and sends it on to the lower-level
function |out_char|. The main purpose of |get_output| is to handle the
necessary stacking and unstacking. It sends the value |section_number|
if the next output begins or ends the replacement text of some section,
in which case |cur_val| is that section's number (if beginning) or the
negative of that value (if ending). (A section number of 0 indicates
not the beginning or ending of a section, but a \#\&{line} command.)
And it sends the value |identifier|
if the next output is an identifier, in which case
|cur_val| points to that identifier name.
@d section_number 0201 /* code returned by |get_output| for section numbers */
@d identifier 0202 /* code returned by |get_output| for identifiers */
@<Private...@>=
static int cur_val; /* additional information corresponding to output token */
@ If |get_output| finds that no more output remains, it returns with
|stack_ptr==stack|.
@^high-bit character handling@>
@c
static void
get_output(void) /* sends next token to |out_char| */
{
sixteen_bits a; /* value of current byte */
restart: if (stack_ptr==stack) return; /* the stack is empty: no output remains */
if (cur_byte==cur_end) { /* the current replacement text is exhausted */
cur_val=-((int)cur_section); /* cast needed because of sign extension */
pop_level(true); /* follow a continuation or resume the stacked level */
if (cur_val==0) goto restart; /* section number was zero: nothing to report */
out_char(section_number); return;
}
a=*cur_byte++;
if (out_state==verbatim && a!=string && a!=constant && a!='\n')
C_putc(a); /* a high-bit character can occur in a string */
else if (a<0200) out_char(a); /* one-byte token */
else { /* two-byte token: combine this byte with the next one */
a=(a-0200)*0400+*cur_byte++;
switch (a/024000) { /* |024000==(0250-0200)*0400| */
case 0: cur_val=(int)a; out_char(identifier); break; /* an identifier */
case 1: if (a==output_defs_flag) output_defs();
else @<Expand section |a-024000|, |goto restart|@>@;
break;
default: cur_val=(int)a-050000; /* a section number */
if (cur_val>0) cur_section=(sixteen_bits)cur_val;
out_char(section_number);
}
}
}
@ The user may have forgotten to give any \CEE/ text for a section name,
or the \CEE/ text may have been associated with a different name by mistake.
@<Expand section |a-...@>=
{
a-=024000; /* now |a+name_dir| is the section name to be expanded */
if ((a+name_dir)->equiv!=(void *)text_info) push_level(a+name_dir);
else if (a!=0) { /* |equiv| still has the value that |init_node| gave it */
printf("%s","\n! Not present: <");
print_section_name(a+name_dir); err_print(">");
@.Not present: <section name>@>
}
goto restart;
}
@* Producing the output.
The |get_output| routine above handles most of the complexity of output
generation, but there are two further considerations that have a nontrivial
effect on \.{CTANGLE}'s algorithms.
@ First,
we want to make sure that the output has spaces and line breaks in
the right places (e.g., not in the middle of a string or a constant or an
identifier, not at a `\.{@@\&}' position
where quantities are being joined together, and certainly after an \.=
because the \CEE/ compiler thinks \.{=-} is ambiguous).
The output process can be in one of the following states:
\yskip\hang |num_or_id| means that the last item in the buffer is a number or
identifier, hence a blank space or line break must be inserted if the next
item is also a number or identifier.
\yskip\hang |unbreakable| means that the last item in the buffer was followed
by the \.{@@\&} operation that inhibits spaces between it and the next item.
\yskip\hang |verbatim| means we're copying only character tokens, and
that they are to be output exactly as stored. This is the case during
strings, verbatim constructions and numerical constants.
\yskip\hang |post_slash| means we've just output a slash.
\yskip\hang |normal| means none of the above.
\yskip\noindent Furthermore, if the variable |protect| is |true|, newlines
are preceded by a `\.\\'.
@d normal 0 /* non-unusual state */
@d num_or_id 1 /* state associated with numbers and identifiers */
@d post_slash 2 /* state following a \./ */
@d unbreakable 3 /* state associated with \.{@@\&} */
@d verbatim 4 /* state in the middle of a string */
@<Private...@>=
static eight_bits out_state; /* current status of partial output */
static bool protect; /* should newline characters be quoted? */
@ Here is a routine that is invoked when we want to output the current line.
During the output process, |cur_line| equals the number of the next line
to be output.
@c
static void
flush_buffer(void) /* writes one line to output file */
{
  C_putc('\n'); /* terminate the line just produced */
  if (cur_line % 100 == 0) {
    if (show_progress) { /* progress report */
      putchar('.'); /* one dot for every hundred lines */
      if (cur_line % 500 == 0) printf("%d",cur_line); /* and the count every five hundred */
      update_terminal();
    }
  }
  cur_line++; /* |cur_line| is the number of the next line to be output */
}
@ @<Predecl...@>=@+static void flush_buffer(void);
@ Second, we have modified the original \.{TANGLE} so that it will write output
on multiple files.
If a section name is introduced in at least one place by \.{@@(}
instead of \.{@@<}, we treat it as the name of a file.
All these special sections are saved on a stack, |output_files|.
We write them out after we've done the unnamed section.
@d max_files 256
@<Private...@>=
static name_pointer output_files[max_files];
static name_pointer *cur_out_file, *end_output_files, *an_output_file;
static char cur_section_name_char; /* is it |'<'| or |'('| */
static char output_file_name[longest_name+1]; /* name of the file */
@ We make |end_output_files| point just beyond the end of
|output_files|. The stack pointer
|cur_out_file| starts out there. Every time we see a new file, we
decrement |cur_out_file| and then write it in.
@<Set initial...@>=
cur_out_file=end_output_files=output_files+max_files;
@ @<If it's not there, add |cur_section_name| to the output file stack, or
complain we're out of room@>=
{
for (an_output_file=cur_out_file;
an_output_file<end_output_files; an_output_file++)
if (*an_output_file==cur_section_name) break; /* it is already on the stack */
if (an_output_file==end_output_files) { /* the search found nothing */
if (cur_out_file>output_files)
*--cur_out_file=cur_section_name; /* the stack grows downward */
else overflow("output files");
}
}
@* The big output switch. Here then is the routine that does the
output.
@c
/* Phase two: write the unnamed sections to |C_file|, then write
   every named output file */
static void
phase_two (void) {
phase=2;
web_file_open=false;
cur_line=1;
@<Initialize the output stacks@>@;
@<Output macro definitions if appropriate@>@;
if (text_info->text_link==macro && cur_out_file==end_output_files) {
/* no unnamed section has text and the stack of output files is empty */
printf("%s","\n! No program text was specified."); mark_harmless();
@.No program text...@>
}
else {
if (show_progress) {
printf(cur_out_file==end_output_files ? @|
"\nWriting the output file (%s):" : @|
"\nWriting the output files: (%s)",C_file_name);
@.Writing the output...@>
update_terminal();
}
if (text_info->text_link!=macro) /* there is text from unnamed sections */
@<Output material...@>@;
@<Write all the named output files@>@;
if (show_happiness) {
if (show_progress) new_line();
printf("%s","Done.");
}
}
}
@ @<Predecl...@>=
static void phase_two(void);@/
static void output_defs(void);@/
static void out_char(eight_bits);
@ To write the named output files, we proceed as for the unnamed
section.
The only subtlety is that we have to open each one.
@<Write all the named output files@>=
for (an_output_file=end_output_files; an_output_file>cur_out_file;) {
an_output_file--; /* the earliest stacked entry is visited first */
sprint_section_name(output_file_name,*an_output_file);
fclose(C_file); /* finished with the file written so far */
if ((C_file=fopen(output_file_name,"wb"))==NULL)
fatal("! Cannot open output file ",output_file_name);
@.Cannot open output file@>
if (show_progress) { printf("\n(%s)",output_file_name); update_terminal(); }
cur_line=1; /* line numbering starts afresh in each file */
@<Initialize the secondary output@>@;
@<Output material...@>@;
}
@ @<Output material from |stack|@>=
{
while (stack_ptr>stack) get_output();
flush_buffer();
}
@ If a \.{@@h} was not encountered in the input,
we go through the list of replacement texts and copy the ones
that refer to macros, preceded by the \.{\#define} preprocessor command.
@<Output macro definitions if appropriate@>=
if (!output_defs_seen)
output_defs();
@ @<Private...@>=
static bool output_defs_seen=false;
@ @d C_printf(c,a) fprintf(C_file,c,a)
@d C_putc(c) fputc((int)(c),C_file) /* isn't \CEE/ wonderfully consistent? */
@c
/* Writes each macro replacement text found in |text_info| as a
   \.{\#define} line */
static void
output_defs(void)
{
sixteen_bits a; eight_bits *macro_end;
push_level(NULL); /* save the current state; |cur_byte| is set below */
for (cur_text=text_info+1; cur_text<text_ptr; cur_text++)
if (cur_text->text_link==macro) { /* |cur_text| is the text for a |macro| */
cur_byte=cur_text->tok_start;
macro_end=(cur_text+1)->tok_start; /* end of |macro| replacement text */
C_printf("%s","#define ");
out_state=normal;
protect=true; /* newlines should be preceded by |'\\'| */
do macro_end--; while (isspace(*macro_end)&&plus_plus!=*macro_end);
/* discard trailing whitespace; |plus_plus=='\v'| */
while (cur_byte<=macro_end) { /* |macro_end| is the last byte that is kept */
a=*cur_byte++;
if (out_state==verbatim && a!=string && a!=constant && a!='\n')
C_putc(a); /* a high-bit character can occur in a string */
@^high-bit character handling@>
else if (a<0200) out_char(a); /* one-byte token */
else { /* two-byte token: combine this byte with the next one */
a=(a-0200)*0400+*cur_byte++;
if (a<024000) { /* |024000==(0250-0200)*0400| */
cur_val=(int)a; out_char(identifier);
}
else if (a<050000) confusion("macro defs have strange char");
else {
cur_val=(int)a-050000; cur_section=(sixteen_bits)cur_val;
out_char(section_number);
}
/* no other cases */
}
}
protect=false;
flush_buffer(); /* end the line of this definition */
}
pop_level(false); /* return to the state saved by |push_level| */
}
@ A many-way switch is used to send the output. Note that this function
is not called if |out_state==verbatim|, except perhaps with arguments
|'\n'| (protect the newline), |string| (end the string), or |constant|
(end the constant).
@c
/* Writes one token to the \CEE/ file and updates |out_state| accordingly */
static void
out_char(
eight_bits cur_char)
{
char *j; /* pointer into |byte_mem| */
restart:
switch (cur_char) {
case '\n': if (protect && out_state!=verbatim) C_putc(' ');
if (protect || out_state==verbatim) C_putc('\\');
flush_buffer(); if (out_state!=verbatim) out_state=normal; break;
@/@t\4@>@<Case of an identifier@>@;
@/@t\4@>@<Case of a section number@>@;
@/@t\4@>@<Cases like \.{!=}@>@;
case '=': case '>': C_putc(cur_char);@+C_putc(' '); /* always followed by a blank */
out_state=normal; break;
case join: out_state=unbreakable; break;
case constant: if (out_state==verbatim) { /* this ends the constant */
out_state=num_or_id; break;
}
if(out_state==num_or_id) C_putc(' '); out_state=verbatim; break;
case string: if (out_state==verbatim) out_state=normal; /* this ends the string */
else out_state=verbatim; break;
case '/': C_putc('/'); out_state=post_slash; break;
case '*': if (out_state==post_slash) C_putc(' '); /* keep the slash and the star apart */
@=/* fall through */@>@;
default: C_putc(cur_char); out_state=normal; break;
}
}
@ @<Cases like \.{!=}@>=@t\1\quad@>
case plus_plus: C_putc('+');@+C_putc('+'); out_state=normal; break;
case minus_minus: C_putc('-');@+C_putc('-'); out_state=normal; break;
case minus_gt: C_putc('-');@+C_putc('>'); out_state=normal; break;
case gt_gt: C_putc('>');@+C_putc('>'); out_state=normal; break;
case eq_eq: C_putc('=');@+C_putc('='); out_state=normal; break;
case lt_lt: C_putc('<');@+C_putc('<'); out_state=normal; break;
case gt_eq: C_putc('>');@+C_putc('='); out_state=normal; break;
case lt_eq: C_putc('<');@+C_putc('='); out_state=normal; break;
case non_eq: C_putc('!');@+C_putc('='); out_state=normal; break;
case and_and: C_putc('&');@+C_putc('&'); out_state=normal; break;
case or_or: C_putc('|');@+C_putc('|'); out_state=normal; break;
case dot_dot_dot: C_putc('.');@+C_putc('.');@+C_putc('.'); out_state=normal;
break;
case colon_colon: C_putc(':');@+C_putc(':'); out_state=normal; break;
case period_ast: C_putc('.');@+C_putc('*'); out_state=normal; break;
case minus_gt_ast: C_putc('-');@+C_putc('>');@+C_putc('*'); out_state=normal;
break;
@ When an identifier is output to the \CEE/ file, characters in the range
128--255 (|0200|--|0377|) must be changed into something else, so the \CEE/
compiler won't complain. By default, \.{CTANGLE} converts the
character with code $16 x+y$ to the three characters `\.X$xy$', but
a different transliteration table can be specified. Thus a German
might want {\it gr\"un\/} to appear as a still readable \.{gruen}.
This makes debugging a lot less confusing.
@d translit_length 10
@<Private...@>=
static char translit[0200][translit_length];
@ @<Set init...@>=
{ int i;
for (i=0;i<0200;i++)
snprintf(translit[i],translit_length,"X%02X",(unsigned int)(0200+i));
}
@ @<Case of an identifier@>=@t\1\quad@>
case identifier:
if (out_state==num_or_id) C_putc(' '); /* separate it from the preceding number or identifier */
for (j=(cur_val+name_dir)->byte_start;
j<(cur_val+name_dir+1)->byte_start; j++) /* each character of the name */
if (ishigh(*j)) C_printf("%s",translit[(eight_bits)(*j)-0200]);
@^high-bit character handling@>
else C_putc(*j);
out_state=num_or_id; break;
@ @<Case of a sec...@>=@t\1\quad@>
case section_number:
if (cur_val>0) C_printf("/*%d:*/",cur_val); /* a section begins */
else if(cur_val<0) C_printf("/*:%d*/",-cur_val); /* a section ends */
else if (protect) { /* zero while |protect| is set: no line command is written */
cur_byte +=4; /* skip line number and file name */
cur_char = (eight_bits)'\n';
goto restart; /* handle it as a newline instead */
} else {
sixteen_bits a;
a=*cur_byte++ *0400;
a+=*cur_byte++; /* gets the line number */
C_printf("\n#line %d \"",(int)a);
@:line}{\.{\#line}@>
cur_val=(int)(*cur_byte++-0200)*0400;
cur_val+=*cur_byte++; /* points to the file name */
for (j=(cur_val+name_dir)->byte_start;
j<(cur_val+name_dir+1)->byte_start; j++) {
if (*j=='\\' || *j=='"') C_putc('\\'); /* escape backslashes and double quotes */
C_putc(*j);
}
C_putc('"');@+C_putc('\n');
}
break;
@** Introduction to the input phase.
We have now seen that \.{CTANGLE} will be able to output the full
\CEE/ program, if we can only get that program into the byte memory in
the proper format. The input process is something like the output process
in reverse, since we compress the text as we read it in and we expand it
as we write it out.
There are three main input routines. The most interesting is the one that gets
the next token of a \CEE/ text; the other two are used to scan rapidly past
\TEX/ text in the \.{CWEB} source code. One of the latter routines will jump to
the next token that starts with `\.{@@}', and the other skips to the end
of a \CEE/ comment.
@ Control codes in \.{CWEB} begin with `\.{@@}', and the next character
identifies the code. Some of these are of interest only to \.{CWEAVE},
so \.{CTANGLE} ignores them; the others are converted by \.{CTANGLE} into
internal code numbers by the |ccode| table below. The ordering
of these internal code numbers has been chosen to simplify the program logic;
larger numbers are given to the control codes that denote more significant
milestones.
@d ignore 00 /* control code of no interest to \.{CTANGLE}; also the default entry of |ccode| */
@d ord 0302 /* control code for `\.{@@'}' */
@d control_text 0303 /* control code for `\.{@@t}', `\.{@@q}', `\.{@@\^}', `\.{@@:}', and `\.{@@.}' */
@d translit_code 0304 /* control code for `\.{@@l}' */
@d output_defs_code 0305 /* control code for `\.{@@h}' */
@d format_code 0306 /* control code for `\.{@@f}' and `\.{@@s}' */
@d definition 0307 /* control code for `\.{@@d}' */
@d begin_C 0310 /* control code for `\.{@@c}' and `\.{@@p}' */
@d section_name 0311 /* control code for `\.{@@<}' and `\.{@@(}' */
@d new_section 0312 /* control code for `\.{@@\ }', `\.{@@*}', and \.{@@} before other white space */
@<Private...@>=
static eight_bits ccode[256]={ignore}; /* meaning of a char following \.{@@};
  every entry that is not assigned explicitly remains |ignore| */
@ @<Set ini...@>=
/* white space or an asterisk after \.{@@} begins a new section */
ccode[' ']=new_section; ccode['\t']=new_section; ccode['\n']=new_section;
ccode['\v']=new_section; ccode['\r']=new_section; ccode['\f']=new_section;
ccode['*']=new_section;
/* codes that have a single spelling */
ccode['@@']=(eight_bits)'@@';
ccode['=']=string;
ccode['&']=join;
ccode['\'']=ord;
/* codes that may be written in more than one way */
ccode['d']=definition; ccode['D']=definition;
ccode['f']=format_code; ccode['F']=format_code;
ccode['s']=format_code; ccode['S']=format_code;
ccode['c']=begin_C; ccode['C']=begin_C;
ccode['p']=begin_C; ccode['P']=begin_C;
ccode['h']=output_defs_code; ccode['H']=output_defs_code;
ccode['l']=translit_code; ccode['L']=translit_code;
ccode['<']=section_name; ccode['(']=section_name;
/* control texts, which extend to the next \.{@@>} */
ccode['^']=control_text; ccode[':']=control_text; ccode['.']=control_text;
ccode['t']=control_text; ccode['T']=control_text;
ccode['q']=control_text; ccode['Q']=control_text;
@ The |skip_ahead| procedure reads through the input at fairly high speed
until finding the next non-ignorable control code, which it returns.
@c
static eight_bits
skip_ahead(void) /* skip to next control code */
{
  for (;;) {
    eight_bits cc; /* control code found */
    if (loc>limit && !get_line()) return new_section; /* no more input */
    limit[1]='@@'; /* sentinel that stops the scan below */
    while (*loc!='@@') ++loc;
    if (loc>limit) continue; /* only the sentinel was found; try the next line */
    cc=ccode[(eight_bits)loc[1]]; /* look up the character after \.{@@} */
    loc+=2; /* move past both characters of the control code */
    if (cc!=ignore || loc[-1]=='>') return cc; /* \.{@@>} is reported whatever its code */
  }
}
@ @<Predecl...@>=
static eight_bits skip_ahead(void); /* returns the next control code found */@/
static bool skip_comment(bool); /* returns |true| if a long comment goes on after a new line was read */
@ The |skip_comment| procedure reads through the input at somewhat high
speed in order to pass over comments, which \.{CTANGLE} does not transmit
to the output. If the comment is introduced by \.{/*}, |skip_comment|
proceeds until finding the end-comment token \.{*/} or a newline; in the
latter case |skip_comment| will be called again by |get_next|, since the
comment is not finished. This is done so that each newline in the
\CEE/ part of a section is copied to the output; otherwise the \#\&{line}
commands inserted into the \CEE/ file by the output routines become useless.
On the other hand, if the comment is introduced by \.{//} (i.e., if it
is a \CPLUSPLUS/ ``short comment''), it always is simply delimited by the next
newline. The boolean argument |is_long_comment| distinguishes between
the two types of comments.
If |skip_comment| comes to the end of the section, it prints an error message.
No comment, long or short, is allowed to contain `\.{@@\ }' or `\.{@@*}'.
@<Private...@>=
static bool comment_continues=false; /* are we scanning a comment?
  |skip_comment| sets this each time it returns, and |get_next| tests it */
@ @c
static bool skip_comment( /* skips over comments */
bool is_long_comment)
{
  for (;;) {
    char ch; /* character just scanned */
    if (loc>limit) { /* the current line is used up */
      if (!is_long_comment)
        return comment_continues=false; /* a short comment ends with its line */
      if (get_line())
        return comment_continues=true; /* the long comment goes on */
      err_print("! Input ended in mid-comment");
@.Input ended in mid-comment@>
      return comment_continues=false;
    }
    ch=*loc++;
    if (ch=='*' && is_long_comment && *loc=='/') { /* end-comment token */
      loc++; return comment_continues=false;
    }
    if (ch!='@@') continue;
    if (ccode[(eight_bits)*loc]!=new_section) {
      loc++; continue; /* pass over the second character of the control code */
    }
    err_print("! Section name ended in mid-comment"); loc--;
@.Section name ended in mid-comment@>
    return comment_continues=false; /* |loc| points at the \.{@@} again */
  }
}
@* Inputting the next token.
@<Private...@>=
static name_pointer cur_section_name; /* name of section just scanned */
static bool no_where; /* suppress |print_where|? when |true|, |get_next| inserts
  no line number even though |print_where| is set */
@ As one might expect, |get_next| consists mostly of a big switch
that branches to the various special cases that can arise.
@c
static eight_bits
get_next(void) /* produces the next input token */
{
  static bool preprocessing=false; /* are we inside a preprocessor directive? */
  eight_bits c; /* the current character */
  while (true) {
    if (loc>limit) { /* the current line is used up */
      if (preprocessing && (limit==buffer || *(limit-1)!='\\'))
        preprocessing=false; /* a directive goes on only after a final backslash;
          an empty line is tested first so that we never look in front of |buffer| */
      if (get_line()==false) return new_section;
      else if (print_where && !no_where) {
          sixteen_bits a;
          print_where=false;
          @<Insert the line number into |tok_mem|@>@;
        }
        else return (eight_bits)'\n';
    }
    c=(eight_bits)*loc;
    if (comment_continues || (c=='/' && (*(loc+1)=='*' || *(loc+1)=='/'))) {
      /* we are inside a comment or at the start of one */
      if (skip_comment(comment_continues||*(loc+1)=='*')) return '\n';
          /* scan to end of comment or newline */
      else continue;
    }
    loc++;
    if (xisdigit(c) || c=='.') @<Get a constant@>@;
    else if (c=='\'' || c=='"'@|
           || ((c=='L' || c=='u' || c=='U')&&(*loc=='\'' || *loc=='"'))@|
           || ((c=='u' && *loc=='8')&&(*(loc+1)=='\'' || *(loc+1)=='"')))
        @<Get a string@>@;
    else if (isalpha((int)c) || isxalpha(c) || ishigh(c))
      @<Get an identifier@>@;
    else if (c=='@@') @<Get control code and possible section name@>@;
    else if (xisspace(c)) {
        if (!preprocessing || loc>limit) continue;
          /* we don't want a blank after a final backslash */
        else return (eight_bits)' ';
          /* ignore spaces and tabs, unless |preprocessing| */
    }
    else if (c=='#' && loc==buffer+1) preprocessing=true;
      /* \.{\#} in the first column of a line starts a directive */
    mistake: @<Compress two-symbol operator@>@;
    return c;
  }
}
@ @<Predecl...@>=@+static eight_bits get_next(void); /* returns the code of the next input token */
@ The following code assigns values to the combinations \.{++},
\.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, %\.{||}
\.{\v\v} and~\.{\&\&}, and to the \CPLUSPLUS/
combinations \.{...}, \.{::}, \.{.*} and \.{->*}.
The compound assignment operators (e.g., \.{+=}) are
treated as separate tokens.
@<Compress tw...@>=
switch(c) { /* |c| is the character just scanned; |*loc| is the one that follows it */
case '+': if (*loc=='+') compress(plus_plus); break; /* \.{++} */
case '-': if (*loc=='-') {@+compress(minus_minus);@+} /* \.{--} */
else if (*loc=='>') {
if (*(loc+1)=='*') {loc++;@+compress(minus_gt_ast);} /* \.{->*} */
else compress(minus_gt); /* \.{->} */
} break;
case '.': if (*loc=='*') {@+compress(period_ast);@+} /* \.{.*} */
else if (*loc=='.' && *(loc+1)=='.') {
loc++;@+compress(dot_dot_dot); /* \.{...} */
} break;
case ':': if (*loc==':') compress(colon_colon); break; /* \.{::} */
case '=': if (*loc=='=') compress(eq_eq); break; /* \.{==} */
case '>': if (*loc=='=') {@+compress(gt_eq);@+} /* \.{>=} */
else if (*loc=='>') compress(gt_gt); break; /* \.{>>} */
case '<': if (*loc=='=') {@+compress(lt_eq);@+} /* \.{<=} */
else if (*loc=='<') compress(lt_lt); break; /* \.{<<} */
case '&': if (*loc=='&') compress(and_and); break; /* \.{\&\&} */
case '|': if (*loc=='|') compress(or_or); break; /* \.{\v\v} */
case '!': if (*loc=='=') compress(non_eq); break; /* \.{!=} */
}
@ @<Get an identifier@>= {
  id_first=loc-1; /* the identifier begins with the character already scanned */
  while (isalpha((int)*loc) || isdigit((int)*loc) @|
      || isxalpha(*loc) || ishigh(*loc))
    loc++; /* advance over letters, digits, and other identifier characters */
  id_loc=loc; /* first position after the identifier */
  return identifier;
}
@ @<Get a constant@>= {
  bool hex_flag=false; /* has a hexadecimal prefix been seen? */
  id_first=loc-1; /* the constant begins with the character already scanned */
  if (*id_first=='.' && !xisdigit(*loc))
    goto mistake; /* a period without a digit after it is not a constant */
  if (*id_first=='0') {
    switch (*loc) {
    case 'x': case 'X': /* hex constant */
      hex_flag=true;
      loc++;
      while (xisxdigit(*loc) || *loc=='\'') loc++;
      break;
    case 'b': case 'B': /* binary constant; only suffix letters can follow */
      loc++;
      while (*loc=='0' || *loc=='1' || *loc=='\'') loc++;
      goto found;
    default: break;
    }
  }
  while (xisdigit(*loc) || *loc=='\'') loc++; /* decimal digits and separators */
  if (*loc=='.') { /* fraction part */
    loc++;
    while (xisdigit(*loc) || *loc=='\'' || (hex_flag && xisxdigit(*loc))) loc++;
  }
  if (*loc=='e' || *loc=='E') { /* float constant */
    loc++;
    if (*loc=='+' || *loc=='-') loc++; /* optional sign of the exponent */
    while (xisdigit(*loc) || *loc=='\'') loc++;
  }
  else if (hex_flag && (*loc=='p' || *loc=='P')) { /* hex float constant */
    loc++;
    if (*loc=='+' || *loc=='-') loc++; /* optional sign of the exponent */
    while (xisxdigit(*loc) || *loc=='\'') loc++;
  }
found:
  while (*loc=='u' || *loc=='U' || *loc=='l' || *loc=='L'
      || *loc=='f' || *loc=='F') loc++; /* suffix letters */
  id_loc=loc; /* first position after the constant */
  return constant;
}
@ \CEE/ strings and character constants, delimited by double and single
quotes, respectively, can contain newlines or instances of their own
delimiters if they are protected by a backslash. We follow this
convention, but do not allow the string to be longer than |longest_name|.
@<Get a string@>= {
char delim = (char)c; /* what started the string */
id_first = section_text+1;
id_loc = section_text; *++id_loc=delim;
if (delim=='L' || delim=='u' || delim=='U') { /* wide character constant */
if (delim=='u' && *loc=='8') *++id_loc=*loc++;
delim=*loc++; *++id_loc=delim;
}