-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathTXT_HTML.vbs
More file actions
2059 lines (1722 loc) · 79.4 KB
/
TXT_HTML.vbs
File metadata and controls
2059 lines (1722 loc) · 79.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Public TitlePrinted As Boolean
Public TCR_HTMLerror As Boolean
Public Const max_chapt_len = 40
Const div_end = "</DIV>"
Const nbs1 = " "
Const nbs4 = " "
Const div_center = "<DIV align=center>"
Const div_jus_st = "<DIV align=justify>"
Const brl = "<BR>"
Dim SpaceNum As String
Private InLine1 As String
Private InLine2 As String
'max line length to print centered
Public Const maxDigLen = 4 'for digits
Public Const maxSpecialLen = 40 'for special words (as Ãëàâà)
Public Const maxBetweenTwoEmptyLen = 90 'max line len between two empty line
Public Const maxStarLen = 20 'for ***
Public Const maxCapitalLen = 80 'for capital words
Public Const maxStartFromDig = 40 '80 'for chapters titles starting from digit
Const minAvLen = 200 'min averaged line length
Public Const MaxAuthorLen = 80 'max length of author name
Public Const MaxBookTitleLen = 80 ''max length of book title
Public Const maxTitleLenForLit = 150 'if the line between two empty and if converted from lit-file
Public Const MaxLenSubtitle = 80
Public Const MaxSubtitLineNum = 3
Public Const MaxLinesToAccumulateFirstEmpty = 16
Public Const MaxLinesToAccumulateFirstNoEmpty = 3
Public Const MinEpigraphLineNum = 2
Public Const MaxEpigraphLineNum = 6
Public Const MaxEpigAutLen = 40
Public Const EpigMaxLineLenToJoin = 40
Public Const MinVersesLineNum = 2
Public Const MinVersesLineNumNoEmpty = 3
Public Const MaxVersesLineNum = 16 'HAS TO BE = MaxLinesToAccumulateFirstEmpty
Public Const MaxLenVerseNoEmpty = 35
Public Const MaxLinesToAccumulateForVerses = 35 '16
Public MainTitleFound As Boolean
Public MainTitleFoundAsException As Boolean
Private AfterFindBookTitle As Boolean
Public Const MaxLenComaCapital = 40 'line1 ends by coma, line2 starts from capital
Public Function TXT_HTML(TxtFile As String, Optional FromRb As String, _
Optional NoHeader As Boolean = False, Optional SaveFileAs As String, Optional FromPdf = False) As String
On Error Resume Next
If NoHeader Then
Dim MakeChapterBookmarksOld As Boolean, MakeBookContentsOld As Boolean
MakeChapterBookmarksOld = MakeChapterBookmarks: MakeBookContentsOld = MakeBookContents
MakeChapterBookmarks = False: MakeBookContents = False
End If
TitlePrinted = False
CurBookSubtitle = ""
TxtToHtmlLastTitPos = 0
TxtToHtmlFirstAbsTitleFound = False
ContentsSignInserted = False
BookContentsStr = ""
AfterTitleFound = False
If InputFileExt <> "lit" Then SpacesNum = " " Else SpacesNum = " "
NumContBook = 0
If InputFileExt = "lit" Then
If EmptyLineLim(BookTitle) Then BookTitle = ""
If EmptyLineLim(BookAuthor) Then BookAuthor = ""
Else
BookTitle = "": BookAuthor = ""
End If
Dim TmpFileName As String, TmpFileName1 As String, LineLen As Long, lineLen1 As Long, _
AvLineLen As Double, linesToTest As Integer, _
OutputFile As String, OutString As String, SearhStr As String, _
LineNum As Long, FormatRec As String, _
parFound As Long, handleIn As Long, HandleOut As Long, _
handleIn1 As Long, HandleOut1 As Long, _
ParLineRat As Double, _
Line1 As String, line2 As String, line3 As String, line4 As String, _
ParToPrint As String, _
AvLeadingSpacesNum As Double, EmptyNum As Integer, FirstParLine As Boolean, _
ContentStrLen As Long, StartTime As Long
StartTime = timeGetTime
'make tmp and output files names
TmpFileName = CurrentPath & "\" & "0000_000.tmp0"
TmpFileName1 = CurrentPath & "\" & "0000_001.tmp0"
If NoHeader = False Then
OutputFile = LastTmpDir & "\" & filecommand(GetShortNameNoExtension, TxtFile, "") & ".html0"
Else
OutputFile = SaveFileAs
End If
''''''''''''''''''debug.print OutputFile: End
'CORRECT tmp-file CONTENT
filecommand CopyTheFile, TxtFile, TmpFileName
'detect charset
CharsetDetector TmpFileName
If WhereToClean = "0" Or WhereToClean = "2" Then
'''''debug.print "WhereToClean=", WhereToClean, "txt to html: cleanig before"
CleanUpF.CleanUpBookFile FileContent
End If
''''''''''''''''''debug.print FileContent
Form1.Status.Panels(2).Text = "...converting txt to html..."
If InputFileExt = "tcr" Then
pos0 = InStr(1, FileContent, "PPL", vbTextCompare)
If pos0 > 0 And pos0 < 3 Then FileContent = Replace(FileContent, "PPL", "", 1, 1, vbTextCompare)
FileContent = Replace(FileContent, "<* >", "", 1, 1, vbTextCompare)
End If
'REMOVE HTML SHIT
'remove <head>
pos0 = InStr(1, FileContent, "</HEAD>", vbTextCompare)
If pos0 > 0 Then FileContent = Right(FileContent, Len(FileContent) - pos0 - 6)
'remove often-used tags
'FileContent = Remove_Tags(FileContent, "<u[^>]*>", " ", False)
' FileContent = Remove_Tags(FileContent, "</u[^>]*>", " ", False)
'FileContent = Remove_Tags(FileContent, "<h[^>]*>", " ", False)
' FileContent = Remove_Tags(FileContent, "</h[^>]*>", " ", False)
'FileContent = Remove_Tags(FileContent, "<a[^>]*>", " ", False)
' FileContent = Replace(FileContent, "</a>", " ", , , vbTextCompare)
'FileContent = Remove_Tags(FileContent, "<dir[^>]*>", " ", False)
''''''''''''''''''debug.print InStr(1, FileContent, "<HTML>", vbTextCompare)
If InStr(1, FileContent, "<H", vbTextCompare) Or InStr(1, FileContent, "<dir>", vbTextCompare) _
Or InStr(1, FileContent, "<BODY>", vbTextCompare) Or InStr(1, FileContent, "<p", vbTextCompare) _
Or InStr(1, FileContent, "<DIV", vbTextCompare) Or InStr(1, FileContent, "<A", vbTextCompare) _
Or InStr(1, FileContent, "<PRE", vbTextCompare) _
Then
FileContent = Replace(FileContent, "<", Chr(171))
FileContent = Replace(FileContent, ">", Chr(187))
FileContent = Remove_Tags(FileContent, "<[^>]*>", " ")
FileContent = Remove_Tags(FileContent, "&[^;]{0,8};", " ")
Else
FileContent = Replace(FileContent, "<", Chr(171))
FileContent = Replace(FileContent, ">", Chr(187))
FileContent = Replace(FileContent, "<", Chr(171))
FileContent = Replace(FileContent, ">", Chr(187))
End If
'
Form1.Status.Panels(2).Text = "...removing bad symbols..."
RestoreLineEndsGlobal FileContent, False
For i = 0 To 8
FileContent = Replace(FileContent, Chr(i), "")
Next
For i = 11 To 12
FileContent = Replace(FileContent, Chr(i), "")
Next
For i = 14 To 31
FileContent = Replace(FileContent, Chr(i), "")
Next
Form1.Status.Panels(2).Text = "...replacing non-standard symbols..."
'too many double line ends ->remove double
Dim StLen As Double
'StLen = Len(FileContent): If StLen < 1 Then StLen = 1
If GetOccurenceNumberFast(FileContent, LineEnd & LineEnd, True) > 500 Then
FileContent = Replace(FileContent, LineEnd & LineEnd, LineEnd)
End If
'End
' WriteStringFile "f:\tmp2\Html_Txt.htm", FileContent: End
FileContent = Replace(FileContent, " *", "*")
If LastCharset <> "Win1251lat" Then
FileContent = Replace(FileContent, Chr$(132), Chr$(34))
'FileContent = Replace(FileContent, Chr$(145), Chr$(39)) 'added
'FileContent = Replace(FileContent, Chr$(146), Chr$(39)) 'added
FileContent = Replace(FileContent, Chr$(147), Chr$(34))
FileContent = Replace(FileContent, Chr$(148), Chr$(34))
End If
FileContent = Replace(FileContent, Chr$(160), " ")
FileContent = Replace(FileContent, Chr$(150), "-")
'FileContent = Replace(FileContent, Chr(151), "-")
FileContent = Replace(FileContent, Chr$(172), "-")
FileContent = Replace(FileContent, Chr$(173), "")
'FileContent = Replace(FileContent, "--", "-")
FileContent = Replace(FileContent, "==", "-")
If LastCharset <> "Win1251lat" Then
FileContent = Replace(FileContent, Chr$(168), Chr$(197)) ' ¨ -> Å
FileContent = Replace(FileContent, Chr$(184), Chr$(229)) ' ¸ -> å
End If
'FileContent = Replace(FileContent, "$", LineEnd)
FileContent = Replace(FileContent, Chr(9), " ")
If InputFileExt = "kml" Then
LastFormatType = "tabulators": FormatRec = LastFormatType:
LineNum = GetOccurenceNumberFast(FileContent, LineEnd, True)
FileContent = FileContent & LineEnd & LineEnd 'for correct print if on error exit
'WriteStringFile TmpFileName, FileContent
GoTo eRestOrig
'FormatRec = "spaces"
'LineNum = GetOccurenceNumberFast(FileContent, LineEnd, True)
'FileContent = FileContent & LineEnd & LineEnd 'for correct print if on error exit
'WriteStringFile TmpFileName, FileContent
'GoTo eStartRec
End If
'find out number of -
NumOfPer = 100# * CDbl(PatternMatchNumber("-" & Chr(13), FileContent, True)) / CDbl(Len(FileContent))
RemovePerenos = False
'write corrected tmp-file
FileContent = FileContent & LineEnd & LineEnd 'for correct print if on error exit
WriteStringFile TmpFileName, FileContent
'user-defined FormatRec ->goto restore
If LastFormatType <> "auto" Then
If LastFormatType = "tabulators" Then FormatRec = "tabulators" Else _
If LastFormatType = "simple" Then FormatRec = "spaces" Else _
If LastFormatType = "line ends" Then FormatRec = "line ends" Else _
FormatRec = "advanced"
LineNum = GetOccurenceNumberFast(FileContent, LineEnd, True)
GoTo eRestOrig
End If
'short file
If FileLen(TmpFileName) < 5000 Then
LineNum = GetOccurenceNumberFast(FileContent, LineEnd, True)
FormatRec = "advanced": GoTo eStartRec
End If
Form1.Status.Panels(2).Text = "...analysing text structure..."
'FIND PARAGRAPH RECOVERY TYPE
FormatRec = "advanced"
'check averaged line length
Dim SumAi2 As Double, SumAi As Double, LenVariance As Double, Line1Len As Long, _
LineEndsNum As Long, TabsToLineNumRat As Double
SumAi2 = CDbl(0): SumAi = CDbl(0): LineEndsNum = CLng(0)
parFound = 0: LineNum = 1: AvLineLen = 0#: AvLeadingSpacesNum = 0#
handleIn = FreeFile
Open TmpFileName For Input As #handleIn
Do Until EOF(handleIn)
Line Input #handleIn, Line1
Line1Len = Len(Line1)
SumAi = SumAi + Line1Len
SumAi2 = SumAi2 + Line1Len ^ 2#
AvLineLen = AvLineLen + CDbl(Len(Line1))
If Line1Len - Len(LTrim(Line1)) >= 1 Then
AvLeadingSpacesNum = AvLeadingSpacesNum + CDbl(1)
End If
'AvLeadingSpacesNum = AvLeadingSpacesNum + Len(line1) - Len(LTrim(line1))
LineNum = LineNum + CLng(1)
If InStr(Line1, SpacesNum) = 1 Then parFound = parFound + 1
Loop 'Do Until EOF(handleIn)
Close #handleIn 'Open TmpFileName For Input As #handleIn
TabsToLineNumRat = AvLeadingSpacesNum / CDbl(LineNum)
AvLineLen = AvLineLen / CDbl(LineNum)
AvLeadingSpacesNum = AvLeadingSpacesNum / CDbl(LineNum)
LenVariance = (Sqr(SumAi2 - SumAi ^ 2# / CDbl(LineNum)) / CDbl(LineNum)) / AvLineLen
'small len variance ->remove hyps
If LenVariance < 0.07 Then
If NumOfPer >= 0.002 Then
'RemovePerenos = True
FileContent = Replace(FileContent, Chr(32) & "-" & LineEnd, Chr(28))
FileContent = Replace(FileContent, "-" & LineEnd, "")
FileContent = Replace(FileContent, Chr(28), Chr(32) & "-" & LineEnd)
End If
End If
'small line len variance + tabulated pars ->tabulators
If TabsToLineNumRat > 0.1 And LenVariance < 0.07 Then
LastFormatType = "tabulators": FormatRec = LastFormatType: GoTo eRestOrig
End If
'number of line ends aproximatly equal to the number of tabs -> line ends
If TabsToLineNumRat > 0.9 And TabsToLineNumRat < 1.1 Then
LastFormatType = "line ends": FormatRec = LastFormatType: GoTo eRestOrig
End If
''''''''''''''''''debug.print AvLineLen, LineNum / CDbl(Len(FileContent))
'long lines -> line ends
If AvLineLen > 160 Then
LastFormatType = "line ends": FormatRec = LastFormatType: GoTo eRestOrig
End If
'take out leading spaces if there is more than 7
If AvLeadingSpacesNum > 0.85 And InputFileExt <> "lit" Then
'''''''''''''''''''''debug.print "take out leading spaces"
Form1.Status.Panels(2).Text = "...removing leading spaces..."
WriteStringFile TmpFileName1, FileContent
handleIn1 = FreeFile
Open TmpFileName1 For Input As #handleIn1
HandleOut1 = FreeFile
Open TmpFileName For Output As #HandleOut1
Do Until EOF(handleIn1)
Line Input #handleIn1, Line1
Print #HandleOut1, LTrim(Line1)
Loop
Close #HandleOut1
Close #handleIn1
FormatRec = "advanced"
FileContent = ReadStringFile(TmpFileName)
GoTo eStartRec
End If 'If AvLeadingSpacesNum > 0.85 And InputFileExt <> "lit" Then
If AvLineLen < minAvLen And LineNum > 0 Then
ParLineRat = CDbl(parFound) / CDbl(LineNum)
FormatRec = "advanced"
If ParLineRat > 0.1 And ParLineRat < 0.5 Then FormatRec = "spaces"
End If 'If AvLineLen < minAvLen And lineNum > 0 Then
If InputFileExt = "lit" Then
If parFound < 1 Then
FormatRec = "advanced"
Else
FormatRec = "spaces"
LineNum = GetOccurenceNumberFast(FileContent, LineEnd, True)
End If
End If 'If InputFileExt = "lit" Then
'
eStartRec:
eRestOrig:
Form1.Status.Panels(2).Text = "...converting txt to html..."
If FormatRec = "spaces" Then LastFormatRec = "simple" Else LastFormatRec = FormatRec
FileContent = Replace(FileContent, Chr(151), "-")
FileContent = Replace(FileContent, "--", "-")
'FileContent = Replace(FileContent, Chr(169), " ")
'FileContent = RemoveRepeatedSymbols(FileContent, Chr(32))
CleanBookGarbage FileContent, True
FileContent = Replace(FileContent, Chr(1), "")
FileContent = FileContent & LineEnd & TmpFileEnd & LineEnd & Chr(1) & Chr(1) & LineEnd
WriteStringFile TmpFileName, FileContent
Dim TotalLines As Double, LineNum0 As Double
If LineNum <= 1 Then TotalLines = CDbl(1) Else: TotalLines = CDbl(100) / CDbl(LineNum)
''''''''''''''''''debug.print FormatRec: End
'RECOVER TITLES, EPIGRAPHS, VERSES, ETC
Form1.Status.Panels(2).Text = "...txt to html...looking for titles..."
handleIn = FreeFile: Open TmpFileName For Input As #handleIn
'open output file for writing
filecommand DeleteTheFile, TmpFileName1, ""
HandleOut = FreeFile: Open TmpFileName1 For Output As #HandleOut
Dim LineTmp As String, LetAsc As Integer, CurPercents As Integer, SearchRes As Boolean
'
'FIND BOOK AUTHOR AND TITLE
TXT_HTMLFindBookTitle handleIn, HandleOut, LineNum, EmptyNum
TXT_HTMLSubtitleFinger handleIn, HandleOut, "", 1, 60
TXT_HTMLEpigraphFinger handleIn, HandleOut, "" ', , , True
eInpLine1:
'MainTitleFoundAsException = False
CurPercents = CInt(CSng(LineNum) * TotalLines)
If CurPercents > 400 Then GoTo eErrExit
If CurPercents \ 10 Then
If CurPercents <= 100 Then Form1.Status.Panels(1).Text = CurPercents & " %"
End If
Line Input #handleIn, InLine1:
If InStr(InLine1, Chr(1)) Then GoTo EndFile0
LineNum = LineNum + 1
eTitCheck:
'If InStr(InLine1, Chr(32)) = 1 Then FirstSymIsSpace = True Else FirstSymIsSpace = False
If TXT_HTMLTitleFinger(handleIn, HandleOut, InLine1, "JustCheck") Then
'If RemovePerenos Then InLine1 = TXT_HTMLRemoveHyphs(InLine1)
'If InputFileExt = "rb" Then GoTo eInpLine1
TXT_HTMLTitlePrinter handleIn, HandleOut, InLine1, , , PrintAsSubtitle
'TXT_HTMLEpigraphFinger handleIn, HandleOut, ""
SearchRes = TXT_HTMLSubtitleFinger(handleIn, HandleOut, "", 1, 60)
'If MainTitleFoundAsException = False Then
If SearchRes Then TXT_HTMLEpigraphFinger handleIn, HandleOut, "", , , , True _
Else TXT_HTMLEpigraphFinger handleIn, HandleOut, ""
' End If
'MainTitleFoundAsException = False
GoTo eInpLine1
End If 'if TXT_HTMLTitleFinger(HandleOut, InLine2) Then
If EmptyLine(InLine1) = False Then
If FindVerses = 1 And Len(Trim(InLine1)) <= MaxLenVerseNoEmpty Then
If TXT_HTMLVersesFinger(handleIn, HandleOut, InLine1) Then GoTo eInpLine1
End If
Print #HandleOut, InLine1
TitlePrinted = False
GoTo eInpLine1
End If 'If EmptyLine(InLine1) = False Then
'line is empty-> look for subtitle
CheckAgain:
' If InputFileExt = "rb" Then GoTo eBrCheck
SearchRes = TXT_HTMLSubtitleFinger(handleIn, HandleOut, "", 2, 60)
'If MainTitleFoundAsException = False Then
SearchRes = TXT_HTMLEpigraphFinger(handleIn, HandleOut, "", , , , True)
'SearchRes = TXT_HTMLVersesFinger(handleIn, HandleOut, "")
' End If
'SearchRes = TXT_HTMLVersesFinger(handleIn, HandleOut, "")
If SearchRes Then GoTo eInpLine1
'If FormatRec = "tabulators" Or FormatRec = "line ends" Then
If FormatRec = "line ends" Then
eBrCheck0: 'put <BR> instead of empty lines
Line Input #handleIn, LineTmp: If InStr(LineTmp, Chr(1)) Then GoTo EndFile0
LineNum = LineNum + 1
If EmptyLine(LineTmp) Then GoTo eBrCheck0
Print #HandleOut, "<BR>"
InLine1 = LineTmp: GoTo eTitCheck
End If 'If FormatRec = "tabulators" Then
eBrCheck: 'put <BR> if it does not break a line
Line Input #handleIn, LineTmp: If InStr(LineTmp, Chr(1)) Then GoTo EndFile0
LineNum = LineNum + 1
If EmptyLine(LineTmp) Then GoTo eBrCheck
LetAsc = Asc(LTrim(LineTmp))
If LetterIsSmall(LetAsc) Then
'join lines
Print #HandleOut, LineTmp
TitlePrinted = False
Else
Print #HandleOut, "<BR>"
InLine1 = LineTmp: GoTo eTitCheck
'Print #HandleOut, LineTmp
End If 'If LetterIsSmall(LetAsc) Then
''''''''''''''''''debug.print InLine1: End
GoTo eInpLine1
'Loop
EndFile0:
Close #HandleOut
Close #handleIn
''''''''''''''''''debug.print Err.Number: End
OutString = ReadStringFile(TmpFileName1)
OutString = OutString & LineEnd & Chr(1) & Chr(1)
''''''''''''''''''debug.print OutString
WriteStringFile TmpFileName, OutString
' WriteStringFile "f:\tmp\AfterTit.txt", OutString: End
'filecommand CopyTheFile, TmpFileName, "F:\tmp3\after titles.txt": End
''''''''''''''''''debug.print FormatRec: End
'RESTORE PARAGRAPHS
eGetPar:
'FormatRec = "tabulators"
Form1.Status.Panels(2).Text = "...txt to html...formatting paragraphs..."
''''''''''''''''''debug.print lineNum, TotalLines, 1# / CSng(lineNum) * 100: End
TotalLines = 1# / CSng(LineNum) * 100#
EmptyNum = 0: LineNum0 = 0: LineNum = 0
handleIn = FreeFile: Open TmpFileName For Input As #handleIn
'open output file for writing
HandleOut = FreeFile: Open OutputFile For Output As #HandleOut
If NoHeader = False Then
'write html start
fsd = ConvertFontSizeToDigit(NewFileFontSize)
CurChar = "windows-1251"
If LastCharset = "Win1251lat" Then CurChar = "windows-1252"
''''''''''''''''''debug.print LastCharset
Print #HandleOut, "<HTML>" & LineEnd & _
"<meta content=""text/html; charset=" & CurChar & """ http-equiv=""Content-Type"">" & _
ReaderJustified & LineEnd & _
"<BODY bgColor=" & NewFileBackColor & ">" & LineEnd & _
"<BASEFONT size=""" & fsd & """" & " face=""" & NewFileFontName & """" & ">" & LineEnd & _
"<BODY TEXT=" & NewFileForeColor & ">" & LineEnd & _
"<DIV></DIV>"
End If 'If NoHeader = False Then
'ORIGINAL RECONSTRUCTION
If FormatRec = "line ends" Then
''''''''''''''''''debug.print "line ends"
eInpOneLine:
If TXT_HTMLInputFirstLine(handleIn, HandleOut, LineNum) = False Then GoTo EndFile1
CurPercents = CInt(CSng(LineNum) * TotalLines)
If CurPercents > 400 Then GoTo eErrExit
If CurPercents \ 10 Then
If CurPercents <= 100 Then Form1.Status.Panels(1).Text = CurPercents & " %"
End If
'TXT_HTMLRemoveHyphs InLine1, RemovePerenos
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
GoTo eInpOneLine
End If 'If FormatRec = "line ends" Then
'loop over file lines
eInputTwoLines:
If TXT_HTMLInputFirstLine(handleIn, HandleOut, LineNum) = False Then GoTo EndFile1
If TXT_HTMLInputSecondLine(handleIn, HandleOut, LineNum, InLine1) = False Then GoTo EndFile1
GoTo eInputNothing
EmptyCheck:
FirstParLine = True
eInputLine2:
If TXT_HTMLInputSecondLine(handleIn, HandleOut, LineNum, InLine1, FirstParLine) = False Then
If EmptyLine(InLine1) = False Then TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
GoTo EndFile1
End If
eInputNothing:
CurPercents = CInt(CSng(LineNum) * TotalLines)
If CurPercents > 400 Then
GoTo eErrExit
End If
If CurPercents \ 10 Then
If CurPercents <= 100 Then Form1.Status.Panels(1).Text = CurPercents & " %"
End If
If FormatRec = "tabulators" Then
''''''''''''''''''debug.print AscCod, InLine2
AscCod = Asc(Trim(InLine2) & " ")
If LetterIsSmall(AscCod) And FromPdf = False Then
'TXT_HTMLRemoveHyphs InLine1, RemovePerenos
InLine1 = RTrim(InLine1) & " " & Trim(InLine2)
GoTo eInputLine2
End If
AscCod = Asc(InLine2 & " ")
If AscCod <> 32 And AscCod <> 133 Then
'TXT_HTMLRemoveHyphs InLine1, RemovePerenos
InLine1 = InLine1 & " " & LTrim(InLine2)
GoTo eInputLine2
End If 'If AscCod <> 32 Then
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
' '''''''''''''''''debug.print InLine1
InLine1 = InLine2
GoTo EmptyCheck
End If 'If FormatRec = "tabulators" Then
'SPACE RECONSTRUCTION
If FormatRec = "spaces" Then
AscCod = Asc(InLine2 & " ")
If AscCod <> 32 Then
'TXT_HTMLRemoveHyphs InLine1, RemovePerenos
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
End If 'If AscCod <> 32 Then
If FirstParLine Then GoTo ePrr
pEnd = Asc(Right(" " & Trim(InLine1), 1))
''''''''''''''''''debug.print pend
If pEnd = 32 Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
End If
pst = Asc(Left(Trim(InLine2), 1) & " ")
'print if line2 starts from [
If pst = 91 Then GoTo ePrr
'line1 ends by . line2 starts from . ->print line1
If pEnd = 46 And pst = 46 Then
''''''''''''''''''debug.print InLine2
GoTo ePrr
End If
'for english texts
If LastCharset = "Win1251lat" Then
If (pst = 39 And (pEnd = 46 Or pEnd = 133 Or pEnd = 39)) _
Or (pst = 34 And (pEnd = 46 Or pEnd = 133 Or pEnd = 34)) _
Or ((pst = 145 Or pst = 146) And (pEnd = 46 Or pEnd = 133 Or pEnd = 145 Or pEnd = 146)) _
Or ((pst = 147 Or pst = 148) And (pEnd = 46 Or pEnd = 133 Or pEnd = 147 Or pEnd = 148)) _
Or ((pst = 171) And (pEnd = 46 Or pEnd = 133 Or pEnd = 187)) _
Or (((pst >= 65 And pst <= 91) Or (pst >= 192 And pst <= 223)) And (pEnd = 34 Or pEnd = 39 Or (pEnd >= 145 And pEnd <= 148) Or pEnd = 187)) _
Then
GoTo ePrr
End If
End If
'starts from ", ends by . or ... or "
If (pst = 34) And (pEnd = 46 Or pEnd = 133 Or pEnd = 34) Then
'If (pst = 34 Or pst = 145 Or pst = 147) _
'And (pend = 46 Or pend = 133 Or pend = 34 Or pend = 146 Or pend = 148) Then
GoTo ePrr
End If
'repeat if the line is not ended by .,; etc
' ; ! . :
If (pEnd <> 33 And pEnd <> 46 And pEnd <> 58 And pEnd <> 59 _
And pEnd <> 63 And pEnd <> 44 And pEnd <> 133 And pEnd <> 187) Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
'If TXT_HTMLInputSecondLine(handleIn, HandleOut, lineNum, InLine1, FirstParLine) = False Then GoTo EndFile1
'GoTo eInputNothing
GoTo eInputLine2
End If 'If (pend <> 33 And pend <> 46 And pend <> 58
'line1 ends by "," line2 starts from capital
If pEnd = 44 Then
If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223)) Then
'epigraph?->print line1
If Len(Trim(InLine1)) < MaxLenComaCapital Then GoTo ePrr
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
'If TXT_HTMLInputSecondLine(handleIn, HandleOut, lineNum, InLine1, FirstParLine) = False Then GoTo EndFile1
'GoTo eInputNothing
GoTo eInputLine2
End If
End If
'pst <> "-"
If pst <> 45 And pst <> 32 Then
'repeat if line2 does not start from capital letter
'If ((pst >= 0 And pst <= 33) Or (pst >= 35 And pst <= 48)
If ((pst >= 0 And pst <= 33) Or (pst >= 35 And pst <= 41) Or (pst >= 43 And pst <= 48) _
Or (pst >= 58 And pst <= 64) Or (pst >= 91 And pst <= 171) _
Or (pst >= 172 And pst <= 191) Or pst >= 224) Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
'If TXT_HTMLInputSecondLine(handleIn, HandleOut, lineNum, InLine1, FirstParLine) = False Then GoTo EndFile1
'GoTo eInputNothing
GoTo eInputLine2
End If 'If ((pst >= 0 And pst <= 33) Or (pst >= 35 And pst <= 48)
End If 'If pst <> 45 Then
If pst = 45 Then
'''''''''''''''''''debug.print InLine2
'pst = "-", repeat if the letter after "-" is not capital
pcheck = Replace(InLine2, " ", "")
If Len(pcheck) > 2 Then
pst = Asc(Mid(pcheck, 2, 1) & " ")
'If pst = Chr(34) Then GoTo ePrr
If ((pst >= 0 And pst <= 64) Or (pst >= 91 And pst <= 191) _
Or pst >= 224) Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
'If TXT_HTMLInputSecondLine(handleIn, HandleOut, lineNum, InLine1, FirstParLine) = False Then GoTo EndFile1
'GoTo eInputNothing
GoTo eInputLine2
End If 'If ((pst >= 0 And pst <= 64) Or (pst >= 91 And pst <= 191)
End If 'If Len(pcheck) > 2 Then
End If 'If pst = 45 Then
ePrr:
'print line1 and continue
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
' '''''''''''''''''debug.print InLine1
InLine1 = InLine2
GoTo EmptyCheck
End If 'If FormatRec = "spaces" Then
'ADVANCED RECONSTRUCTION
If FormatRec = "advanced" Then
If InLine1 = "" Then
InLine1 = InLine2
FirstParLine = True
GoTo eInputLine2
End If
'single letter line1->sum up
If Len(Trim(InLine1)) = 1 Then
let0 = Trim(InLine1)
If let0 <> "0" And let0 <> "1" And let0 <> "2" And let0 <> "3" And let0 <> "4" _
And let0 <> "5" And let0 <> "6" And let0 <> "7" And let0 <> "8" _
And let0 <> "9" And let0 <> "I" And let0 <> "X" And let0 <> "V" And let0 <> "L" Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
End If 'If let0
End If 'If Len(Trim(InLine1)) = 1 Then
'single letter line2->sum up
If Len(Trim(InLine2)) = 1 Then
''''''''''''''''''''debug.print InLine2
let0 = Trim(InLine2)
If let0 <> "0" And let0 <> "1" And let0 <> "2" And let0 <> "3" And let0 <> "4" _
And let0 <> "5" And let0 <> "6" And let0 <> "7" And let0 <> "8" _
And let0 <> "9" And let0 <> "I" And let0 <> "X" And let0 <> "V" And let0 <> "L" Then
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
End If 'If let0
End If 'If Len(Trim(InLine2)) = 1 Then
'AscCod = Asc(InLine2 & " ")
InLine2 = Trim(InLine2)
pst = Asc(Left(Trim(InLine2), 1) & " ")
'print line1 if line2 starts from [
If pst = 91 Then
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
InLine1 = InLine2
FirstParLine = True
GoTo eInputLine2
End If
' pend = Asc(Right(" " & InLine1, 1))
' If pend = 32 Then
' If Len(Trim(InLine1)) < MaxLenComaCapital Then GoTo eEprFirstLine
' GoTo eSumm
' End If
'check line1 end
pEnd = Asc(Right(" " & Trim(InLine1), 1))
'for english texts
If LastCharset = "Win1251lat" Then
If (pst = 39 And (pEnd = 46 Or pEnd = 133 Or pEnd = 39)) _
Or (pst = 34 And (pEnd = 46 Or pEnd = 133 Or pEnd = 34)) _
Or ((pst = 145 Or pst = 146) And (pEnd = 46 Or pEnd = 133 Or pEnd = 145 Or pEnd = 146)) _
Or ((pst = 147 Or pst = 148) And (pEnd = 46 Or pEnd = 133 Or pEnd = 147 Or pEnd = 148)) _
Or ((pst = 171) And (pEnd = 46 Or pEnd = 133 Or pEnd = 187)) _
Or (((pst >= 65 And pst <= 91) Or (pst >= 192 And pst <= 223)) And (pEnd = 34 Or pEnd = 39 Or (pEnd >= 145 And pEnd <= 148) Or pEnd = 187)) _
Then
'If (pst = 39 Or pst = 34 Or pst = 145 Or pst = 146 Or pst = 171) And pend <> "," Then
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
InLine1 = InLine2
FirstParLine = True
GoTo eInputLine2
End If
End If
'If InStr(InLine1, "Åñëè íàõëûíåò") Or InStr(InLine2, "Åñëè íàõëûíåò") Then
' '''''''''''''''''debug.print "InLine1", InLine1: 'End
' '''''''''''''''''debug.print "InLine2", InLine2
' '''''''''''''''''debug.print , FirstParLine
' End If
'line1 ends by ! . : ; ? , ... >>
If (pEnd = 33 Or pEnd = 46 Or pEnd = 58 Or pEnd = 59 _
Or pEnd = 63 Or pEnd = 44 Or pEnd = 133 Or pEnd = 187) Then
e555:
'check line2 start
' If pst = 32 Then GoTo eSumm
'line1 ends by . line2 starts from . ->print line1
If pEnd = 46 And pst = 46 Then GoTo eEprFirstLine
'line1 ends by "," line2 starts from capital->summ up if line1 is not short
If pEnd = 44 Then
If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223)) Then
'epigraph?->print InLine1
If Len(InLine1) < MaxLenComaCapital Then GoTo eEprFirstLine
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
End If
End If 'If pend = 44 Then
'print line1 if line2 starts from the capital letter
'If ((pst >= 65 And pst <= 91) Or (pst >= 192 And pst <= 223) _
'Or pst = 34 Or pst = 171) Then
If LetterIsCapital(pst) Or pst = 34 Or pst = 171 Then
eEprFirstLine:
TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
InLine1 = InLine2
GoTo eInputLine2
Else
'print line1 if line2 starts from "-" following by capital letter
If pst = 45 Then
pcheck = Replace(InLine2, " ", "")
If Len(pcheck) > 2 Then
pst = Asc(Mid(pcheck, 2, 1))
If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223) _
Or pst = 34 Or pst = 171) Then
'print line1
GoTo eEprFirstLine
' 'line1 is a paragraph
' TXT_HTMLParagraphPrinter handleIn, HandleOut, InLine1
' InLine1 = InLine2
' GoTo eInputLine2
End If 'If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223)
End If 'If Len(pcheck) > 1 Then
End If 'If pst = 45
'print line1 if line2 starts from digit following by .
If LetterIsDigit(pst) Then
pcheck = Replace(InLine2, " ", "")
If Len(pcheck) > 2 Then
pst = Asc(Mid(pcheck, 2, 1))
If pst = 46 Then
'print line1
GoTo eEprFirstLine
End If 'If pst = 46 Then
End If 'If Len(pcheck) > 1 Then
End If 'If LetterIsDigit(pst) Then
End If 'If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223)
'line1 does not end by ! . : ; ? , ... >>, line2 starts from capital, both lines are short
Else
If Len(InLine1) < MaxLenComaCapital And Len(InLine2) < MaxLenComaCapital Then
If ((pst >= 65 And pst <= 90) Or (pst >= 192 And pst <= 223) _
Or pst = 34 Or pst = 171) Then GoTo eEprFirstLine
End If
End If 'If (pend = 33 Or pend = 46 Or pend = 58 Or pend = 59
eSumm:
'TXT_HTMLRemoveHyphs InLine1, RemovePerenos
InLine1 = InLine1 & " " & LTrim(InLine2)
FirstParLine = False
GoTo eInputLine2
'GoTo eInputNothing 'repeat if line2 is not a new par
End If 'If FormatRec = "advanced" Then
'Loop
EndFile1:
On Error Resume Next
''''''''''''''''''debug.print "end file1": ' End
Close #HandleOut
Close #handleIn
Form1.Status.Panels(2).Text = "...txt to html...cleaning book..."
OutString = ReadStringFile(OutputFile)
RemoveDesignerSign OutString
If NoHeader = False Then
MakeDesignerSign OutString
OutString = OutString & LineEnd & "</BASEFONT>" & LineEnd & "</BODY></HTML>"
End If
'WriteStringFile "f:\tmp3\before titles.txt", OutString
'add book contents
If MakeBookContents Then
''''''''''''''''''debug.print "MakeBookContents", BookContentsStr
If BookContentsStr <> "" Then
Dim ContName As String
'''''''''''''''''''''debug.print LastCharset
If LastCharset = "Win1251lat" Then
ContName = "CONTENTS"
Else
ContName = Chr(209) & Chr(206) & Chr(196) & Chr(197) & Chr(208) & Chr(198) & Chr(192) & _
Chr(205) & Chr(200) & Chr(197) ' "ÑÎÄÅÐÆÀÍÈÅ"
End If
ContName = "<FONT color=" & BookContentColor & ">" & ContName & "</FONT>"
BookContentsStr = _
"<SPAN id=BCONTENTS>" & LineEnd & _
"<DIV align=center><B>" & ContName & "</B></DIV><BR>" & LineEnd & _
BookContentsStr & LineEnd & _
"</SPAN><BR>"
OutString = Replace(OutString, ContentsSign, BookContentsStr)
Else
OutString = Replace(OutString, ContentsSign, "")
End If 'If BookContentsStr <> "" Then
End If 'If MakeBookContents Then
''''''''''''''''''debug.print ContentsSign: End
OutString = Replace(OutString, "<BR>" & LineEnd, "<BR>")
OutString = RemoveRepeatedSymbols(OutString, "<BR>")
OutString = Replace(OutString, "<BR>", "<BR>" & LineEnd)
OutString = Replace(OutString, "</H2></DIV>" & LineEnd & "<BR>", "</H2></DIV>")
OutString = RemoveRepeatedSymbols(OutString, LineEnd)
OutString = RemoveRepeatedSymbols(OutString, Chr(32))
InsertPageBreaksToHtml0 OutString
If WhereToClean = "1" Or WhereToClean = "2" Then
'''''debug.print "WhereToClean=", WhereToClean, "txt to html: cleanig after"
CleanUpF.CleanUpBookFile OutString
End If
ReplaceBrByDiv OutString
Call WriteStringFile(OutputFile, OutString)
filecommand DeleteTheFile, TmpFileName, ""
filecommand DeleteTheFile, TmpFileName1, ""
If NoHeader Then
MakeChapterBookmarks = MakeChapterBookmarksOld: MakeBookContents = MakeBookContentsOld
End If
'''''''''''''''''''''debug.print "TXT_HTML: after DeleteTheFile "
TXT_HTML = OutputFile
GoTo end0
eErrExit:
TXT_HTML = ""
end0:
TxtHtmlConvertingTime = timeGetTime - StartTime
''''''''''''''''''debug.print "end0"
LastFormatType = "auto"
LastConverter = ""
TxtToHtmlFirstAbsTitleFound = False
End Function
Public Sub TXT_HTMLParagraphPrinter(handleIn As Long, HandleOut As Long, LineToPrint0 As String, _
Optional NoEmptyCheck As Boolean = False)
On Error Resume Next
'MainTitleFoundAsException = False
Dim LineToPrint As String
LineToPrint = Trim(LineToPrint0)
If Len(LineToPrint) < 1 Then GoTo end0
'If EmptyLineLim(LineToPrint) Then GoTo end0
ReplaceUnderscoreByItalic LineToPrint
'print paragraph
If Left$(LineToPrint, 1) = "-" Then
LineToPrint = div_jus_st & nbs4 & "-" & nbs1 & _
Trim(Right$(LineToPrint, Len(LineToPrint) - 1)) & div_end '& LineEnd
Else
LineToPrint = div_jus_st & nbs4 & LineToPrint & div_end '& LineEnd
End If
Print #HandleOut, LineToPrint
AfterTitleFound = False
end0:
End Sub
Public Sub TXT_HTMLEpigraphPrinter(handleIn As Long, HandleOut As Long, LinesArr() As String, _
Optional PrintAsParagraph As Boolean, Optional LastLineIsAuthor As Boolean, _
Optional LinesNumToPrint As Integer = 0)
On Error Resume Next
'''''''''''''''''''debug.print "TXT_HTMLSubtitleFinger"
Dim StrToPrint As String, CurLine As String, AllLinesEmpty As Boolean, LinNum As Integer
AllLinesEmpty = True
StrToPrint = ""
If LinesNumToPrint <> 0 Then LinNum = LinesNumToPrint Else LinNum = UBound(LinesArr)
'LinNum = UBound(LinesArr)
For j = 1 To LinNum
CurLine = LinesArr(j)
'''''''''''''''''''debug.print j, CurLine
ReplaceUnderscoreByItalic CurLine, True
If EmptyLineLim(CurLine) = False Then
AllLinesEmpty = False
If LastLineIsAuthor And j = LinNum And PrintAsParagraph = False Then GoTo eCheck
StrToPrint = StrToPrint & div_jus_st & CurLine & div_end & LineEnd
End If
Next
eCheck:
If AllLinesEmpty = False Then
If PrintAsParagraph Then
Print #HandleOut, StrToPrint & "<BR>": GoTo end0
End If 'If PrintAsParagraph Then
StrToPrint = "<FONT color=" & EpigraphColor & ">" & StrToPrint & "</FONT>"