-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcountwords.lua
More file actions
187 lines (160 loc) · 4.35 KB
/
countwords.lua
File metadata and controls
187 lines (160 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
-- Credit: https://gist.github.com/phi-gamma/2622252
-- Shared global table; reused if it already exists, otherwise created empty.
packagedata = packagedata or { } -- namespace proposal for packages
-- Run two support files located through kpse.find_file.
-- NOTE(review): presumably these provide `characters.data` and
-- `table.tohash`, both used further down -- confirm.
dofile(kpse.find_file"char-def.lua") -- unicode tables
dofile(kpse.find_file"lualibs-table.lua") -- old Context table code
-- Local aliases for the library tables and functions used below.
local utf = unicode.utf8
local node = node
local type = type
local lower, utfchar, utfvalues = string.lower, utf.char, string.utfvalues
local tableconcat, iowrite = table.concat, io.write
local stringformat, texprint = string.format, tex.print
local traverse_nodes = node.traverse
-- Per-code-point records; this file reads their `lccode` and `category`
-- fields.
local chardata = characters.data
-- Numeric ids of the node types that mark_words distinguishes.
local glyph_code = node.id"glyph"
local disc_code = node.id"disc"
local kern_code = node.id"kern"
-- Kern subtype that mark_words accepts inside a word without ending it.
local kerning_code = 0 -- from font
-- * LaTeX counters interface
-- Reuse an existing `latex` table when there is one, otherwise start fresh.
local latex = latex or {}

-- Name of the TeX count register that backs a LaTeX counter: the counter
-- name prefixed with `c@`.
local function register_name (counter)
  return 'c@' .. counter
end

-- This reflects the standard Lua(La)TeX way
-- Credit: https://tex.stackexchange.com/a/330403/238079
-- `latex.count[name]` is a read-only view: every lookup is forwarded to
-- `tex.count` under the register name.
latex.count = setmetatable({}, {
  __index = function (_, counter)
    return tex.count[register_name(counter)]
  end,
})

-- Read the current value of a LaTeX counter.
function latex.getcount (counter)
  return tex.getcount(register_name(counter))
end

-- Assign `value` to a LaTeX counter.
function latex.setcount (counter, value)
  return tex.setcount(register_name(counter), value)
end

-- Add one to a LaTeX counter.
function latex.inccount (counter)
  local current = latex.count[counter]
  return latex.setcount(counter, current + 1)
end
-- * Global wordcount state
-- This table holds the counters we are counting into. Keys are names
-- of LaTeX counters, values are true (we are counting into it) or
-- false (we aren't). A counter that was never enabled has no entry.
local counters = {}
-- Minimum number of characters a word needs in order to be counted.
-- Changed through set_threshold.
local threshold = 1
-- Start counting words into the LaTeX counter named `counter`.
-- Sets its entry in `counters` to true; insert_word increments every
-- counter whose entry is true. Defined as a global function.
function enable_counter (counter)
counters[counter] = true
end
-- Stop counting words into the LaTeX counter named `counter`.
-- The entry is kept in `counters` but set to false, so insert_word skips
-- it; the counter's current value is not touched. Defined as a global
-- function.
function disable_counter (counter)
counters[counter] = false
end
-- Set the minimum length a word must have in order to be counted.
--
-- `n` may be a number or a string that `tonumber` can convert (handy when
-- the value arrives from TeX as text). nil, false and anything that is not
-- numeric leave the current threshold untouched, so the later numeric
-- comparison against the word length can never be handed a non-number.
function set_threshold (n)
  local value = tonumber(n)
  if value then
    threshold = value
  end
end
-- * Counting
-- Set of `category` values that mark_words treats as letters; a glyph whose
-- chardata category is not a key here ends the current word.
local is_letter = {
  ll = true,
  lm = true,
  lo = true,
  lt = true,
  lu = true,
}
local charcache = { } --- memo without metatable

-- `unpack` moved to `table.unpack` in Lua 5.2; take whichever exists so
-- that multi-code lowercase mappings work on every interpreter version.
local unpack_codes = table.unpack or unpack

-- Return the lowercase form of one character as a UTF-8 string.
--
-- `code` is normally a code point (number). Its `lccode` entry in chardata
-- may be a single code point or a table of several; both are converted with
-- utfchar. When chardata has no lowercase mapping, a numeric `code` is
-- converted as is and any other value is handed back unchanged. Results are
-- memoised in `charcache`. A nil/false `code` yields no value.
local lcchar = function(code)
  if not code then
    return
  end
  local cached = charcache[code]
  if cached then
    return cached
  end
  local data = chardata[code]
  local lc = data and data.lccode
  local result
  if lc then
    if type(lc) == "table" then
      result = utfchar(unpack_codes(lc))
    else
      result = utfchar(lc)
    end
  elseif type(code) == "number" then
    result = utfchar(code)
  else
    result = code
  end
  charcache[code] = result
  return result
end
-- Lowercase a UTF-8 string: every code point delivered by utfvalues is
-- mapped through lcchar and the pieces are joined back into one string.
local function lowerchar (str)
  local pieces = { }
  for codepoint in utfvalues(str) do
    pieces[#pieces + 1] = lcchar(codepoint)
  end
  return tableconcat(pieces)
end
-- Walk the node list starting at `head`, collect runs of letter glyphs into
-- words and hand each finished word to `whenfound(word)`.
--
-- If `whenfound` returns a function, that function is called once for every
-- node recorded for the word (its glyphs plus any discretionaries between
-- them). Returns `head` unchanged and `true` if at least one word was
-- marked that way, otherwise `head` and nil.
local function mark_words (head, whenfound)
  local done                        -- set to true once a word got marked
  local chars, nchars = { }, 0      -- UTF-8 pieces of the pending word
  local members, nmembers = { }, 0  -- nodes that make up the pending word

  -- Report the pending word, if any, then empty both buffers.
  local function flush ()
    if nchars > 0 then
      local mark = whenfound(tableconcat(chars, "", 1, nchars))
      if mark then
        done = true
        for i = 1, nmembers do
          mark(members[i])
        end
      end
    end
    nmembers, nchars = 0, 0
  end

  -- Record a node as belonging to the pending word.
  local function remember (nd)
    nmembers = nmembers + 1
    members[nmembers] = nd
  end

  -- Append one code point to the pending word.
  local function append (code)
    nchars = nchars + 1
    chars[nchars] = utfchar(code)
  end

  local current = head
  while current do
    local id = current.id
    if id == glyph_code then
      local components = current.components
      if components then
        -- A glyph with components: one node, but every component
        -- contributes its own character to the word.
        remember(current)
        for part in traverse_nodes(components) do
          append(part.char)
        end
      else
        local code = current.char
        local data = chardata[code]
        if data and is_letter[data.category] then
          remember(current)
          append(code)
        elseif nchars > 0 then
          -- A non-letter glyph terminates the word.
          flush()
        end
      end
    elseif id == disc_code then
      -- A discretionary adds no characters; it is only recorded when
      -- nodes have already been collected.
      if nmembers > 0 then
        remember(current)
      end
    elseif id == kern_code and current.subtype == kerning_code and nchars > 0 then
      -- Kern of the accepted subtype inside a word: keep collecting.
    elseif nchars > 0 then
      -- Any other node ends the word.
      flush()
    end
    current = current.next
  end

  -- The list may end in the middle of a word.
  if nchars > 0 then
    flush()
  end
  return head, done
end
-- Count `str` as one word in every enabled counter, provided it has at
-- least `threshold` characters.
--
-- The length is measured in characters, not bytes: `#str` would report a
-- UTF-8 word such as "éa" as three units long and let it slip past a
-- threshold of 3. Returns nothing, so mark_words does not mark any nodes.
local function insert_word (str)
  -- Bytes in 0x80-0xBF are UTF-8 continuation bytes; every other byte
  -- starts a character, so counting those gives the character count.
  local _, length = string.gsub(str, "[^\128-\191]", "")
  if length >= threshold then
    -- For each enabled counter...
    for counter, enabled in pairs(counters) do
      -- ...increment the counter.
      if enabled then
        latex.inccount(counter)
      end
    end
  end
end
-- Node-list callback: run mark_words over `head` with insert_word as the
-- word handler and pass mark_words' results straight through.
local function callback (head)
  return mark_words(head, insert_word)
end
-- * Export module
-- Public interface of this file:
--   latex            LaTeX counter helpers (count, getcount, setcount, inccount)
--   enable_counter   start counting words into a named LaTeX counter
--   disable_counter  stop counting words into a named LaTeX counter
--   callback         node-list function that counts the words in a list
--   set_threshold    change the minimum length of a counted word
return {
latex = latex,
enable_counter = enable_counter,
disable_counter = disable_counter,
callback = callback,
set_threshold = set_threshold
}