() where T : OutputDictBase
- {
- OutputDictBase dict = null;
+ return synonyms;
+ };
- if (typeof(T) == typeof(XdxfDictionary))
+ dict.GetValueFromArticle = a =>
{
- dict = new XdxfDictionary(ID, TranslationType)
+ var valueSB = new StringBuilder();
+
+ foreach (var definition in a.Value.GetDefinitions(true))
{
- Title = ShortTitle,
- FullTitle = LongTitle,
- Description = Description,
- SrcUrl = SrcUrl,
- };
+ var value = definition;
+ // Add abbreviations
+ foreach (var abbreviation in a.Parent.Abbreviations)
+ {
+ value = value.WrapInTag(abbreviation.Key, "i", StringWrapInTagOptions.WrapWholeWordsOnly);
+ if (abbreviation.Key.Length > 1 && char.IsLower(abbreviation.Key[0]))
+ {
+ value = value.WrapInTag(char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1), "i", StringWrapInTagOptions.WrapWholeWordsOnly);
+ }
+ }
+
+ value = value.WrapInTag("p");
+
+ // Add bold for numbering
+ value = Regex.Replace(value, "([0-9]+)\\. ", "
$1. ");
+ if (value.Contains("2. "))
+ {
+ // Fix bolding number one for pre-text
+ value = Regex.Replace(value, "
(.*[^>])1\\. ", "
$11. ");
+ }
+
+ valueSB.Append(value);
+ }
+
+ return valueSB.ToString();
+ };
+
+ Log("Building StarDict dictionary.");
+
+ foreach (var kvp in GetCleanEntries())
+ {
+ dict.AddArticle(kvp.Key, kvp.Value);
}
- else if (typeof(T) == typeof(StarDictDictionary))
+
+ AddAbbreviations(dict);
+
+ Log("Saving StarDict dictionary.");
+
+ dict.Save(starDictPath);
+ }
+
+ private void SaveXdxfFile(string xdxfPath) // Builds an XdxfDictionary from GetCleanEntries() plus the abbreviations, then saves it to xdxfPath.
+ {
+ var dict = new XdxfDictionary(GetMetadata());
+
+ dict.GetXdxfKeysFromAbbreviation = a => // Keys for an abbreviation: its own key, plus a capitalized variant when it starts lowercase.
{
- dict = new StarDictDictionary(ID, TranslationType)
+ var list = new List() { a.Key };
+ if (a.Key.Length > 1 && char.IsLower(a.Key[0])) // Single-character and already-capitalized keys get no extra variant.
{
- Title = LongTitle,
- Description = Description,
- };
- }
+ list.Add(char.ToUpper(a.Key[0]) + a.Key.Substring(1));
+ }
+ return list;
+ };
- dict.Authors.AddRange(Authors);
+ dict.GetXdxfKeysFromArticle = a => // A comma-separated article key yields one key per non-empty part.
+ {
+ return a.Key.Split(',', StringSplitOptions.RemoveEmptyEntries).ToList();
+ };
+
+ dict.GetXdxfKeyOptionalTerms = () => // Supplies "." and StringUtils.SyllableDotUtf8; how XdxfDictionary applies them is not visible here.
+ {
+ return new HashSet() { ".", StringUtils.SyllableDotUtf8 };
+ };
+
+ dict.GetXdxfValuesFromArticle = a => // Values are the article's definitions; NOTE(review): meaning of the 'false' flag is not visible here — confirm.
+ {
+ return a.Value.GetDefinitions(false).ToList();
+ };
+
+ Log("Building XDXF dictionary.");
+
+ foreach (var kvp in GetCleanEntries()) // Every cleaned entry becomes one article.
+ {
+ dict.AddArticle(kvp.Key, kvp.Value);
+ }
AddAbbreviations(dict);
- return dict;
+ Log("Saving XDXF dictionary.");
+
+ dict.Save(xdxfPath); // Hands the finished dictionary to Save with the caller-supplied path.
}
- protected abstract void AddAbbreviations(OutputDictBase dict);
+ protected abstract void GetRawDataFromSource(); // NOTE(review): presumably loads the unprocessed source data in each subclass; no caller is visible in this diff — confirm.
+
+ protected abstract IEnumerable> GetCleanEntries(); // Yields the entries whose Key and Value the save code in this class passes to dict.AddArticle.
+
+ protected abstract void AddAbbreviations(DictionaryBase dict); // Each input dictionary overrides this to register its abbreviations on dict (the overrides call dict.AddAbbreviation).
+
}
public enum TranslationType
diff --git a/src/HawDict/Input/MamakaKaiaoInputDict.cs b/src/HawDict/Input/MamakaKaiaoInputDict.cs
index 330e943..1b695dd 100644
--- a/src/HawDict/Input/MamakaKaiaoInputDict.cs
+++ b/src/HawDict/Input/MamakaKaiaoInputDict.cs
@@ -6,6 +6,8 @@
using HtmlAgilityPack;
+using QuickDict;
+
namespace HawDict
{
public class MamakaKaiaoInputDict : HtmlInputDict
@@ -251,7 +253,7 @@ protected override string[] ParseEntryNode(HtmlNode node)
{
string entryName = node.FirstChild.OuterHtml;
string entryValue = node.InnerHtml.Remove(0, entryName.Length);
-
+
try
{
return new string[] { StringUtils.NormalizeWhiteSpace(StringUtils.SingleLineNoTabs(entryName)), StringUtils.NormalizeWhiteSpace(StringUtils.SingleLineNoTabs(entryValue)) };
@@ -269,53 +271,50 @@ protected override string FinalCleanValue(string value)
return StringUtils.FixSentenceSpacing(value);
}
- protected override void AddAbbreviations(OutputDictBase dict)
+ protected override void AddAbbreviations(DictionaryBase dict) // Registers the MamakaKaiaoInputDict abbreviation list on dict, one AddAbbreviation call per entry.
{
- dict.Abbreviations.AddRange(new OutputAbbreviation[]
- {
- new OutputAbbreviation(dict, "abb.", "abbreviation"),
- new OutputAbbreviation(dict, "Bib.", "Bible"),
- new OutputAbbreviation(dict, "cf.", "compare", AbbreviationType.Auxiliary),
- new OutputAbbreviation(dict, "comb.", "combined form"),
- new OutputAbbreviation(dict, "dic.", "dictionary definition"),
- new OutputAbbreviation(dict, "e.g.", "for example", AbbreviationType.Auxiliary),
- new OutputAbbreviation(dict, "Eng.", "English"),
- new OutputAbbreviation(dict, "ext. mng.", "extended meaning"),
- new OutputAbbreviation(dict, "i.e.", "that is", AbbreviationType.Auxiliary),
- new OutputAbbreviation(dict, "inv.", "invention"),
- new OutputAbbreviation(dict, "Japn.", "Japanese"),
- new OutputAbbreviation(dict, "lit.", "literally"),
- new OutputAbbreviation(dict, "mān.", "mānaleo (native speaker)"),
- new OutputAbbreviation(dict, "new mng.", "new meaning"),
- new OutputAbbreviation(dict, "PPN", "Proto Polynesian"),
- new OutputAbbreviation(dict, "redup.", "reduplication"),
- new OutputAbbreviation(dict, "sh.", "shortened form"),
- new OutputAbbreviation(dict, "sp. var.", "spelling variation"),
- new OutputAbbreviation(dict, "Tah.", "Tahitian"),
- new OutputAbbreviation(dict, "trad.", "traditional literary sources"),
- new OutputAbbreviation(dict, "var.", "variation"),
- new OutputAbbreviation(dict, "ham", "hamani (transitive verb)", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "heh", "hehele (intransitive verb)", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "ʻaʻ", "ʻaʻano (stative verb)", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "kik", "kikino (common noun)", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "iʻoa", "iʻoa (proper noun)", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "EK", "Elama Kanahele"),
- new OutputAbbreviation(dict, "HA", "Henry Auwae"),
- new OutputAbbreviation(dict, "HHLH", "Helen Haleola Lee Hong"),
- new OutputAbbreviation(dict, "HKM", "Harry Kunihi Mitchell"),
- new OutputAbbreviation(dict, "JPM", "Joseph Puipui Makaai"),
- new OutputAbbreviation(dict, "KKK", "Kaui Keola Keamoai"),
- new OutputAbbreviation(dict, "LK", "Louise Keliihoomalu"),
- new OutputAbbreviation(dict, "MMLH", "Martha Manoanoa Lum Ho"),
- new OutputAbbreviation(dict, "MW", "Minnie Whitford"),
- new OutputAbbreviation(dict, "Anatomia", "Judd, Gerrit P. Anatomia"),
- new OutputAbbreviation(dict, "Bihopa", "Bihopa, E. A. Haawina Mua o ka Hoailona Helu"),
- new OutputAbbreviation(dict, "Bounty", "HeMoolelo no na Luina Kipi o ka Moku Bounty"),
- new OutputAbbreviation(dict, "Legendre", "Legendre, A. M. Ke Anahonua"),
- new OutputAbbreviation(dict, "Judd", "Judd et al. Hawaiian Language Imprints, 1822-1899"),
- new OutputAbbreviation(dict, "Pakaa", "Nakuina, Moses K. Pakaa a me Ku-a-Pakaa"),
- new OutputAbbreviation(dict, "Wilcox", "Wilcox, Robert"),
- });
+ dict.AddAbbreviation("abb.", "abbreviation");
+ dict.AddAbbreviation("Bib.", "Bible");
+ dict.AddAbbreviation("cf.", "compare", AbbreviationType.Auxiliary); // Optional third argument tags the abbreviation's type.
+ dict.AddAbbreviation("comb.", "combined form");
+ dict.AddAbbreviation("dic.", "dictionary definition");
+ dict.AddAbbreviation("e.g.", "for example", AbbreviationType.Auxiliary);
+ dict.AddAbbreviation("Eng.", "English");
+ dict.AddAbbreviation("ext. mng.", "extended meaning");
+ dict.AddAbbreviation("i.e.", "that is", AbbreviationType.Auxiliary);
+ dict.AddAbbreviation("inv.", "invention");
+ dict.AddAbbreviation("Japn.", "Japanese");
+ dict.AddAbbreviation("lit.", "literally");
+ dict.AddAbbreviation("mān.", "mānaleo (native speaker)");
+ dict.AddAbbreviation("new mng.", "new meaning");
+ dict.AddAbbreviation("PPN", "Proto Polynesian");
+ dict.AddAbbreviation("redup.", "reduplication");
+ dict.AddAbbreviation("sh.", "shortened form");
+ dict.AddAbbreviation("sp. var.", "spelling variation");
+ dict.AddAbbreviation("Tah.", "Tahitian");
+ dict.AddAbbreviation("trad.", "traditional literary sources");
+ dict.AddAbbreviation("var.", "variation");
+ dict.AddAbbreviation("ham", "hamani (transitive verb)", AbbreviationType.Grammatical); // Part-of-speech markers are tagged Grammatical.
+ dict.AddAbbreviation("heh", "hehele (intransitive verb)", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("ʻaʻ", "ʻaʻano (stative verb)", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("kik", "kikino (common noun)", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("iʻoa", "iʻoa (proper noun)", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("EK", "Elama Kanahele"); // From here on the keys are initials expanding to personal names.
+ dict.AddAbbreviation("HA", "Henry Auwae");
+ dict.AddAbbreviation("HHLH", "Helen Haleola Lee Hong");
+ dict.AddAbbreviation("HKM", "Harry Kunihi Mitchell");
+ dict.AddAbbreviation("JPM", "Joseph Puipui Makaai");
+ dict.AddAbbreviation("KKK", "Kaui Keola Keamoai");
+ dict.AddAbbreviation("LK", "Louise Keliihoomalu");
+ dict.AddAbbreviation("MMLH", "Martha Manoanoa Lum Ho");
+ dict.AddAbbreviation("MW", "Minnie Whitford");
+ dict.AddAbbreviation("Anatomia", "Judd, Gerrit P. Anatomia"); // Remaining keys are short titles expanding to author/title citations.
+ dict.AddAbbreviation("Bihopa", "Bihopa, E. A. Haawina Mua o ka Hoailona Helu");
+ dict.AddAbbreviation("Bounty", "HeMoolelo no na Luina Kipi o ka Moku Bounty");
+ dict.AddAbbreviation("Legendre", "Legendre, A. M. Ke Anahonua");
+ dict.AddAbbreviation("Judd", "Judd et al. Hawaiian Language Imprints, 1822-1899");
+ dict.AddAbbreviation("Pakaa", "Nakuina, Moses K. Pakaa a me Ku-a-Pakaa");
+ dict.AddAbbreviation("Wilcox", "Wilcox, Robert");
}
}
}
diff --git a/src/HawDict/Input/PlaceNamesInputDict.cs b/src/HawDict/Input/PlaceNamesInputDict.cs
index 65ba678..e6f5042 100644
--- a/src/HawDict/Input/PlaceNamesInputDict.cs
+++ b/src/HawDict/Input/PlaceNamesInputDict.cs
@@ -6,6 +6,8 @@
using HtmlAgilityPack;
+using QuickDict;
+
namespace HawDict
{
public class PlaceNamesInputDict : HtmlInputDict
@@ -91,27 +93,24 @@ protected override string FinalCleanValue(string value)
return StringUtils.FixSentenceSpacing(value);
}
- protected override void AddAbbreviations(OutputDictBase dict)
+ protected override void AddAbbreviations(DictionaryBase dict) // Registers the PlaceNamesInputDict abbreviation list on dict, one AddAbbreviation call per entry.
{
- dict.Abbreviations.AddRange(new OutputAbbreviation[]
- {
- new OutputAbbreviation(dict, "For. Sel.", "Elbert, Selections from Fornander"),
- new OutputAbbreviation(dict, "For.", "Fornander, Hawaiian Antiquities (e.g., For. 5:176 means Fornander, Volume 5, p. 176)"),
- new OutputAbbreviation(dict, "HM", "Beckwith, Hawaiian Mythology"),
- new OutputAbbreviation(dict, "Indices", "Indices of Awards..."),
- new OutputAbbreviation(dict, "Kuy. 1", "Kuykendall, The Hawaiian Kingdom, Volume 1"),
- new OutputAbbreviation(dict, "Kuy. 2", "Kuykendall, The Hawaiian Kingdom, Volume 2"),
- new OutputAbbreviation(dict, "Kuy. 3", "Kuykendall, The Hawaiian Kingdom, Volume 3"),
- new OutputAbbreviation(dict, "lit.", "literally"),
- new OutputAbbreviation(dict, "PE", "Pukui and Elbert, Hawaiian Dictionary"),
- new OutputAbbreviation(dict, "PH", "Emerson, Pele and Hiiaka"),
- new OutputAbbreviation(dict, "qd.", "quadrangle"),
- new OutputAbbreviation(dict, "qds.", "quadrangles (maps 2-4)"),
- new OutputAbbreviation(dict, "RC", "Ruling Chiefs"),
- new OutputAbbreviation(dict, "TM", "Taylor and Miranda, \"Honolulu Street Names\""),
- new OutputAbbreviation(dict, "UL", "Emerson, Unwritten Literature..."),
- new OutputAbbreviation(dict, "*", "Pronunciation and meaning uncertain"),
- });
+ dict.AddAbbreviation("For. Sel.", "Elbert, Selections from Fornander"); // "For. Sel." is added before the shorter "For."; NOTE(review): whether order matters to consumers is not visible here.
+ dict.AddAbbreviation("For.", "Fornander, Hawaiian Antiquities (e.g., For. 5:176 means Fornander, Volume 5, p. 176)");
+ dict.AddAbbreviation("HM", "Beckwith, Hawaiian Mythology");
+ dict.AddAbbreviation("Indices", "Indices of Awards...");
+ dict.AddAbbreviation("Kuy. 1", "Kuykendall, The Hawaiian Kingdom, Volume 1");
+ dict.AddAbbreviation("Kuy. 2", "Kuykendall, The Hawaiian Kingdom, Volume 2");
+ dict.AddAbbreviation("Kuy. 3", "Kuykendall, The Hawaiian Kingdom, Volume 3");
+ dict.AddAbbreviation("lit.", "literally");
+ dict.AddAbbreviation("PE", "Pukui and Elbert, Hawaiian Dictionary");
+ dict.AddAbbreviation("PH", "Emerson, Pele and Hiiaka");
+ dict.AddAbbreviation("qd.", "quadrangle");
+ dict.AddAbbreviation("qds.", "quadrangles (maps 2-4)");
+ dict.AddAbbreviation("RC", "Ruling Chiefs");
+ dict.AddAbbreviation("TM", "Taylor and Miranda, \"Honolulu Street Names\"");
+ dict.AddAbbreviation("UL", "Emerson, Unwritten Literature...");
+ dict.AddAbbreviation("*", "Pronunciation and meaning uncertain"); // The key here is a symbol, not a word.
}
}
}
diff --git a/src/HawDict/Input/PukuiElbertInputDict.cs b/src/HawDict/Input/PukuiElbertInputDict.cs
index 7bee6f9..ca2bbc3 100644
--- a/src/HawDict/Input/PukuiElbertInputDict.cs
+++ b/src/HawDict/Input/PukuiElbertInputDict.cs
@@ -6,6 +6,8 @@
using HtmlAgilityPack;
+using QuickDict;
+
namespace HawDict
{
public class PukuiElbertInputDict : HtmlInputDict
@@ -37,12 +39,13 @@ protected override string CleanSourceHtml(string s)
{
// Remove header comments
s = Regex.Replace(s, "In causative/simulative forms beginning with.*\n", "");
- return s
+ s = s
.Replace("
\n
\n
\n| ", "")
.Replace("&4 ", "Redup. ").Replace("&;n", "n.").Replace("&(PCP; ", "(PCP ").Replace("(Mele. ", "(Mele ")
.Replace("..", ".").Replace("..", ".").Replace("..", ".")
.Replace("“", "\"").Replace("”", "\"")
+ .Replace(" ,", ",")
.Replace("T.44>", "")
.Replace("h3", "span")
// Typo fixes:
@@ -165,7 +168,7 @@ protected override string CleanSourceHtml(string s)
.Replace("Ka-pū,lehu", "Ka-pū.lehu")
.Replace("ā…paha", "ā … paha")
// Typos with _
- .Replace("Na_na_", "Nānā")
+ .Replace("Na_na_", "Nānā.")
.Replace(">Palaki ʻan_ai", ">Palaki ʻānai")
.Replace("Pal_aha", "Pālaha")
.Replace(">ka mea an_a", ">ka mea āna")
@@ -181,9 +184,14 @@ protected override string CleanSourceHtml(string s)
.Replace(">Ma kahi maikaʻi e paʻawela ana n_", ">Ma kahi maikaʻi e paʻawela ana nō")
.Replace(">p_u.ʻulu kaua ", ">pū.ʻulu kaua ")
.Replace(">Kō wai kaʻa k_elā?", ">Kō wai kaʻa kēlā?")
+ .Replace("A spindly banana . . ,", "A spindly banana …,")
// Missing definition number fixes
.Replace(" n. Name of a large valley on", " 1. n. Name of a large valley on")
;
+ // Fix Nānā references
+ s = Regex.Replace(s, @"Nānā;? (\d)", @"Nānā. $1");
+ s = Regex.Replace(s, @"Nānā;? (\d)", @"Nānā. $1");
+ return s;
}
protected override bool IsEntryNode(HtmlNode node)
@@ -206,125 +214,122 @@ protected override string FinalCleanValue(string value)
return StringUtils.FixSentenceSpacing(value);
}
- protected override void AddAbbreviations(OutputDictBase dict)
+ protected override void AddAbbreviations(DictionaryBase dict) // Registers the PukuiElbertInputDict abbreviation list on dict, one AddAbbreviation call per entry.
{
- dict.Abbreviations.AddRange(new OutputAbbreviation[]
- {
- new OutputAbbreviation(dict, "And.", "Andrews dictionary, 1865; reference is given only if no evidence is available other than that in Andrews and Andrews-Parker (AP)"),
- new OutputAbbreviation(dict, "AP", "Andrews-Parker dictionary, 1922; reference is given only if no evidence is available other than that in Andrews (And.) and Andrews-Parker"),
- new OutputAbbreviation(dict, "Cap.", "beginning with a capital letter"),
- new OutputAbbreviation(dict, "caus/sim.", "causative/simulative", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "cf.", "compare", AbbreviationType.Auxiliary),
- new OutputAbbreviation(dict, "conj.", "conjunction", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "demon.", "demonstrative", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "Eng.", "word borrowed from English"),
- new OutputAbbreviation(dict, "ex.", "example, examples", AbbreviationType.Auxiliary),
- new OutputAbbreviation(dict, "f.", "form (in names of plants)"),
- new OutputAbbreviation(dict, "fig.", "figuratively"),
- new OutputAbbreviation(dict, "For.", "Fornander, Hawaiian Antiquities (For. 4:297 = Fornander Vol. 4, p. 297)"),
- new OutputAbbreviation(dict, "FS", "Elbert, Selections from Fornander"),
- new OutputAbbreviation(dict, "GP", "Green and Pukui, Legend of Kawelo"),
- new OutputAbbreviation(dict, "Gr.", "word probably borrowed from Greek"),
- new OutputAbbreviation(dict, "Gram.", "Elbert and Pukui, Hawaiian Grammar"),
- new OutputAbbreviation(dict, "Heb.", "word probably borrowed from Hebrew"),
- new OutputAbbreviation(dict, "HM", "Beckwith, Hawaiian Mythology"),
- new OutputAbbreviation(dict, "HP", "Handy, Hawaiian Planter"),
- new OutputAbbreviation(dict, "Ii", "Ii, Fragments of Hawaiian History"),
- new OutputAbbreviation(dict, "interr.", "interrogative", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "interj.", "interjection", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "Kam. 1964", "Kamakau, Ka Poʻe Kahiko"),
- new OutputAbbreviation(dict, "Kam. 1976", "Kamakau, The Works of the People of Old"),
- new OutputAbbreviation(dict, "Kel.", "Kelekona, Kaluaikoolau"),
- new OutputAbbreviation(dict, "Kep.", "Beckwith, Kepelino"),
- new OutputAbbreviation(dict, "KJV", "King James Version of the Bible"),
- new OutputAbbreviation(dict, "KL.", "Beckwith, Kumulipo"),
- new OutputAbbreviation(dict, "Laie", "Beckwith, Laieikawai"),
- new OutputAbbreviation(dict, "lit.", "literally"),
- new OutputAbbreviation(dict, "loc.n.", "locative noun", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "Malo", "Malo, Hawaiian Antiquities, 1951"),
- new OutputAbbreviation(dict, "MK", "Ke Alanui o ka Lani, Oia ka Manuale Kakolika"),
- new OutputAbbreviation(dict, "n.v.", "noun-verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "n.", "noun", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "Nak.", "Nakuina, Moolelo Hawaii ..."),
- new OutputAbbreviation(dict, "Nānā", "Pukui, Haertig, Lee, Nānā i ke Kumu"),
- new OutputAbbreviation(dict, "Neal", "Neal, In Gardens of Hawaii, 1965"),
- new OutputAbbreviation(dict, "num.", "numeral", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "par.", "particle", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "pas/imp.", "passive/imperative", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "PH", "Emerson, Pele and Hiiaka"),
- new OutputAbbreviation(dict, "pl.", "plural", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "PCP", "Proto Central Polynesian"),
- new OutputAbbreviation(dict, "PEP", "Proto East Polynesian"),
- new OutputAbbreviation(dict, "PNP", "Proto Nuclear Polynesian"),
- new OutputAbbreviation(dict, "poss.", "possessive", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "PPN", "Proto Polynesian"),
- new OutputAbbreviation(dict, "prep.", "preposition", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "RC", "Kamakau, Ruling Chiefs"),
- new OutputAbbreviation(dict, "redup.", "reduplication (for meanings of reduplications, see Gram. 6.2.2)"),
- new OutputAbbreviation(dict, "RSV", "Holy Bible, Revised Standard Version"),
- new OutputAbbreviation(dict, "sg.", "singular", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "sp., spp.", "species"),
- new OutputAbbreviation(dict, "TC", "Taro Collection"),
- new OutputAbbreviation(dict, "UL", "Emerson, Unwritten Literature"),
- new OutputAbbreviation(dict, "v.", "verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "var.", "variant, variety"),
- new OutputAbbreviation(dict, "nvi.", "noun-intransitive verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "nvs.", "noun-stative verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "nvt.", "noun-transitive verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "vi.", "intransitive verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "vs.", "stative verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "vt.", "transitive verb", AbbreviationType.Grammatical),
- new OutputAbbreviation(dict, "Am.", "Amosa (Amos)"),
- new OutputAbbreviation(dict, "Dan.", "Daniela (Daniel)"),
- new OutputAbbreviation(dict, "Epeso", "(Ephesians)"),
- new OutputAbbreviation(dict, "Eset.", "Esetera (Esther)"),
- new OutputAbbreviation(dict, "Ezek.", "Ezekiela (Ezekiel)"),
- new OutputAbbreviation(dict, "Ezera", "(Ezra)"),
- new OutputAbbreviation(dict, "Gal.", "Galatia (Galatians)"),
- new OutputAbbreviation(dict, "Hagai", "(Haggai)"),
- new OutputAbbreviation(dict, "Hal.", "Halelu (Psalms)"),
- new OutputAbbreviation(dict, "Heb.", "Hebera (Hebrews)"),
- new OutputAbbreviation(dict, "Hoik.", "Hoikeana (Revelation)"),
- new OutputAbbreviation(dict, "Hos.", "Hosea (Hosea)"),
- new OutputAbbreviation(dict, "Iak.", "Iakobo (James)"),
- new OutputAbbreviation(dict, "Ier.", "Ieremia (Jeremiah)"),
- new OutputAbbreviation(dict, "Ioane", "(John)"),
- new OutputAbbreviation(dict, "Ioba", "(Job)"),
- new OutputAbbreviation(dict, "Ioela", "(Joel)"),
- new OutputAbbreviation(dict, "Ios.", "Iosua (Joshua)"),
- new OutputAbbreviation(dict, "Isa.", "Isaia (Isaiah)"),
- new OutputAbbreviation(dict, "Iuda", "(Jude)"),
- new OutputAbbreviation(dict, "Kanl.", "Kanawailua (Deuteronomy)"),
- new OutputAbbreviation(dict, "Kekah.", "Kekahuna (Ecclesiastes)"),
- new OutputAbbreviation(dict, "Kin.", "Kinohi (Genesis)"),
- new OutputAbbreviation(dict, "Kol.", "Kolosa (Colosians)"),
- new OutputAbbreviation(dict, "Kor.", "Korineto (Corinthians)"),
- new OutputAbbreviation(dict, "Luka", "(Luke)"),
- new OutputAbbreviation(dict, "Lunk.", "Lunakanawai (Judges)"),
- new OutputAbbreviation(dict, "Mal.", "Malaki (Malachi)"),
- new OutputAbbreviation(dict, "Mar.", "Mareko (Mark)"),
- new OutputAbbreviation(dict, "Mat.", "Mataio (Matthew)"),
- new OutputAbbreviation(dict, "Mele", "Mele a Solomona (Songs of Solomon)"),
- new OutputAbbreviation(dict, "Mika", "(Micah)"),
- new OutputAbbreviation(dict, "Nah.", "Nahelu (Numbers)"),
- new OutputAbbreviation(dict, "Nal.", "Nalii (Kings)"),
- new OutputAbbreviation(dict, "Neh.", "Nehemia (Nehemia)"),
- new OutputAbbreviation(dict, "Oih.", "Oihana (Acts)"),
- new OutputAbbreviation(dict, "Oihk.", "Oihanakahuna (Leviticus)"),
- new OutputAbbreviation(dict, "Oihn.", "Oihanaalii (Chronicles)"),
- new OutputAbbreviation(dict, "Pet.", "Petero (Peter)"),
- new OutputAbbreviation(dict, "Pilipi", "(Philippians)"),
- new OutputAbbreviation(dict, "Puk.", "Pukaana (Exodus)"),
- new OutputAbbreviation(dict, "Roma", "(Romans)"),
- new OutputAbbreviation(dict, "Ruta", "(Ruth)"),
- new OutputAbbreviation(dict, "Sam.", "Samuela (Samuel)"),
- new OutputAbbreviation(dict, "Sol.", "Solomona (Proverbs)"),
- new OutputAbbreviation(dict, "Tes.", "Tesalonike (Thessalonians)"),
- new OutputAbbreviation(dict, "Tim.", "Timoteo (Timothy)"),
- new OutputAbbreviation(dict, "Tito", "(Titus)"),
- new OutputAbbreviation(dict, "Zek.", "Zekaria (Zechariah)"),
- new OutputAbbreviation(dict, "Zep.", "Zepania (Zephaniah)"),
- });
+ dict.AddAbbreviation("And.", "Andrews dictionary, 1865; reference is given only if no evidence is available other than that in Andrews and Andrews-Parker (AP)");
+ dict.AddAbbreviation("AP", "Andrews-Parker dictionary, 1922; reference is given only if no evidence is available other than that in Andrews (And.) and Andrews-Parker");
+ dict.AddAbbreviation("Cap.", "beginning with a capital letter");
+ dict.AddAbbreviation("caus/sim.", "causative/simulative", AbbreviationType.Grammatical); // Optional third argument tags the abbreviation's type.
+ dict.AddAbbreviation("cf.", "compare", AbbreviationType.Auxiliary);
+ dict.AddAbbreviation("conj.", "conjunction", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("demon.", "demonstrative", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("Eng.", "word borrowed from English");
+ dict.AddAbbreviation("ex.", "example, examples", AbbreviationType.Auxiliary);
+ dict.AddAbbreviation("f.", "form (in names of plants)");
+ dict.AddAbbreviation("fig.", "figuratively");
+ dict.AddAbbreviation("For.", "Fornander, Hawaiian Antiquities (For. 4:297 = Fornander Vol. 4, p. 297)");
+ dict.AddAbbreviation("FS", "Elbert, Selections from Fornander");
+ dict.AddAbbreviation("GP", "Green and Pukui, Legend of Kawelo");
+ dict.AddAbbreviation("Gr.", "word probably borrowed from Greek");
+ dict.AddAbbreviation("Gram.", "Elbert and Pukui, Hawaiian Grammar");
+ dict.AddAbbreviation("Heb.", "word probably borrowed from Hebrew");
+ dict.AddAbbreviation("HM", "Beckwith, Hawaiian Mythology");
+ dict.AddAbbreviation("HP", "Handy, Hawaiian Planter");
+ dict.AddAbbreviation("Ii", "Ii, Fragments of Hawaiian History");
+ dict.AddAbbreviation("interr.", "interrogative", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("interj.", "interjection", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("Kam. 1964", "Kamakau, Ka Poʻe Kahiko");
+ dict.AddAbbreviation("Kam. 1976", "Kamakau, The Works of the People of Old");
+ dict.AddAbbreviation("Kel.", "Kelekona, Kaluaikoolau");
+ dict.AddAbbreviation("Kep.", "Beckwith, Kepelino");
+ dict.AddAbbreviation("KJV", "King James Version of the Bible");
+ dict.AddAbbreviation("KL.", "Beckwith, Kumulipo");
+ dict.AddAbbreviation("Laie", "Beckwith, Laieikawai");
+ dict.AddAbbreviation("lit.", "literally");
+ dict.AddAbbreviation("loc.n.", "locative noun", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("Malo", "Malo, Hawaiian Antiquities, 1951");
+ dict.AddAbbreviation("MK", "Ke Alanui o ka Lani, Oia ka Manuale Kakolika");
+ dict.AddAbbreviation("n.v.", "noun-verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("n.", "noun", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("Nak.", "Nakuina, Moolelo Hawaii ...");
+ dict.AddAbbreviation("Nānā.", "Pukui, Haertig, Lee, Nānā i ke Kumu"); // Key carries a trailing period, matching the "Nānā." form that CleanSourceHtml writes into the text.
+ dict.AddAbbreviation("Neal", "Neal, In Gardens of Hawaii, 1965");
+ dict.AddAbbreviation("num.", "numeral", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("par.", "particle", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("pas/imp.", "passive/imperative", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("PH", "Emerson, Pele and Hiiaka");
+ dict.AddAbbreviation("pl.", "plural", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("PCP", "Proto Central Polynesian");
+ dict.AddAbbreviation("PEP", "Proto East Polynesian");
+ dict.AddAbbreviation("PNP", "Proto Nuclear Polynesian");
+ dict.AddAbbreviation("poss.", "possessive", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("PPN", "Proto Polynesian");
+ dict.AddAbbreviation("prep.", "preposition", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("RC", "Kamakau, Ruling Chiefs");
+ dict.AddAbbreviation("redup.", "reduplication (for meanings of reduplications, see Gram. 6.2.2)");
+ dict.AddAbbreviation("RSV", "Holy Bible, Revised Standard Version");
+ dict.AddAbbreviation("sg.", "singular", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("sp., spp.", "species"); // NOTE(review): one key holding two forms ("sp." and "spp."); confirm this is intended rather than two separate entries.
+ dict.AddAbbreviation("TC", "Taro Collection");
+ dict.AddAbbreviation("UL", "Emerson, Unwritten Literature");
+ dict.AddAbbreviation("v.", "verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("var.", "variant, variety");
+ dict.AddAbbreviation("nvi.", "noun-intransitive verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("nvs.", "noun-stative verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("nvt.", "noun-transitive verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("vi.", "intransitive verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("vs.", "stative verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("vt.", "transitive verb", AbbreviationType.Grammatical);
+ dict.AddAbbreviation("Am.", "Amosa (Amos)"); // From here on: book-name abbreviations, with the English name in parentheses.
+ dict.AddAbbreviation("Dan.", "Daniela (Daniel)");
+ dict.AddAbbreviation("Epeso", "(Ephesians)");
+ dict.AddAbbreviation("Eset.", "Esetera (Esther)");
+ dict.AddAbbreviation("Ezek.", "Ezekiela (Ezekiel)");
+ dict.AddAbbreviation("Ezera", "(Ezra)");
+ dict.AddAbbreviation("Gal.", "Galatia (Galatians)");
+ dict.AddAbbreviation("Hagai", "(Haggai)");
+ dict.AddAbbreviation("Hal.", "Halelu (Psalms)");
+ dict.AddAbbreviation("Heb.", "Hebera (Hebrews)"); // NOTE(review): "Heb." is also registered earlier in this method ("word probably borrowed from Hebrew"); confirm AddAbbreviation tolerates duplicate keys.
+ dict.AddAbbreviation("Hoik.", "Hoikeana (Revelation)");
+ dict.AddAbbreviation("Hos.", "Hosea (Hosea)");
+ dict.AddAbbreviation("Iak.", "Iakobo (James)");
+ dict.AddAbbreviation("Ier.", "Ieremia (Jeremiah)");
+ dict.AddAbbreviation("Ioane", "(John)");
+ dict.AddAbbreviation("Ioba", "(Job)");
+ dict.AddAbbreviation("Ioela", "(Joel)");
+ dict.AddAbbreviation("Ios.", "Iosua (Joshua)");
+ dict.AddAbbreviation("Isa.", "Isaia (Isaiah)");
+ dict.AddAbbreviation("Iuda", "(Jude)");
+ dict.AddAbbreviation("Kanl.", "Kanawailua (Deuteronomy)");
+ dict.AddAbbreviation("Kekah.", "Kekahuna (Ecclesiastes)");
+ dict.AddAbbreviation("Kin.", "Kinohi (Genesis)");
+ dict.AddAbbreviation("Kol.", "Kolosa (Colosians)"); // NOTE(review): "Colosians" looks like a typo for "Colossians"; it is carried over unchanged from the removed list.
+ dict.AddAbbreviation("Kor.", "Korineto (Corinthians)");
+ dict.AddAbbreviation("Luka", "(Luke)");
+ dict.AddAbbreviation("Lunk.", "Lunakanawai (Judges)");
+ dict.AddAbbreviation("Mal.", "Malaki (Malachi)");
+ dict.AddAbbreviation("Mar.", "Mareko (Mark)");
+ dict.AddAbbreviation("Mat.", "Mataio (Matthew)");
+ dict.AddAbbreviation("Mele", "Mele a Solomona (Songs of Solomon)");
+ dict.AddAbbreviation("Mika", "(Micah)");
+ dict.AddAbbreviation("Nah.", "Nahelu (Numbers)");
+ dict.AddAbbreviation("Nal.", "Nalii (Kings)");
+ dict.AddAbbreviation("Neh.", "Nehemia (Nehemia)"); // NOTE(review): the parenthesized English name is probably meant to be "Nehemiah" — confirm against the source.
+ dict.AddAbbreviation("Oih.", "Oihana (Acts)");
+ dict.AddAbbreviation("Oihk.", "Oihanakahuna (Leviticus)");
+ dict.AddAbbreviation("Oihn.", "Oihanaalii (Chronicles)");
+ dict.AddAbbreviation("Pet.", "Petero (Peter)");
+ dict.AddAbbreviation("Pilipi", "(Philippians)");
+ dict.AddAbbreviation("Puk.", "Pukaana (Exodus)");
+ dict.AddAbbreviation("Roma", "(Romans)");
+ dict.AddAbbreviation("Ruta", "(Ruth)");
+ dict.AddAbbreviation("Sam.", "Samuela (Samuel)");
+ dict.AddAbbreviation("Sol.", "Solomona (Proverbs)");
+ dict.AddAbbreviation("Tes.", "Tesalonike (Thessalonians)");
+ dict.AddAbbreviation("Tim.", "Timoteo (Timothy)");
+ dict.AddAbbreviation("Tito", "(Titus)");
+ dict.AddAbbreviation("Zek.", "Zekaria (Zechariah)");
+ dict.AddAbbreviation("Zep.", "Zepania (Zephaniah)");
}
}
}
diff --git a/src/HawDict/Output/OutputAbbreviation.cs b/src/HawDict/Output/OutputAbbreviation.cs
deleted file mode 100644
index 37f8d99..0000000
--- a/src/HawDict/Output/OutputAbbreviation.cs
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) Jon Thysell
-// Licensed under the MIT License.
-
-using System;
-
-namespace HawDict
-{
- public class OutputAbbreviation
- {
- public OutputDictBase OutputDict { get; private set; }
-
- public string Key { get; private set; } = null;
- public string Value { get; private set; } = null;
-
- public AbbreviationType AbbreviationType { get; private set; } = AbbreviationType.None;
-
- public string XdxfKey
- {
- get
- {
- string key = StringUtils.WrapInTag(StringUtils.EscapeForXml(Key), "abbr_k");
-
- if (char.IsLower(Key[0]) && Key.Length > 1)
- {
- key += StringUtils.WrapInTag(StringUtils.EscapeForXml(char.ToUpper(Key[0]) + Key.Substring(1)), "abbr_k");
- }
-
- return key;
- }
- }
-
- public string XdxfValue
- {
- get
- {
- string value = StringUtils.EscapeForXml(Value);
-
- value = StringUtils.WrapInTag(value, "abbr_v");
-
- return value;
- }
- }
-
- public OutputAbbreviation(OutputDictBase dict, string key, string value, AbbreviationType abbreviationType = AbbreviationType.None)
- {
- OutputDict = dict ?? throw new ArgumentNullException(nameof(dict));
-
- Key = !string.IsNullOrWhiteSpace(key) ? key.Trim() : throw new ArgumentNullException(nameof(key));
- Value = !string.IsNullOrWhiteSpace(value) ? value.Trim() : throw new ArgumentNullException(nameof(value));
-
- AbbreviationType = abbreviationType;
- }
- }
-
- public enum AbbreviationType
- {
- None,
- Grammatical,
- Stylistic,
- Knowledge,
- Auxiliary,
- Other
- }
-}
diff --git a/src/HawDict/Output/OutputArticle.cs b/src/HawDict/Output/OutputArticle.cs
deleted file mode 100644
index 416184a..0000000
--- a/src/HawDict/Output/OutputArticle.cs
+++ /dev/null
@@ -1,258 +0,0 @@
-// Copyright (c) Jon Thysell
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text.RegularExpressions;
-
-namespace HawDict
-{
- public class OutputArticle
- {
- public OutputDictBase OutputDict { get; private set; }
-
- public string Key { get; private set; } = null;
- public string Value { get; private set; } = null;
-
- public string XdxfKey
- {
- get
- {
- string xdxfKey = "";
- foreach (string key in Key.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
- {
- string rawKey = StringUtils.EscapeForXml(key);
- xdxfKey += GetXdxfKey(rawKey);
- }
-
- return xdxfKey;
- }
- }
-
- public string XdxfValue
- {
- get
- {
- return GetXdxfValue();
- }
- }
-
- public string StarDictKey
- {
- get
- {
- return Key;
- }
- }
-
- public IEnumerable StarDictKeySynonyms
- {
- get
- {
- if (_starDictKeySynonyms is null)
- {
- _starDictKeySynonyms = new HashSet
- {
- StarDictKey
- };
-
- foreach (string key in StarDictKey.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
- {
- foreach (string synonym in GetSynonyms(key.Trim()))
- {
- _starDictKeySynonyms.Add(synonym);
- }
- }
-
- _starDictKeySynonyms.Remove(StarDictKey);
- }
- return _starDictKeySynonyms;
- }
- }
- private HashSet _starDictKeySynonyms;
-
- public string StarDictValue
- {
- get
- {
- string value = GetXdxfValue(true);
-
- value = value
- .Replace("", "").Replace("", "");
-
- value = value
- .Replace("", "").Replace("", "")
- .Replace("", "").Replace(" ", "");
-
- value = Regex.Replace(value, "([0-9]+)\\. ", " $1. ");
-
- if (value.Contains("2. "))
- {
- // Fix bolding number one for pre-text
- value = Regex.Replace(value, " (.*[^>])1\\. ", " $11. ");
- }
-
- return value;
- }
- }
-
- public OutputArticle(OutputDictBase dict, string key, string value)
- {
- OutputDict = dict ?? throw new ArgumentNullException(nameof(dict));
-
- Key = !string.IsNullOrWhiteSpace(key) ? key.Trim() : throw new ArgumentNullException(nameof(key));
- Value = !string.IsNullOrWhiteSpace(value) ? value.Trim() : throw new ArgumentNullException(nameof(value));
- }
-
- private static string GetXdxfKey(string key)
- {
- key = StringUtils.WrapInTag(key, StringUtils.SyllableDotUtf8, "opt");
- key = StringUtils.WrapInTag(key, ".", "opt");
-
- key = StringUtils.WrapInTag(key, "k");
-
- return key;
- }
-
- private string GetXdxfValue(bool keepDefinitionNumbers = false)
- {
- string value = StringUtils.EscapeForXml(Value);
-
- // Add abbreviation tags
- foreach (OutputAbbreviation abbreviation in OutputDict.Abbreviations)
- {
- value = AddXdxfAbbreviationTags(value, abbreviation.Key);
-
- if (char.IsLower(abbreviation.Key[0]) && abbreviation.Key.Length > 1)
- {
- value = AddXdxfAbbreviationTags(value, char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1));
- }
- }
-
- IEnumerable definitions = GetDefinitions(value, keepDefinitionNumbers);
-
- if (definitions.Count() > 1)
- {
- value = string.Join("", definitions);
- value = $"{value}";
- }
- else
- {
- value = $"{value}";
- }
-
- return value;
- }
-
- private static IEnumerable GetDefinitions(string value, bool keepDefinitionNumbers, int num = 1)
- {
- string numStr = $"{num}. ";
- string nextNumStr = $" {num + 1}. ";
-
- int foundIndex = value.IndexOf(numStr);
- int nextFoundIndex = value.IndexOf(nextNumStr, foundIndex + 1);
-
- if (num == 1 && foundIndex > 0 && nextFoundIndex > 0)
- {
- // Numbered definition with some pre-text
- if (keepDefinitionNumbers)
- {
- yield return value.Substring(0, nextFoundIndex);
- }
- else
- {
- yield return value[0..foundIndex] + value[(foundIndex + numStr.Length)..nextFoundIndex];
- }
- }
- else if (foundIndex == 0 && nextFoundIndex > 0)
- {
- // Numbered definition without pre-text
- if (keepDefinitionNumbers)
- {
- yield return value[0..nextFoundIndex];
- }
- else
- {
- yield return value[numStr.Length..nextFoundIndex];
- }
- }
- else if (foundIndex == 0)
- {
- // Last numbered definition
- if (keepDefinitionNumbers)
- {
- yield return value;
- }
- else
- {
- yield return value.Substring(numStr.Length);
- }
- }
- else
- {
- // No numbers, just one definition
- yield return value;
- }
-
- if (nextFoundIndex > 0)
- {
- foreach (string def in GetDefinitions(value.Substring(nextFoundIndex + 1), keepDefinitionNumbers, num + 1))
- {
- yield return def;
- }
- }
- }
-
- private static string AddXdxfAbbreviationTags(string value, string abbreviation)
- {
- value = value.Replace($" {abbreviation} ", $" {abbreviation} ");
- value = value.Replace($"({abbreviation} ", $"({abbreviation} ");
- value = value.Replace($" {abbreviation})", $" {abbreviation})");
- value = value.Replace($"({abbreviation})", $"({abbreviation})");
-
- value = value.Replace($"{abbreviation}.", $"{abbreviation}.");
-
- value = value.Replace($"{abbreviation};", $"{abbreviation};");
-
- value = value.Replace($"{abbreviation},", $"{abbreviation},");
- value = value.Replace($"({abbreviation},", $"({abbreviation},");
-
- value = value.Replace($"{abbreviation}/", $"{abbreviation}/");
- value = value.Replace($"/{abbreviation}", $"/{abbreviation}");
-
- value = value.Replace($"—{abbreviation}", $"—{abbreviation} ");
-
- if (value.StartsWith(abbreviation + " "))
- {
- value = $"{abbreviation}{value.Substring(abbreviation.Length)}";
- }
-
- if (value.EndsWith(" " + abbreviation))
- {
- value = $"{value.Substring(0, value.Length - abbreviation.Length)}{abbreviation}";
- }
-
- return value;
- }
-
- private static HashSet GetSynonyms(string key)
- {
- HashSet synonyms = new HashSet
- {
- key
- };
-
- string s = key.Replace(StringUtils.SyllableDotUtf8, "").Replace(".", "").Replace("*", "").Replace("-", "");
-
- synonyms.Add(s);
- synonyms.Add(StringUtils.ReplaceOkina(s));
- synonyms.Add(StringUtils.ReplaceOkina(s, ""));
- synonyms.Add(StringUtils.RemoveDiacritics(s));
- synonyms.Add(StringUtils.ReplaceOkina(StringUtils.RemoveDiacritics(s)));
- synonyms.Add(StringUtils.ReplaceOkina(StringUtils.RemoveDiacritics(s), ""));
-
- return synonyms;
- }
- }
-}
diff --git a/src/HawDict/Output/OutputDictBase.cs b/src/HawDict/Output/OutputDictBase.cs
deleted file mode 100644
index 688d959..0000000
--- a/src/HawDict/Output/OutputDictBase.cs
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) Jon Thysell
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-
-namespace HawDict
-{
- public abstract class OutputDictBase
- {
- public string ID { get; private set; }
-
- public string FormatType { get; private set; }
-
- public TranslationType TranslationType { get; private set; }
-
- #region MetaData
-
- public string Title { get; set; } = null;
-
- public string Description { get; set; } = null;
-
- public DateTime CreationDateTime { get; private set; } = DateTime.UtcNow;
-
- public List Authors { get; private set; } = new List();
-
- public static string FileVersion => AppInfo.Version;
-
- #endregion
-
- public List Articles { get; private set; } = new List();
-
- public List Abbreviations { get; private set; } = new List();
-
- public OutputDictBase(string id, string formatType, TranslationType translationType)
- {
- ID = !string.IsNullOrWhiteSpace(id) ? id : throw new ArgumentNullException(nameof(id));
- FormatType = !string.IsNullOrWhiteSpace(formatType) ? formatType : throw new ArgumentNullException(nameof(formatType));
- TranslationType = translationType;
- }
-
- public abstract void Save(string dictDir);
- }
-}
diff --git a/src/HawDict/Output/StarDictDictionary.cs b/src/HawDict/Output/StarDictDictionary.cs
deleted file mode 100644
index 2396f2a..0000000
--- a/src/HawDict/Output/StarDictDictionary.cs
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright (c) Jon Thysell
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-
-namespace HawDict
-{
- public class StarDictDictionary : OutputDictBase
- {
- private static readonly StarDictArticleComparer _keyComparer = new StarDictArticleComparer();
-
- public StarDictDictionary(string id, TranslationType translationType) : base(id, "StarDict", translationType) { }
-
- public override void Save(string dictDir)
- {
- if (string.IsNullOrWhiteSpace(dictDir))
- {
- throw new ArgumentNullException(nameof(dictDir));
- }
-
- SaveDataFiles(dictDir, out long idxFileSize, out int synWordCount);
-
- SaveIfoFile(dictDir, idxFileSize, synWordCount);
- }
-
- private void SaveDataFiles(string dictDir, out long idxFileSize, out int synWordCount)
- {
- string dictFile = Path.Combine(dictDir, $"{ID}.{TranslationType}.StarDict.dict");
- string idxFile = Path.Combine(dictDir, $"{ID}.{TranslationType}.StarDict.idx");
- string synFile = Path.Combine(dictDir, $"{ID}.{TranslationType}.StarDict.syn");
-
- BinaryWriter dictWriter = new BinaryWriter(new FileStream(dictFile, FileMode.Create), Encoding.UTF8);
- BinaryWriter idxWriter = new BinaryWriter(new FileStream(idxFile, FileMode.Create), Encoding.UTF8);
- BinaryWriter synWriter = new BinaryWriter(new FileStream(synFile, FileMode.Create), Encoding.UTF8);
-
- Dictionary articleIndexes = new Dictionary();
-
- uint index = 0;
- foreach (OutputArticle article in Articles.OrderBy(a => a.StarDictKey, _keyComparer))
- {
- long dictArticleOffset = dictWriter.BaseStream.Length;
-
- idxWriter.Write(article.StarDictKey.ToCharArray());
- idxWriter.Write('\0');
-
- dictWriter.Write(article.StarDictValue.ToCharArray());
-
- dictWriter.Flush();
-
- long dictArticleLength = dictWriter.BaseStream.Length - dictArticleOffset;
-
- WriteBigEndian(idxWriter, (uint)dictArticleOffset);
- WriteBigEndian(idxWriter, (uint)dictArticleLength);
-
- idxWriter.Flush();
-
- articleIndexes[article] = index;
- index++;
- }
-
- dictWriter.Flush();
- dictWriter.Close();
-
- idxWriter.Flush();
- idxFileSize = idxWriter.BaseStream.Length;
- idxWriter.Close();
-
- List> synonyms = new List>();
-
- foreach (KeyValuePair articleIndex in articleIndexes)
- {
- uint keyIndex = articleIndex.Value;
- foreach (string synonym in articleIndex.Key.StarDictKeySynonyms)
- {
- synonyms.Add(new KeyValuePair(synonym, keyIndex));
- }
- }
-
- foreach (KeyValuePair synonym in synonyms.OrderBy(kvp => kvp.Key, _keyComparer))
- {
- synWriter.Write(synonym.Key.ToCharArray());
- synWriter.Write('\0');
-
- WriteBigEndian(synWriter, synonym.Value);
- }
-
- synWordCount = synonyms.Count;
-
- synWriter.Flush();
- synWriter.Close();
- }
-
- private void SaveIfoFile(string dictDir, long idxFileSize, int synWordCount)
- {
- string ifoFile = Path.Combine(dictDir, $"{ID}.{TranslationType}.StarDict.ifo");
-
- using BinaryWriter ifoWriter = new BinaryWriter(new FileStream(ifoFile, FileMode.Create), Encoding.UTF8);
-
- WriteLine(ifoWriter, "StarDict's dict ifo file");
- WriteLine(ifoWriter, "version=2.4.2");
-
- WriteLine(ifoWriter, "bookname={0}", Title);
- WriteLine(ifoWriter, "wordcount={0}", Articles.Count);
- WriteLine(ifoWriter, "synwordcount={0}", synWordCount);
- WriteLine(ifoWriter, "idxfilesize={0}", idxFileSize);
- WriteLine(ifoWriter, "sametypesequence=h");
-
- WriteLine(ifoWriter, "author={0}", string.Join(", ", Authors));
- WriteLine(ifoWriter, "description={0}", Description);
- WriteLine(ifoWriter, "date={0}", CreationDateTime.ToString("yyyy.MM.dd"));
- }
-
- private static void WriteLine(BinaryWriter bw, string line, params object[] args)
- {
- bw.Write(string.Format(line, args).ToCharArray());
- bw.Write('\r');
- bw.Write('\n');
- }
-
- private static void WriteBigEndian(BinaryWriter bw, uint value)
- {
- byte[] bytes = BitConverter.GetBytes(value);
-
- if (BitConverter.IsLittleEndian)
- {
- Array.Reverse(bytes);
- }
-
- bw.Write(bytes);
- }
-
- private class StarDictArticleComparer : IComparer
- {
- public int Compare(string x, string y)
- {
- int result = AsciiStrCmp(x, y);
- return result == 0 ? StrCmp(x, y) : result;
- }
-
- private static int AsciiStrCmp(string x, string y)
- {
- int[] bx = Encoding.UTF8.GetBytes(x).Select(b => (int)(b)).ToArray();
- int[] by = Encoding.UTF8.GetBytes(y).Select(b => (int)(b)).ToArray();
-
- int minLength = Math.Min(bx.Length, by.Length);
-
- for (int i = 0; i < minLength; i++)
- {
- int cx = AsciiLower(bx[i]);
- int cy = AsciiLower(by[i]);
-
- if (cx != cy)
- {
- return cx - cy;
- }
- }
-
- return bx.Length - by.Length;
- }
-
- private static int AsciiLower(int c)
- {
- if (c >= 'A' && c <= 'Z')
- {
- return (c - 'A' + 'a');
- }
- return c;
- }
-
- private static int StrCmp(string x, string y)
- {
- int[] bx = Encoding.UTF8.GetBytes(x).Select(b => (int)(b)).ToArray();
- int[] by = Encoding.UTF8.GetBytes(y).Select(b => (int)(b)).ToArray();
-
- int minLength = Math.Min(bx.Length, by.Length);
-
- for (int i = 0; i < minLength; i++)
- {
- int cx = bx[i];
- int cy = by[i];
-
- if (cx != cy)
- {
- return cx - cy;
- }
- }
-
- return bx.Length - by.Length;
- }
- }
- }
-}
\ No newline at end of file
diff --git a/src/HawDict/Output/XdxfDictionary.cs b/src/HawDict/Output/XdxfDictionary.cs
deleted file mode 100644
index 9b0e7fc..0000000
--- a/src/HawDict/Output/XdxfDictionary.cs
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright (c) Jon Thysell
-// Licensed under the MIT License.
-
-using System;
-using System.IO;
-using System.Text;
-using System.Xml;
-
-namespace HawDict
-{
- public class XdxfDictionary : OutputDictBase
- {
- #region MetaData
-
- public string FullTitle { get; set; } = null;
-
- public string SrcUrl { get; set; } = null;
-
- #endregion
-
- public XdxfDictionary(string id, TranslationType translationType) : base(id, "XDXF", translationType) { }
-
- public override void Save(string dictDir)
- {
- if (string.IsNullOrWhiteSpace(dictDir))
- {
- throw new ArgumentNullException(nameof(dictDir));
- }
-
- string xdxfFile = Path.Combine(dictDir, $"{ID}.{TranslationType}.dict.xdxf");
-
- using FileStream fs = new FileStream(xdxfFile, FileMode.Create);
-
- SaveDictFile(fs);
- }
-
- private void SaveDictFile(Stream output)
- {
- // Write to StringBuilder
- StringBuilder sb = new StringBuilder();
- using (XmlWriter xw = XmlWriter.Create(sb, new XmlWriterSettings() { Encoding = Encoding.UTF8, CloseOutput = false }))
- {
- xw.WriteStartDocument();
-
- xw.WriteStartElement("xdxf");
-
- xw.WriteAttributeString("format", "logical");
- xw.WriteAttributeString("revision", "33");
- xw.WriteAttributeString("lang_from", TranslationType == TranslationType.HawToEng ? "HAW" : "ENG");
- xw.WriteAttributeString("lang_to", TranslationType == TranslationType.HawToEng ? "ENG" : "HAW");
-
- WriteMetaInfoElements(xw);
-
- WriteArticles(xw);
-
- xw.WriteEndElement(); // xdxf
-
- xw.WriteEndDocument();
- }
-
- // Load from StringBuilder
- XmlDocument doc = new XmlDocument();
- doc.LoadXml(sb.ToString());
-
- // Write to stream
- using (XmlWriter xw = XmlWriter.Create(output, new XmlWriterSettings() { Encoding = Encoding.UTF8, Indent = true, CloseOutput = false }))
- {
- doc.Save(xw);
- }
- }
-
- private void WriteMetaInfoElements(XmlWriter xw)
- {
- xw.WriteStartElement("meta_info");
-
- xw.WriteElementString("title", Title);
-
- xw.WriteElementString("full_title", FullTitle);
-
- if (Authors.Count > 0)
- {
- xw.WriteStartElement("authors");
-
- foreach (string author in Authors)
- {
- WriteElementStringIfNotNull(xw, "author", author);
- }
-
- xw.WriteEndElement(); // authors
- }
-
- xw.WriteElementString("description", Description);
-
- if (Abbreviations.Count > 0)
- {
- xw.WriteStartElement("abbreviations");
-
- foreach (OutputAbbreviation abbreviation in Abbreviations)
- {
- xw.WriteStartElement("abbr_def");
-
- switch (abbreviation.AbbreviationType)
- {
- case AbbreviationType.Grammatical:
- xw.WriteAttributeString("type", "grm");
- break;
- case AbbreviationType.Stylistic:
- xw.WriteAttributeString("type", "stl");
- break;
- case AbbreviationType.Knowledge:
- xw.WriteAttributeString("type", "knl");
- break;
- case AbbreviationType.Auxiliary:
- xw.WriteAttributeString("type", "aux");
- break;
- case AbbreviationType.Other:
- xw.WriteAttributeString("type", "oth");
- break;
- }
-
- xw.WriteRaw(abbreviation.XdxfKey);
- xw.WriteRaw(abbreviation.XdxfValue);
-
- xw.WriteEndElement(); // abbr_def
- }
-
- xw.WriteEndElement(); // abbreviations
- }
-
- xw.WriteElementString("file_ver", FileVersion);
-
- xw.WriteElementString("creation_date", CreationDateTime.Date.ToString("dd-MM-yyyy"));
-
- WriteElementStringIfNotNull(xw, "dict_src_url", SrcUrl);
-
- xw.WriteEndElement(); // meta_info
- }
-
- private void WriteArticles(XmlWriter xw)
- {
- if (Articles.Count > 0)
- {
- xw.WriteStartElement("lexicon");
-
- foreach (OutputArticle article in Articles)
- {
- xw.WriteStartElement("ar");
-
- xw.WriteRaw(article.XdxfKey);
- xw.WriteRaw(article.XdxfValue);
-
- xw.WriteEndElement(); // ar
- }
-
- xw.WriteEndElement(); // lexicon
- }
- }
-
- private static void WriteElementStringIfNotNull(XmlWriter xw, string localName, string value)
- {
- if (!string.IsNullOrWhiteSpace(value))
- {
- xw.WriteElementString(localName, value);
- }
- }
- }
-}
diff --git a/src/HawDict/StringUtils.cs b/src/HawDict/StringUtils.cs
index b3f3c0a..ffdd7f9 100644
--- a/src/HawDict/StringUtils.cs
+++ b/src/HawDict/StringUtils.cs
@@ -141,54 +141,6 @@ public static string FixSentenceEnd(string s)
private static readonly Regex ListSplitterRegex = new Regex(@"([^\(][a-zāēīōū])([,;:])(ʻ?[a-zA-ZāēīōūĀĒĪŌŪʻ][^\)])", RegexOptions.Compiled);
private static readonly Regex SentenceSplitterRegex = new Regex(@"([a-zāēīōū])([\.\!\?])(ʻ?[A-ZĀĒĪŌŪ])", RegexOptions.Compiled);
- public static string EscapeForXml(string s)
- {
- if (string.IsNullOrWhiteSpace(s))
- {
- throw new ArgumentNullException(nameof(s));
- }
-
- return s
- .Replace("&", "&")
- .Replace("<", "<")
- .Replace(">", ">").Trim();
- }
-
- public static string WrapInTag(string s, string tag)
- {
- if (string.IsNullOrWhiteSpace(s))
- {
- throw new ArgumentNullException(nameof(s));
- }
-
- if (string.IsNullOrWhiteSpace(tag))
- {
- throw new ArgumentNullException(nameof(tag));
- }
-
- return $"<{tag}>{s}{tag}>";
- }
-
- public static string WrapInTag(string s, string target, string tag)
- {
- if (string.IsNullOrWhiteSpace(s))
- {
- throw new ArgumentNullException(nameof(s));
- }
-
- if (string.IsNullOrWhiteSpace(target))
- {
- throw new ArgumentNullException(nameof(target));
- }
-
- if (string.IsNullOrWhiteSpace(tag))
- {
- throw new ArgumentNullException(nameof(tag));
- }
-
- return s.Replace(target, WrapInTag(target, tag)).Trim();
- }
-
public static string ReplaceOkina(string s, string replacement = "'")
{
if (string.IsNullOrWhiteSpace(s))
|