Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HawDict ChangeLog #

## next ##

* Switched to using QuickDict for StarDict and XDXF creation
* Fixed --format filtering issue

## v0.17.3 ##

* Fixed build break
Expand Down
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2018-2024 Jon Thysell
Copyright (c) 2018-2025 Jon Thysell

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ HawDict is open-source under the MIT license.

HawDict does not include any copyrighted dictionary data - it is just a converter. HawDict uses the [Html Agility Pack](https://github.com/zzzprojects/html-agility-pack/) to download and parse the dictionary data from [Ulukau](https://ulukau.org/) at runtime. All dictionary data (terms, definitions, etc.) belongs to its respective copyright owners.

HawDict Copyright (c) 2018-2024 Jon Thysell
HawDict Copyright (c) 2018-2025 Jon Thysell
2 changes: 1 addition & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<Product>HawDict</Product>
<Company>Jon Thysell</Company>
<Authors>Jon Thysell</Authors>
<Copyright>Copyright © 2018-2024 Jon Thysell</Copyright>
<Copyright>Copyright © 2018-2025 Jon Thysell</Copyright>
<PackageLicenseFile>LICENSE.md</PackageLicenseFile>
<RepositoryUrl>https://github.com/jonthysell/HawDict</RepositoryUrl>
<TargetFramework>net6.0</TargetFramework>
Expand Down
4 changes: 2 additions & 2 deletions src/HawDict.sln
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.31112.23
# Visual Studio Version 17
VisualStudioVersion = 17.14.36301.6
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HawDict", "HawDict\HawDict.csproj", "{50C2D599-98A1-496B-8E0D-561071C54BE5}"
EndProject
Expand Down
2 changes: 2 additions & 0 deletions src/HawDict/HawDict.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
<AssemblyName>HawDict</AssemblyName>
<RootNamespace>HawDict</RootNamespace>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.42" />
<PackageReference Include="QuickDict" Version="0.9.0" />
</ItemGroup>
</Project>
168 changes: 131 additions & 37 deletions src/HawDict/Input/InputDictBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

using QuickDict;

namespace HawDict
{
Expand All @@ -15,9 +18,9 @@ namespace HawDict
public enum OutputFormats
{
None,
CleanTxt,
StarDict,
Xdxf,
CleanTxt = 0x1,
StarDict = 0x2,
Xdxf = 0x4,
All = CleanTxt + StarDict + Xdxf,
}

Expand Down Expand Up @@ -79,12 +82,16 @@ public void Process(string rootDir, OutputFormats outputFormats = OutputFormats.

if (outputFormats.HasFlag(OutputFormats.StarDict))
{
SaveOutputDict<StarDictDictionary>();
string starDictFile = Path.Combine(DictDir, $"{ID}.{TranslationType}.StarDict.ifo");
SaveStarDictFile(starDictFile);
}

if (outputFormats.HasFlag(OutputFormats.Xdxf))
{
SaveOutputDict<XdxfDictionary>();
string xdxfFile = Path.Combine(DictDir, $"{ID}.{TranslationType}.dict.xdxf");
SaveXdxfFile(xdxfFile);

Log("Building XDXF dictionary.");
}

Log("Save end.");
Expand Down Expand Up @@ -128,57 +135,144 @@ private void SaveCleanFile(string cleanPath)
Log("Saved {0} entries.", count);
}

protected abstract void GetRawDataFromSource();

protected abstract IEnumerable<KeyValuePair<string, string>> GetCleanEntries();

private void AddArticles(OutputDictBase dict)
private DictionaryMetadata GetMetadata()
{
dict.Articles.AddRange(GetCleanEntries().Select(kvp => new OutputArticle(dict, kvp.Key, kvp.Value)));
var metadata = new DictionaryMetadata();

metadata.ShortTitle = ShortTitle;
metadata.LongTitle = LongTitle;
metadata.Description = Description;
metadata.Authors.AddRange(Authors);
metadata.SrcUrl = SrcUrl;
metadata.ArticleKeyLangCode = TranslationType == TranslationType.EngToHaw ? "ENG" : "HAW";
metadata.ArticleValueLangCode = TranslationType == TranslationType.EngToHaw ? "HAW" : "ENG";
metadata.FileVersion = AppInfo.Version;

return metadata;
}

private void SaveOutputDict<T>() where T : OutputDictBase
private void SaveStarDictFile(string starDictPath)
{
OutputDictBase outputDict = GetOutputDict<T>();
var dict = new StarDictDictionary(GetMetadata());

Log("Building {0} dictionary.", outputDict.FormatType);
AddArticles(outputDict);
dict.GetStarDictSynonymsFromArticle = a =>
{
HashSet<string> synonyms = new HashSet<string>
{
a.Key
};

Log("Saving {0} dictionary.", outputDict.FormatType);
outputDict.Save(DictDir);
}
foreach (string key in a.Key.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
{
string s = key.Replace(StringUtils.SyllableDotUtf8, "").Replace(".", "").Replace("*", "").Replace("-", "");

synonyms.Add(s);
synonyms.Add(StringUtils.ReplaceOkina(s));
synonyms.Add(StringUtils.ReplaceOkina(s, ""));
synonyms.Add(StringUtils.RemoveDiacritics(s));
synonyms.Add(StringUtils.ReplaceOkina(StringUtils.RemoveDiacritics(s)));
synonyms.Add(StringUtils.ReplaceOkina(StringUtils.RemoveDiacritics(s), ""));
}

private OutputDictBase GetOutputDict<T>() where T : OutputDictBase
{
OutputDictBase dict = null;
return synonyms;
};

if (typeof(T) == typeof(XdxfDictionary))
dict.GetValueFromArticle = a =>
{
dict = new XdxfDictionary(ID, TranslationType)
var valueSB = new StringBuilder();

foreach (var definition in a.Value.GetDefinitions(true))
{
Title = ShortTitle,
FullTitle = LongTitle,
Description = Description,
SrcUrl = SrcUrl,
};
var value = definition;
// Add abbreviations
foreach (var abbreviation in a.Parent.Abbreviations)
{
value = value.WrapInTag(abbreviation.Key, "i", StringWrapInTagOptions.WrapWholeWordsOnly);
if (abbreviation.Key.Length > 1 && char.IsLower(abbreviation.Key[0]))
{
value = value.WrapInTag(char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1), "i", StringWrapInTagOptions.WrapWholeWordsOnly);
}
}

value = value.WrapInTag("p");

// Add bold for numbering
value = Regex.Replace(value, "<p>([0-9]+)\\. ", "<p><b>$1</b>. ");
if (value.Contains("<b>2</b>. "))
{
// Fix bolding number one for pre-text
value = Regex.Replace(value, "<p>(.*[^>])1\\. ", "<p>$1<b>1</b>. ");
}

valueSB.Append(value);
}

return valueSB.ToString();
};

Log("Building StarDict dictionary.");

foreach (var kvp in GetCleanEntries())
{
dict.AddArticle(kvp.Key, kvp.Value);
}
else if (typeof(T) == typeof(StarDictDictionary))

AddAbbreviations(dict);

Log("Saving StarDict dictionary.");

dict.Save(starDictPath);
}

private void SaveXdxfFile(string xdxfPath)
{
var dict = new XdxfDictionary(GetMetadata());

dict.GetXdxfKeysFromAbbreviation = a =>
{
dict = new StarDictDictionary(ID, TranslationType)
var list = new List<string>() { a.Key };
if (a.Key.Length > 1 && char.IsLower(a.Key[0]))
{
Title = LongTitle,
Description = Description,
};
}
list.Add(char.ToUpper(a.Key[0]) + a.Key.Substring(1));
}
return list;
};

dict.Authors.AddRange(Authors);
dict.GetXdxfKeysFromArticle = a =>
{
return a.Key.Split(',', StringSplitOptions.RemoveEmptyEntries).ToList();
};

dict.GetXdxfKeyOptionalTerms = () =>
{
return new HashSet<string>() { ".", StringUtils.SyllableDotUtf8 };
};

dict.GetXdxfValuesFromArticle = a =>
{
return a.Value.GetDefinitions(false).ToList();
};

Log("Building XDXF dictionary.");

foreach (var kvp in GetCleanEntries())
{
dict.AddArticle(kvp.Key, kvp.Value);
}

AddAbbreviations(dict);

return dict;
Log("Saving XDXF dictionary.");

dict.Save(xdxfPath);
}

protected abstract void AddAbbreviations(OutputDictBase dict);
protected abstract void GetRawDataFromSource();

protected abstract IEnumerable<KeyValuePair<string, string>> GetCleanEntries();

protected abstract void AddAbbreviations(DictionaryBase dict);

}

public enum TranslationType
Expand Down
93 changes: 46 additions & 47 deletions src/HawDict/Input/MamakaKaiaoInputDict.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

using HtmlAgilityPack;

using QuickDict;

namespace HawDict
{
public class MamakaKaiaoInputDict : HtmlInputDict
Expand Down Expand Up @@ -251,7 +253,7 @@ protected override string[] ParseEntryNode(HtmlNode node)
{
string entryName = node.FirstChild.OuterHtml;
string entryValue = node.InnerHtml.Remove(0, entryName.Length);

try
{
return new string[] { StringUtils.NormalizeWhiteSpace(StringUtils.SingleLineNoTabs(entryName)), StringUtils.NormalizeWhiteSpace(StringUtils.SingleLineNoTabs(entryValue)) };
Expand All @@ -269,53 +271,50 @@ protected override string FinalCleanValue(string value)
return StringUtils.FixSentenceSpacing(value);
}

protected override void AddAbbreviations(OutputDictBase dict)
protected override void AddAbbreviations(DictionaryBase dict)
{
dict.Abbreviations.AddRange(new OutputAbbreviation[]
{
new OutputAbbreviation(dict, "abb.", "abbreviation"),
new OutputAbbreviation(dict, "Bib.", "Bible"),
new OutputAbbreviation(dict, "cf.", "compare", AbbreviationType.Auxiliary),
new OutputAbbreviation(dict, "comb.", "combined form"),
new OutputAbbreviation(dict, "dic.", "dictionary definition"),
new OutputAbbreviation(dict, "e.g.", "for example", AbbreviationType.Auxiliary),
new OutputAbbreviation(dict, "Eng.", "English"),
new OutputAbbreviation(dict, "ext. mng.", "extended meaning"),
new OutputAbbreviation(dict, "i.e.", "that is", AbbreviationType.Auxiliary),
new OutputAbbreviation(dict, "inv.", "invention"),
new OutputAbbreviation(dict, "Japn.", "Japanese"),
new OutputAbbreviation(dict, "lit.", "literally"),
new OutputAbbreviation(dict, "mān.", "mānaleo (native speaker)"),
new OutputAbbreviation(dict, "new mng.", "new meaning"),
new OutputAbbreviation(dict, "PPN", "Proto Polynesian"),
new OutputAbbreviation(dict, "redup.", "reduplication"),
new OutputAbbreviation(dict, "sh.", "shortened form"),
new OutputAbbreviation(dict, "sp. var.", "spelling variation"),
new OutputAbbreviation(dict, "Tah.", "Tahitian"),
new OutputAbbreviation(dict, "trad.", "traditional literary sources"),
new OutputAbbreviation(dict, "var.", "variation"),
new OutputAbbreviation(dict, "ham", "hamani (transitive verb)", AbbreviationType.Grammatical),
new OutputAbbreviation(dict, "heh", "hehele (intransitive verb)", AbbreviationType.Grammatical),
new OutputAbbreviation(dict, "ʻaʻ", "ʻaʻano (stative verb)", AbbreviationType.Grammatical),
new OutputAbbreviation(dict, "kik", "kikino (common noun)", AbbreviationType.Grammatical),
new OutputAbbreviation(dict, "iʻoa", "iʻoa (proper noun)", AbbreviationType.Grammatical),
new OutputAbbreviation(dict, "EK", "Elama Kanahele"),
new OutputAbbreviation(dict, "HA", "Henry Auwae"),
new OutputAbbreviation(dict, "HHLH", "Helen Haleola Lee Hong"),
new OutputAbbreviation(dict, "HKM", "Harry Kunihi Mitchell"),
new OutputAbbreviation(dict, "JPM", "Joseph Puipui Makaai"),
new OutputAbbreviation(dict, "KKK", "Kaui Keola Keamoai"),
new OutputAbbreviation(dict, "LK", "Louise Keliihoomalu"),
new OutputAbbreviation(dict, "MMLH", "Martha Manoanoa Lum Ho"),
new OutputAbbreviation(dict, "MW", "Minnie Whitford"),
new OutputAbbreviation(dict, "Anatomia", "Judd, Gerrit P. Anatomia"),
new OutputAbbreviation(dict, "Bihopa", "Bihopa, E. A. Haawina Mua o ka Hoailona Helu"),
new OutputAbbreviation(dict, "Bounty", "HeMoolelo no na Luina Kipi o ka Moku Bounty"),
new OutputAbbreviation(dict, "Legendre", "Legendre, A. M. Ke Anahonua"),
new OutputAbbreviation(dict, "Judd", "Judd et al. Hawaiian Language Imprints, 1822-1899"),
new OutputAbbreviation(dict, "Pakaa", "Nakuina, Moses K. Pakaa a me Ku-a-Pakaa"),
new OutputAbbreviation(dict, "Wilcox", "Wilcox, Robert"),
});
dict.AddAbbreviation("abb.", "abbreviation");
dict.AddAbbreviation("Bib.", "Bible");
dict.AddAbbreviation("cf.", "compare", AbbreviationType.Auxiliary);
dict.AddAbbreviation("comb.", "combined form");
dict.AddAbbreviation("dic.", "dictionary definition");
dict.AddAbbreviation("e.g.", "for example", AbbreviationType.Auxiliary);
dict.AddAbbreviation("Eng.", "English");
dict.AddAbbreviation("ext. mng.", "extended meaning");
dict.AddAbbreviation("i.e.", "that is", AbbreviationType.Auxiliary);
dict.AddAbbreviation("inv.", "invention");
dict.AddAbbreviation("Japn.", "Japanese");
dict.AddAbbreviation("lit.", "literally");
dict.AddAbbreviation("mān.", "mānaleo (native speaker)");
dict.AddAbbreviation("new mng.", "new meaning");
dict.AddAbbreviation("PPN", "Proto Polynesian");
dict.AddAbbreviation("redup.", "reduplication");
dict.AddAbbreviation("sh.", "shortened form");
dict.AddAbbreviation("sp. var.", "spelling variation");
dict.AddAbbreviation("Tah.", "Tahitian");
dict.AddAbbreviation("trad.", "traditional literary sources");
dict.AddAbbreviation("var.", "variation");
dict.AddAbbreviation("ham", "hamani (transitive verb)", AbbreviationType.Grammatical);
dict.AddAbbreviation("heh", "hehele (intransitive verb)", AbbreviationType.Grammatical);
dict.AddAbbreviation("ʻaʻ", "ʻaʻano (stative verb)", AbbreviationType.Grammatical);
dict.AddAbbreviation("kik", "kikino (common noun)", AbbreviationType.Grammatical);
dict.AddAbbreviation("iʻoa", "iʻoa (proper noun)", AbbreviationType.Grammatical);
dict.AddAbbreviation("EK", "Elama Kanahele");
dict.AddAbbreviation("HA", "Henry Auwae");
dict.AddAbbreviation("HHLH", "Helen Haleola Lee Hong");
dict.AddAbbreviation("HKM", "Harry Kunihi Mitchell");
dict.AddAbbreviation("JPM", "Joseph Puipui Makaai");
dict.AddAbbreviation("KKK", "Kaui Keola Keamoai");
dict.AddAbbreviation("LK", "Louise Keliihoomalu");
dict.AddAbbreviation("MMLH", "Martha Manoanoa Lum Ho");
dict.AddAbbreviation("MW", "Minnie Whitford");
dict.AddAbbreviation("Anatomia", "Judd, Gerrit P. Anatomia");
dict.AddAbbreviation("Bihopa", "Bihopa, E. A. Haawina Mua o ka Hoailona Helu");
dict.AddAbbreviation("Bounty", "HeMoolelo no na Luina Kipi o ka Moku Bounty");
dict.AddAbbreviation("Legendre", "Legendre, A. M. Ke Anahonua");
dict.AddAbbreviation("Judd", "Judd et al. Hawaiian Language Imprints, 1822-1899");
dict.AddAbbreviation("Pakaa", "Nakuina, Moses K. Pakaa a me Ku-a-Pakaa");
dict.AddAbbreviation("Wilcox", "Wilcox, Robert");
}
}
}
Loading
Loading