Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,32 @@ If a browser tab already translates it, you don't need WinLens. For everything e
- Upscales the screenshot before OCR and picks the best recognizer per text block, which helps accuracy on small text.
- Dark control panel. You can change the target language straight from the overlay.
- Right-click a block to copy the original text or the translation.
- Optional OpenAI-compatible API engine (OpenAI, DeepSeek, Ollama, …) for higher-quality, context-aware translation. Off by default; the free built-in engine stays the fallback.
- Optional "launch at startup". Otherwise it stays out of the way in the tray.

## Control panel

A small tray app with a dark control panel. Pick the target language, set the hotkey,
choose the OCR source language, and toggle launch-at-startup.
choose the OCR source language, pick the translation engine, and toggle launch-at-startup.

<div align="center">
<img src="assets/settings.png" alt="WinLens control panel" width="330"/>
</div>

### Translation engine

WinLens translates with the free built-in engine (Google, MyMemory fallback) out of the box —
no setup, no key. If you want higher-quality, context-aware translation, pick **OpenAI-compatible
API** under *Translation engine* and fill in:

- **API base URL** — e.g. `https://api.openai.com/v1`, `https://api.deepseek.com/v1`, or a local `http://localhost:11434/v1` (Ollama).
- **API key** — your bearer token.
- **Model** — e.g. `gpt-4o-mini`, `deepseek-chat`.

The key is stored locally in `%APPDATA%\WinLens\settings.json` and sent only to the endpoint you
configure. If a request fails (bad key, no network, rate limit), WinLens falls back to the
built-in engine so text is never left untranslated.

## Installation

### Download
Expand Down Expand Up @@ -144,7 +159,7 @@ Hotkey > capture screen > upscale > OCR (per script) > translate > overlay in pl
2. Upscale the image about 2x so small UI text is recognized more reliably.
3. Run every installed OCR recognizer and keep from each only the blocks whose script matches
it (Latin from the Latin engine, CJK from the CJK engine), then drop overlapping duplicates.
4. Translate each line (Google endpoint, with a MyMemory fallback), cached per session.
4. Translate each line, cached per session. By default the free built-in engine is used (Google endpoint, with a MyMemory fallback). If an OpenAI-compatible API is configured in the control panel, it is tried first and the built-in engine becomes the fallback.
5. Draw an opaque, color- and font-matched box over each original line.

## Roadmap
Expand Down
25 changes: 25 additions & 0 deletions src/Models/UserSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@

namespace WinLens.Models;

/// <summary>Selects the engine that translates the captured text.</summary>
public enum TranslationProvider
{
    /// <summary>
    /// The free built-in path: Google gtx, with MyMemory as its fallback.
    /// This is the zero value and the default choice.
    /// </summary>
    BuiltIn = 0,

    /// <summary>
    /// A chat endpoint that speaks the OpenAI API (OpenAI, DeepSeek, Ollama, etc.).
    /// </summary>
    OpenAiCompatible = 1,
}

public sealed class UserSettings
{
public string TargetLanguage { get; set; } = "en";
Expand All @@ -15,4 +25,19 @@ public sealed class UserSettings

public HotkeyModifiers HotkeyModifiers { get; set; } = HotkeyModifiers.Control | HotkeyModifiers.Alt;
public Key HotkeyKey { get; set; } = Key.T;

/// <summary>
/// Translation backend. Defaults to <see cref="TranslationProvider.BuiltIn"/> so
/// existing installs keep the free Google/MyMemory behaviour untouched.
/// </summary>
public TranslationProvider TranslationProvider { get; set; } = TranslationProvider.BuiltIn;

/// <summary>Base URL of the OpenAI-compatible API, e.g. https://api.openai.com/v1.</summary>
public string LlmBaseUrl { get; set; } = "";

/// <summary>Bearer token for the OpenAI-compatible API.</summary>
public string LlmApiKey { get; set; } = "";

/// <summary>Chat model id, e.g. gpt-4o-mini.</summary>
public string LlmModel { get; set; } = "";
}
102 changes: 98 additions & 4 deletions src/Services/TranslationService.cs
Original file line number Diff line number Diff line change
@@ -1,44 +1,64 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using System.Web;
using WinLens.Models;

namespace WinLens.Services;

/// <summary>
/// Translates short strings. Tries Google gtx first (auto source detection),
/// falls back to MyMemory. Logs each failure to %TEMP%\winlens.log.
/// Translates short strings. By default it uses the free built-in engines
/// (Google gtx with a MyMemory fallback). When the user opts into an
/// OpenAI-compatible API in settings, that is tried first and the built-in
/// engines act as the fallback. Logs each failure to %TEMP%\winlens.log.
/// </summary>
public sealed class TranslationService : IDisposable
{
private const string GoogleEndpoint = "https://translate.googleapis.com/translate_a/single";
private const string MyMemoryEndpoint = "https://api.mymemory.translated.net/get";

private readonly HttpClient _http;
private readonly HttpClient _llmHttp;
private readonly ConcurrentDictionary<(string text, string tgt), string> _cache = new();
private readonly string _logPath;
private UserSettings? _config;

/// <summary>
/// Creates the two HTTP clients (web engines and LLM endpoint) and resolves
/// the path of the log file inside the temp directory.
/// </summary>
public TranslationService()
{
    // Client for the built-in web engines: short timeout, browser-like user agent.
    _http = new HttpClient();
    _http.Timeout = TimeSpan.FromSeconds(10);
    _http.DefaultRequestHeaders.UserAgent.ParseAdd(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) WinLens/1.0");

    // Separate client for LLM calls, which reason over the text and can take
    // longer than the web engines, hence the larger timeout.
    _llmHttp = new HttpClient();
    _llmHttp.Timeout = TimeSpan.FromSeconds(30);

    var tempDirectory = Path.GetTempPath();
    _logPath = Path.Combine(tempDirectory, "winlens.log");
}

/// <summary>
/// Hands the service the live settings object. The same instance is shared
/// with the UI, so changes made there are seen by the next translation.
/// As long as this has not been called, only the built-in engines run.
/// </summary>
/// <param name="settings">The settings instance to consult on each translation.</param>
public void Configure(UserSettings settings)
{
    _config = settings;
}

/// <summary>
/// True only when settings have been supplied, the OpenAI-compatible provider
/// is selected, and the API key, base URL and model are all non-blank.
/// </summary>
private bool UseLlm
{
    get
    {
        // Read the field once so every check below sees the same instance.
        var cfg = _config;
        if (cfg is null || cfg.TranslationProvider != TranslationProvider.OpenAiCompatible)
        {
            return false;
        }

        var somethingMissing =
            string.IsNullOrWhiteSpace(cfg.LlmApiKey) ||
            string.IsNullOrWhiteSpace(cfg.LlmBaseUrl) ||
            string.IsNullOrWhiteSpace(cfg.LlmModel);
        return !somethingMissing;
    }
}

public async Task<string> TranslateAsync(
string text,
string targetLang,
string? sourceLang = null,
CancellationToken ct = default)
{
// sourceLang from the OCR engine is the engine's profile language, not
// the actual content language. Ignore it — let Google auto-detect.
// the actual content language. Ignore it — let the engine auto-detect.
_ = sourceLang;

if (string.IsNullOrWhiteSpace(text))
Expand All @@ -49,6 +69,18 @@ public async Task<string> TranslateAsync(
if (_cache.TryGetValue(key, out var cached))
return cached;

// Opt-in LLM engine first; on any failure fall through to the built-in
// engines so a misconfigured key never leaves text untranslated.
if (UseLlm)
{
var llm = await TryLlmAsync(text, tgt, ct);
if (llm != null)
{
_cache[key] = llm;
return llm;
}
}

var google = await TryGoogleAsync(text, tgt, ct);
if (google != null)
{
Expand Down Expand Up @@ -145,6 +177,64 @@ public async Task<string> TranslateAsync(
}
}

/// <summary>
/// Asks the configured OpenAI-compatible chat endpoint to translate one string.
/// </summary>
/// <param name="text">The OCR-captured string, sent as the user message.</param>
/// <param name="tgt">Target language code, embedded in the system prompt.</param>
/// <param name="ct">Cancellation token passed to the HTTP send and body read.</param>
/// <returns>
/// The trimmed reply text, or <c>null</c> when the endpoint answers with a
/// non-success status, the reply is blank, or any exception is thrown
/// (each failure is logged).
/// </returns>
private async Task<string?> TryLlmAsync(string text, string tgt, CancellationToken ct)
{
    var settings = _config!;
    var endpoint = $"{settings.LlmBaseUrl.TrimEnd('/')}/chat/completions";

    var systemPrompt =
        "You are a translation engine. The user sends a single short string captured by OCR; " +
        "it may contain small OCR errors (l/I confusion, missing spaces) — infer the intended text. " +
        $"Translate it into the language whose BCP-47 code is \"{tgt}\". " +
        "Reply with ONLY the translation: no quotes, no explanations, no markdown. " +
        "Keep numbers, file names and proper nouns unchanged.";

    var messages = new object[]
    {
        new { role = "system", content = systemPrompt },
        new { role = "user", content = text },
    };

    var requestBody = new Dictionary<string, object>
    {
        ["model"] = settings.LlmModel,
        ["temperature"] = 0.1,
        ["messages"] = messages,
    };

    // DeepSeek's reasoning models default to "thinking" mode, which costs
    // tokens and latency that a short translation does not need. The switch
    // is DeepSeek-specific, so it is only added when the URL mentions DeepSeek.
    if (endpoint.Contains("deepseek", StringComparison.OrdinalIgnoreCase))
    {
        requestBody["thinking"] = new { type = "disabled" };
    }

    try
    {
        using var request = new HttpRequestMessage(HttpMethod.Post, endpoint);
        var bearer = "Bearer " + settings.LlmApiKey.Trim();
        request.Headers.TryAddWithoutValidation("Authorization", bearer);

        var json = JsonSerializer.Serialize(requestBody);
        request.Content = new StringContent(json, Encoding.UTF8, "application/json");

        using var response = await _llmHttp.SendAsync(request, ct);
        var responseText = await response.Content.ReadAsStringAsync(ct);
        if (!response.IsSuccessStatusCode)
        {
            Log($"llm http {(int)response.StatusCode} for tgt={tgt}");
            return null;
        }

        // Expected shape: { "choices": [ { "message": { "content": "..." } } ] }.
        // Anything else throws and is handled by the catch below.
        using var parsed = JsonDocument.Parse(responseText);
        var firstChoice = parsed.RootElement.GetProperty("choices")[0];
        var reply = firstChoice
            .GetProperty("message")
            .GetProperty("content")
            .GetString();

        if (string.IsNullOrWhiteSpace(reply))
        {
            return null;
        }

        return reply.Trim();
    }
    catch (Exception ex)
    {
        Log($"llm exception: {ex.GetType().Name}: {ex.Message}");
        return null;
    }
}

private void Log(string line)
{
try
Expand All @@ -155,5 +245,9 @@ private void Log(string line)
catch { /* logging must never throw */ }
}

public void Dispose() => _http.Dispose();
/// <summary>Disposes both HTTP clients owned by this service.</summary>
public void Dispose()
{
    // The two clients are independent of each other, so the order is irrelevant.
    _llmHttp.Dispose();
    _http.Dispose();
}
}
30 changes: 30 additions & 0 deletions src/Theme/Theme.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,36 @@
</Setter>
</Style>

<!-- ============================ Text box ============================ -->
<!-- Keyed TextBox style: themed text and caret brushes, fixed 42 px height, and a
     custom template whose rounded border changes colour on hover and keyboard focus. -->
<Style x:Key="ModernTextBox" TargetType="{x:Type TextBox}">
<Setter Property="Foreground" Value="{StaticResource TextBrush}"/>
<!-- Caret uses the same brush as the text. -->
<Setter Property="CaretBrush" Value="{StaticResource TextBrush}"/>
<Setter Property="FontFamily" Value="Segoe UI"/>
<Setter Property="FontSize" Value="13"/>
<Setter Property="Height" Value="42"/>
<Setter Property="SnapsToDevicePixels" Value="True"/>
<Setter Property="Template">
<Setter.Value>
<ControlTemplate TargetType="{x:Type TextBox}">
<!-- "Bd" is the visible frame; the triggers below retarget its BorderBrush. -->
<Border x:Name="Bd"
Background="{StaticResource SurfaceAltBrush}"
BorderBrush="{StaticResource BorderBrush}"
BorderThickness="1" CornerRadius="10">
<!-- PART_ContentHost is the named template part in which the TextBox places its
     editable text; 14 px horizontal inset, vertically centred inside the frame. -->
<ScrollViewer x:Name="PART_ContentHost" Margin="14,0" VerticalAlignment="Center"/>
</Border>
<ControlTemplate.Triggers>
<!-- Hover: stronger border. -->
<Trigger Property="IsMouseOver" Value="True">
<Setter TargetName="Bd" Property="BorderBrush" Value="{StaticResource BorderStrongBrush}"/>
</Trigger>
<!-- Keyboard focus: accent border. Declared after the hover trigger, so it is
     expected to take precedence when both conditions hold. -->
<Trigger Property="IsKeyboardFocused" Value="True">
<Setter TargetName="Bd" Property="BorderBrush" Value="{StaticResource AccentBrush}"/>
</Trigger>
</ControlTemplate.Triggers>
</ControlTemplate>
</Setter.Value>
</Setter>
</Style>

<!-- ============================ Toggle switch ============================ -->
<Style x:Key="ToggleSwitch" TargetType="CheckBox">
<Setter Property="Cursor" Value="Hand"/>
Expand Down
1 change: 1 addition & 0 deletions src/Views/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public MainWindow()
{
InitializeComponent();
_settings.Load();
_translator.Configure(_settings.Current);
Loaded += OnLoaded;
}

Expand Down
25 changes: 24 additions & 1 deletion src/Views/SettingsWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
Title="WinLens"
Icon="/assets/winlens.ico"
Width="440" Height="694"
Width="440" SizeToContent="Height"
WindowStyle="None"
AllowsTransparency="True"
Background="Transparent"
Expand Down Expand Up @@ -82,6 +82,29 @@
Text="+ Add OCR languages in Windows…"
MouseLeftButtonUp="OnAddLanguages"/>

<!-- Translation engine -->
<TextBlock Text="TRANSLATION ENGINE" Style="{StaticResource LabelText}" Margin="2,20,0,8"/>
<ComboBox x:Name="EngineCombo" Style="{StaticResource ModernComboBox}"
SelectionChanged="OnEngineChanged">
<ComboBoxItem Tag="BuiltIn" Content="Built-in (Google · free)"/>
<ComboBoxItem Tag="OpenAiCompatible" Content="OpenAI-compatible API"/>
</ComboBox>

<!-- OpenAI-compatible API settings (shown only for that engine) -->
<!-- Starts collapsed; the code-behind makes it visible when the OpenAI-compatible
     engine is selected. All three boxes share the OnLlmConfigChanged handler, which
     writes the trimmed values to settings and saves on every text change. -->
<StackPanel x:Name="LlmPanel" Visibility="Collapsed">
<TextBlock Text="API BASE URL" Style="{StaticResource LabelText}" Margin="2,16,0,8"/>
<TextBox x:Name="LlmBaseUrlBox" Style="{StaticResource ModernTextBox}"
TextChanged="OnLlmConfigChanged"/>
<TextBlock Text="API KEY" Style="{StaticResource LabelText}" Margin="2,16,0,8"/>
<!-- NOTE(review): the API key is entered and displayed in clear text because this is
     a plain TextBox. Consider a PasswordBox (with matching code-behind changes) so the
     secret is masked on screen. -->
<TextBox x:Name="LlmApiKeyBox" Style="{StaticResource ModernTextBox}"
TextChanged="OnLlmConfigChanged"/>
<TextBlock Text="MODEL" Style="{StaticResource LabelText}" Margin="2,16,0,8"/>
<TextBox x:Name="LlmModelBox" Style="{StaticResource ModernTextBox}"
TextChanged="OnLlmConfigChanged"/>
<!-- Caption shown under the fields; wraps to the panel width. -->
<TextBlock Style="{StaticResource CaptionText}" Margin="2,8,0,0" TextWrapping="Wrap"
Text="Works with any OpenAI-compatible chat endpoint (OpenAI, DeepSeek, Ollama, …). Falls back to the built-in engine if a request fails."/>
</StackPanel>

<!-- Shortcut -->
<TextBlock Text="SHORTCUT" Style="{StaticResource LabelText}" Margin="2,20,0,8"/>
<Grid>
Expand Down
47 changes: 47 additions & 0 deletions src/Views/SettingsWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public SettingsWindow(SettingsService settings, Action onTranslate, Action onHot

RebuildLanguageOptions();
BuildSourceOptions();
FillEngineFields();

StartupToggle.IsChecked = StartupRegistration.IsEnabled();
UpdateHotkeyText();
Expand Down Expand Up @@ -120,6 +121,52 @@ private void OnSourceChanged(object sender, SelectionChangedEventArgs e)
_settings.Save(_settings.Current);
}

// ---------------- Translation engine ----------------

/// <summary>
/// Copies the saved translation-engine settings into the engine combo box and
/// the three API text boxes, then shows or hides the API panel to match.
/// </summary>
/// <remarks>
/// Assigning <c>Text</c> or <c>SelectedItem</c> raises the controls' change
/// events. <c>OnLlmConfigChanged</c> copies all three boxes back into the
/// settings and saves, so if it ran after only the first box was filled it
/// would overwrite the stored API key and model with the still-empty boxes.
/// To rule that out, the values are captured before any control is touched and
/// change handlers are suppressed while the controls are being populated.
/// </remarks>
private void FillEngineFields()
{
    // Snapshot first, so nothing a handler might do can alter what gets displayed.
    var current = _settings.Current;
    var provider = current.TranslationProvider;
    var baseUrl = current.LlmBaseUrl;
    var apiKey = current.LlmApiKey;
    var model = current.LlmModel;

    // Restore the previous flag afterwards in case a caller already suppresses events.
    var wasSuppressed = _suppressEvents;
    _suppressEvents = true;
    try
    {
        foreach (var item in EngineCombo.Items.OfType<ComboBoxItem>())
        {
            if (item.Tag is string tag &&
                Enum.TryParse<TranslationProvider>(tag, out var p) && p == provider)
            {
                EngineCombo.SelectedItem = item;
                break;
            }
        }

        LlmBaseUrlBox.Text = baseUrl;
        LlmApiKeyBox.Text = apiKey;
        LlmModelBox.Text = model;
    }
    finally
    {
        _suppressEvents = wasSuppressed;
    }

    UpdateLlmPanelVisibility();
}

/// <summary>
/// Shows the API settings panel when the OpenAI-compatible engine is the
/// current provider and collapses it otherwise.
/// </summary>
private void UpdateLlmPanelVisibility()
{
    var usesLlm =
        _settings.Current.TranslationProvider == TranslationProvider.OpenAiCompatible;

    if (usesLlm)
    {
        LlmPanel.Visibility = Visibility.Visible;
    }
    else
    {
        LlmPanel.Visibility = Visibility.Collapsed;
    }
}

/// <summary>
/// Handles a change of the engine combo box: stores the selected provider,
/// saves the settings and refreshes the API panel visibility.
/// Does nothing while events are suppressed or when the selected item's tag
/// is not a known <see cref="TranslationProvider"/> name.
/// </summary>
private void OnEngineChanged(object sender, SelectionChangedEventArgs e)
{
    if (_suppressEvents)
    {
        return;
    }

    // Each ComboBoxItem carries the enum member name in its Tag.
    if (EngineCombo.SelectedItem is ComboBoxItem { Tag: string tag } &&
        Enum.TryParse<TranslationProvider>(tag, out var chosen))
    {
        _settings.Current.TranslationProvider = chosen;
        _settings.Save(_settings.Current);
        UpdateLlmPanelVisibility();
    }
}

/// <summary>
/// Handles a text change in any of the three API boxes: copies the trimmed
/// base URL, key and model into the current settings and saves them.
/// Skipped entirely while events are suppressed.
/// </summary>
private void OnLlmConfigChanged(object sender, RoutedEventArgs e)
{
    if (!_suppressEvents)
    {
        var target = _settings.Current;

        // All three fields are rewritten together, whichever box raised the event.
        target.LlmBaseUrl = LlmBaseUrlBox.Text.Trim();
        target.LlmApiKey = LlmApiKeyBox.Text.Trim();
        target.LlmModel = LlmModelBox.Text.Trim();

        _settings.Save(_settings.Current);
    }
}

// ---------------- Translate ----------------

private async void OnTranslateNow(object sender, RoutedEventArgs e)
Expand Down