From 5b855c1295a4b67d6586642b08ae539e6ed66011 Mon Sep 17 00:00:00 2001 From: petrsovadina Date: Sun, 22 Sep 2024 15:47:00 +0200 Subject: [PATCH 1/3] Update README.md --- .env.example | 12 ++- README.md | 138 ++++++++----------------- src/app/api/chat/route.ts | 161 +++--------------------------- src/app/globals.css | 74 +++++++------- src/components/landing/hero.tsx | 6 +- src/components/nav.tsx | 43 ++++---- src/components/search-results.tsx | 53 ++++------ src/components/ui/button.tsx | 13 ++- src/config/sites.ts | 17 ++-- src/db/init.sql | 85 ++++++++++------ src/lib/chat/prompts.ts | 33 +++--- tailwind.config.ts | 5 + 12 files changed, 234 insertions(+), 406 deletions(-) diff --git a/.env.example b/.env.example index 7752a07..7f36440 100644 --- a/.env.example +++ b/.env.example @@ -1,12 +1,16 @@ # Required -# for match documents + +# pro shodné dokumenty NEXT_PUBLIC_SUPABASE_URL= NEXT_PUBLIC_SUPABASE_ANON_KEY= -# for embedding query + +# pro dotaz na vložení, získává se zde: https://jina.ai/embeddings/ JINA_API_KEY= -# for llm output + +# pro výstup llm, načteno zde: https://platform.openai.com/api-keys OPENAI_API_KEY= OPENAI_API_URL= -# for llm cache and serach cache + +# pro llm cache a serach cache UPSTASH_REDIS_REST_URL= UPSTASH_REDIS_REST_TOKEN= \ No newline at end of file diff --git a/README.md b/README.md index f465860..bccf18f 100644 --- a/README.md +++ b/README.md @@ -1,127 +1,71 @@ -# DiscovAI +# DoktorNaDohled -An AI-powered search engine for AI tools, or your own data. +AI konverzační platforma zaměřená na zajišťování přístupu k relevantním informacím a odpovědím na otázky uživatelů v oblasti zdravotnictví. -https://github.com/user-attachments/assets/2cdc92d0-d0c9-4098-8166-260e973783f0 +## Živá ukázka -Please feel free to contact me on [Twitter](https://x.com/ruiyanghim) or [create an issue](https://github.com/DiscovAI/DiscovAI-search/issues/new) if you have any questions. 
+[DoktorNaDohled.cz](https://www.doktornadohled-digimedic.cz/) (použijte ji zdarma bez přihlášení nebo kreditní karty). -## 💻 Live Demo +## Přehled -[DiscovAI.io](https://discovai.io/) (use it for free without signin or credit card) +- [Funkce](#funkce) +- [Tech-Stack](#tech-stack) +- [Rychlý start](#rychlý-start) +- [Deploy](#deploy) -## 🗂️ Overview +## Funkce -- 🛠 [Features](#-features) -- 🧱 [Tech-Stack](#-stack) -- 🚀 [Quickstart](#-quickstart) -- 🌐 [Deploy](#-deploy) +- **Vedení konverzace**: AI vede uživatele konverzací zaměřenou na jejich zdravotní potřeby. +- **Analýza kontextu**: Systém analyzuje kontext a požadavky uživatele pro poskytnutí relevantních informací. +- **Vyhledávání dat**: Na základě analýzy AI vyhledává odpovědi v integrovaných databázích poskytovatelů zdravotní péče. +- **Doporučení poskytovatelů**: AI poskytuje uživateli seznam vhodných poskytovatelů zdravotní péče spolu s kontaktními informacemi. +- **Personalizace**: Systém využívá uživatelský profil pro přizpůsobení doporučení. +- **Bezpečnost**: Implementováno základní zabezpečení včetně rate limitingu. -## 🛠 Features +## Tech-Stack -- **Vector-based Search**: Converts user queries into vectors for precise similarity matching in our AI product database. +- Rámec aplikace: [Next.js](https://nextjs.org/) +- Streamování textu: [Vercel AI SDK](https://sdk.vercel.ai/docs) +- Model LLM: [GPT-4](https://openai.com/) +- Databáze: [Supabase](https://supabase.com/) +- Vektor: [Pgvector](https://github.com/pgvector/pgvector) +- Model vkládání: [Jina AI](https://jina.ai/embeddings) +- Cache Redis: [Upstash](https://upstash.com/) +- Knihovna komponent: [shadcn/ui](https://ui.shadcn.com/) +- Primitiva komponent bez hlavy: [Radix UI](https://www.radix-ui.com/) +- Stylování: [Tailwind CSS](https://tailwindcss.com/) -- **Redis-powered Caching**: Utilizes Redis to cache search results and outputs, significantly improving response times for repeated queries. 
+## Rychlý start -- **Comprehensive AI Database**: Maintains an up-to-date collection of AI products across various categories and industries. - -- **LLM-powered Responses**: Leverages large language models to provide detailed, context-aware answers based on search results. - -- **User-friendly Interface**: Offers an intuitive design for effortless navigation and efficient AI product discovery. - -## 🧱 Stack - -- App framework: [Next.js](https://nextjs.org/) -- Text streaming: [Vercel AI SDK](https://sdk.vercel.ai/docs) -- LLM Model: [gpt-4o-mini](https://openai.com/) -- Database: [Supabase](https://supabase.com/) -- Vector: [Pgvector](https://github.com/pgvector/pgvector) -- Embedding Model: [Jina AI](https://jina.ai/embeddings) -- Redis Cache: [Upstash](https://upstash.com/) -- Component library: [shadcn/ui](https://ui.shadcn.com/) -- Headless component primitives: [Radix UI](https://www.radix-ui.com/) -- Styling: [Tailwind CSS](https://tailwindcss.com/) - -## 🚀 Quickstart - -### 1. Clone repo - -run the following command to clone the repo: +### 1. Klonování repozitáře ``` -git clone https://github.com/DiscovAI/DiscovAI-search +git clone https://github.com/DigiMedic/Doktor-Na-Dohled ``` -### 2. Install dependencies +### 2. Instalace závislostí ``` -cd discovai-search +cd Doktor-Na-Dohled pnpm i ``` -### 3. Setting up Supabase +### 3. Nastavení databáze Supabase -create a supabase [project](https://supabase.com/dashboard/projects), then run the src/db/init.sql in [SQL Editor](https://supabase.com/docs/guides/database/overview) to setup database +Vytvořte Supabase projekt a spusťte `src/db/init.sql` v SQL Editoru pro nastavení databáze. -### 4. Setting up Upstash +### 4. Nastavení proměnných prostředí -Follow the guide below to set up Upstash Redis. Create a database and obtain `UPSTASH_REDIS_REST_URL` and `UPSTASH_REDIS_REST_TOKEN`. 
Refer to the [Upstash guide](https://upstash.com/blog/rag-chatbot-upstash#setting-up-upstash-redis) for instructions on how to proceed. +Zkopírujte `.env.example` do `.env.local` a vyplňte všechny potřebné proměnné. -### 4. Fill out secrets +### 5. Spuštění aplikace ``` -cp .env.local.example .env.local -``` - -Your .env.local file should look like this: - -``` -# Required - -# for match documents -NEXT_PUBLIC_SUPABASE_URL= -NEXT_PUBLIC_SUPABASE_ANON_KEY= - -# for embedding query, retrieved here: https://jina.ai/embeddings/ -JINA_API_KEY= - -# for llm output, retrieved here: https://platform.openai.com/api-keys -OPENAI_API_KEY= -OPENAI_API_URL= - -# for llm cache and serach cache -UPSTASH_REDIS_REST_URL= -UPSTASH_REDIS_REST_TOKEN= +pnpm dev ``` -### 5. Run app locally +Aplikace bude dostupná na `http://localhost:3000`. -``` -pnpm dev -``` +## Deploy -You can now visit http://localhost:3000. - -## 🌐 Deploy - -You can deploy on any saas platform like vercel, zeabur, cloudflare pages. - -## 🌟 History - - - - Star History Chart - +Pro nasazení aplikace na Vercel postupujte podle [dokumentace Vercel](https://vercel.com/docs).
\ No newline at end of file diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index d768b00..ae1debb 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -25,10 +25,10 @@ export async function POST(req: NextRequest) { embeddingVectorCacheKey(query) ); - let documents: any[], queryEmbeddingError: PostgrestError; + let documents: any[], queryEmbeddingError: PostgrestError | null = null; if (cacheResult) { - documents = cacheResult; + documents = JSON.parse(cacheResult); console.log("search result", "cached"); } else { // match documents @@ -37,12 +37,16 @@ export async function POST(req: NextRequest) { ); let result = await supabase.rpc("match_embeddings", { - query_embedding: embedding, // Pass the embedding you want to compare - match_threshold: 0.78, // Choose an appropriate threshold for your data - match_count: 15, // Choose the number of matches + query_embedding: embedding, + match_threshold: 0.78, + match_count: 15, }); - documents = result.data; - queryEmbeddingError = result.error; + + if (result.error) { + queryEmbeddingError = result.error; + } else { + documents = result.data || []; + } } if (queryEmbeddingError) { @@ -52,148 +56,11 @@ export async function POST(req: NextRequest) { event: StreamEvent.ERROR, data: { event_type: StreamEvent.ERROR, - detail: "error on query embeddings", + detail: "Error on query embeddings: " + queryEmbeddingError.message, }, }) ); - controller.close(); - } - redis.setex( - embeddingVectorCacheKey(query), - 60 * 60 * 24, // 1 day - JSON.stringify(documents) - ); - // filter for unique docs - const uniqueDocuments = [ - ...new Set(documents.map((tool) => tool.metadata.url)), - ].map((url) => documents.find((tool) => tool.metadata.url === url)); - - for (let doc of uniqueDocuments) { - doc.metadata.url = addRefToUrl(doc.metadata.url); - } - - documents = uniqueDocuments.slice(0, 5); - - const searchResult = documents.map((d) => { - const safeContent = d.chunk_text.includes("DESCRIPTION") 
- ? d.chunk_text?.split("---")?.[0]?.split("DESCRIPTION:")?.[1] - : d.chunk_text; - return { - title: d.metadata.title, - url: d.metadata.url, - content: safeContent, - description: safeContent, - screenshot_url: d.screenshot_url, - }; - }); - - controller.enqueue( - genStream({ - event: StreamEvent.SEARCH_RESULTS, - data: { - event_type: StreamEvent.SEARCH_RESULTS, - results: searchResult, - images: uniqueDocuments.map((r) => r.screenshot_url), - }, - }) - ); - - // stream llm text chunk - const llmKey = llmResultCacheKey(query); - const llmCache: string | null = await redis.get(llmKey); - let gathered = ""; - if (llmCache) { - console.log("llm result cache", "cached"); - gathered = llmCache; - // simulate stream - let cacheArray = llmCache.split(" "); - for await (const c of cacheArray) { - await sleep(10); - controller.enqueue( - genStream({ - event: StreamEvent.TEXT_CHUNK, - data: { - event_type: StreamEvent.TEXT_CHUNK, - text: c + " ", - }, - }) - ); - } - } else { - const stream = await genLLMTextChunk({ - query, - contexts: documents, - }); - for await (const chunk of stream.textStream) { - controller.enqueue( - genStream({ - event: StreamEvent.TEXT_CHUNK, - data: { - event_type: StreamEvent.TEXT_CHUNK, - text: chunk, - }, - }) - ); - gathered += chunk; - } + return; // Don't close the controller here } - redis.setex(llmKey, 60 * 60 * 12, gathered); - - // more results or related query - const moreTools = uniqueDocuments.slice(5); - controller.enqueue( - genStream({ - event: StreamEvent.MORE_RESULTS, - data: { - event_type: StreamEvent.MORE_RESULTS, - more_results: moreTools.map((d) => ({ - title: d.metadata.title, - url: d.metadata.url, - screenshot_url: d.screenshot_url, - })), - }, - }) - ); - - controller.enqueue( - genStream({ - event: StreamEvent.FINAL_RESPONSE, - data: { - event_type: StreamEvent.FINAL_RESPONSE, - message: gathered, - }, - }) - ); - - controller.enqueue( - genStream({ - event: StreamEvent.STREAM_END, - data: { event_type: 
StreamEvent.STREAM_END, thread_id: null }, - }) - ); - - controller.close(); - } catch (error) { - console.error(error); - controller.enqueue( - genStream({ - event: StreamEvent.ERROR, - data: { - event_type: StreamEvent.ERROR, - detail: "Oops~", - }, - }) - ); - controller.close(); - } - }, - }); - return new Response(customReadable, { - headers: { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache", - Connection: "keep-alive", - }, - }); -} + // Pokračujte se zbytkem kódu... \ No newline at end of file diff --git a/src/app/globals.css b/src/app/globals.css index b1ee648..0559616 100644 --- a/src/app/globals.css +++ b/src/app/globals.css @@ -4,71 +4,71 @@ @layer base { :root { - --background: 60 30% 98%; - --foreground: 0 0% 3.9%; + --background: 195 30% 97%; + --foreground: 200 30% 26%; - --card: 0 0% 96.1%; - --card-foreground: 0 0% 45.1%; + --card: 195 20% 95%; + --card-foreground: 200 30% 26%; --popover: 0 0% 100%; - --popover-foreground: 0 0% 3.9%; + --popover-foreground: 200 30% 26%; - --primary: 0 0% 9%; + --primary: 200 58% 26%; --primary-foreground: 0 0% 98%; - --secondary: 0 0% 96.1%; - --secondary-foreground: 0 0% 9%; + --secondary: 195 26% 48%; + --secondary-foreground: 0 0% 98%; - --muted: 0 0% 96.1%; - --muted-foreground: 0 0% 45.1%; + --muted: 195 20% 95%; + --muted-foreground: 200 30% 46%; - --accent: 240 4.8% 95.9%; - --accent-foreground: 240 5.9% 10%; + --accent: 195 38% 56%; + --accent-foreground: 0 0% 98%; - --tint: 27 100% 49.8%; - --tint-foreground: 25 76% 31%; + --tint: 195 38% 56%; + --tint-foreground: 0 0% 98%; --destructive: 0 84.2% 60.2%; --destructive-foreground: 0 0% 98%; - --border: 0 0% 89.8%; - --input: 0 0% 89.8%; - --ring: 0 0% 89.8%; + --border: 195 20% 85%; + --input: 195 20% 85%; + --ring: 195 20% 85%; --radius: 0.5rem; } .dark { - --background: 180 2% 10%; - --foreground: 0 0% 98%; + --background: 200 30% 10%; + --foreground: 195 30% 97%; - --card: 180 3% 13%; - --card-foreground: 0 0% 63.9%; + --card: 200 30% 
13%; + --card-foreground: 195 30% 97%; - --popover: 180 2% 10%; - --popover-foreground: 0 0% 98%; + --popover: 200 30% 10%; + --popover-foreground: 195 30% 97%; - --primary: 0 0% 98%; - --primary-foreground: 0 0% 9%; + --primary: 195 38% 56%; + --primary-foreground: 200 30% 10%; - --secondary: 0 0% 14.9%; - --secondary-foreground: 0 0% 98%; + --secondary: 200 58% 26%; + --secondary-foreground: 195 30% 97%; - --muted: 0 0% 14.9%; - --muted-foreground: 0 0% 63.9%; + --muted: 200 30% 15%; + --muted-foreground: 195 30% 77%; - --accent: 240 3.7% 15.9%; - --accent-foreground: 0 0% 98%; + --accent: 200 58% 26%; + --accent-foreground: 195 30% 97%; - --tint: 22.4 100% 53%; - --tint-foreground: 0 0% 98%; + --tint: 195 38% 56%; + --tint-foreground: 200 30% 10%; --destructive: 0 62.8% 30.6%; - --destructive-foreground: 0 0% 98%; + --destructive-foreground: 195 30% 97%; - --border: 0 0% 14.9%; - --input: 0 0% 14.9%; - --ring: 0 0% 14.9%; + --border: 200 30% 20%; + --input: 200 30% 20%; + --ring: 200 30% 20%; } } diff --git a/src/components/landing/hero.tsx b/src/components/landing/hero.tsx index 73f397e..6575ca1 100644 --- a/src/components/landing/hero.tsx +++ b/src/components/landing/hero.tsx @@ -18,7 +18,7 @@ export default async function HeroLanding() { target="_blank" > 🎉 - Introducing  DiscovAI on{" "} + Introducing  DoktorNaDohled on{" "} @@ -28,10 +28,10 @@ export default async function HeroLanding() { className="max-w-2xl text-balance leading-normal text-muted-foreground sm:text-base sm:leading-8 lg:text-xl" style={{ animationDelay: "0.35s", animationFillMode: "forwards" }} > - Stay ahead in AI with DiscovAI, Your Go-To Source for the Latest + Stay ahead in healthcare with DoktorNaDohled, Your Go-To Source for the Latest {" "} - AI Products | News | Companies | Models + Healthcare Services | Providers | Information

diff --git a/src/components/nav.tsx b/src/components/nav.tsx index 87d42e7..2416019 100644 --- a/src/components/nav.tsx +++ b/src/components/nav.tsx @@ -20,10 +20,9 @@ const NewChatButton = () => { }; const TextLogo = () => { - // return <>; return ( -
- {SiteConfig.name} +
+ DigiMedic
); }; @@ -40,29 +39,33 @@ export function Navbar() {
(location.href = "/")}> Logo {onHomePage ? : }
- - - - - - + {LinkConfig.github && ( + + + + )} + {LinkConfig.twitter && ( + + + + )}
); diff --git a/src/components/search-results.tsx b/src/components/search-results.tsx index 7b68684..1900fa3 100644 --- a/src/components/search-results.tsx +++ b/src/components/search-results.tsx @@ -43,21 +43,24 @@ export function SearchResults({ results }: { results: SearchResult[] }) { const displayedResults = showAll ? results : results.slice(0, 3); const additionalCount = results.length > 3 ? results.length - 3 : 0; - const additionalResults = results.slice(3, 3 + additionalCount); + return ( -
- {displayedResults.map(({ title, url, content, description }, index) => { +
+ {displayedResults.map(({ title, url, content, description, contact_info, druh_zarizeni, obor_pece, forma_pece, druh_pece, odborny_zastupce, address, services }, index) => { const formattedUrl = new URL(url).hostname.split(".").slice(-2, -1)[0]; return ( @@ -83,15 +80,15 @@ export function SearchResults({ results }: { results: SearchResult[] }) {
-
-
- -
-
- {formattedUrl} -
-

{title}

+

{druh_zarizeni}

+

{obor_pece}

+

{forma_pece}

+

{druh_pece}

+

Odborný zástupce: {odborny_zastupce}

+

Kontakt: {contact_info}

+

Adresa: {address}

+

Služby: {services}

{content} @@ -103,20 +100,12 @@ export function SearchResults({ results }: { results: SearchResult[] }) { })} {!showAll && additionalCount > 0 && (
setShowAll(true)} > - - -
- {additionalResults.map(({ url }, index) => { - return ; - })} -
-
- View {additionalCount} more -
+ + +

Zobrazit dalších {additionalCount}

diff --git a/src/components/ui/button.tsx b/src/components/ui/button.tsx index 7297606..8852214 100644 --- a/src/components/ui/button.tsx +++ b/src/components/ui/button.tsx @@ -5,20 +5,19 @@ import { cva, type VariantProps } from "class-variance-authority"; import { cn } from "@/lib/utils"; const buttonVariants = cva( - "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50", + "inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:opacity-50 disabled:pointer-events-none ring-offset-background", { variants: { variant: { - default: - "bg-primary text-primary-foreground shadow hover:bg-primary/90", + default: "bg-primary text-primary-foreground hover:bg-primary/90", destructive: - "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90", + "bg-destructive text-destructive-foreground hover:bg-destructive/90", outline: - "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground", + "border border-input hover:bg-accent hover:text-accent-foreground", secondary: - "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80", + "bg-secondary text-secondary-foreground hover:bg-secondary/80", ghost: "hover:bg-accent hover:text-accent-foreground", - link: "text-primary underline-offset-4 hover:underline", + link: "underline-offset-4 hover:underline text-primary", }, size: { default: "h-9 px-4 py-2", diff --git a/src/config/sites.ts b/src/config/sites.ts index ba93cf9..7c15b7c 100644 --- a/src/config/sites.ts +++ b/src/config/sites.ts @@ -1,14 +1,13 @@ export const SiteConfig = { - name: "DiscovAI", - title: "DiscovAI", - metaTitle: "DiscovAI - Discover top ai tools best match your need", - desc: 
"Search over 15,349 top ai tools in our database by chatgpt, Discover the latest AI Products with detailed traffic data, best match your need", - panel: "Everything about AI", - subPanel: "Discover AI from over 15,349 tools for you need", + name: "DoktorNaDohled", + title: "DoktorNaDohled", + metaTitle: "DoktorNaDohled - Najděte vhodné zdravotní služby", + desc: "AI konverzační platforma zaměřená na poskytování relevantních informací a odpovědí na otázky uživatelů v oblasti zdravotnictví.", + panel: "Vše o zdravotnictví", + subPanel: "Najděte vhodné zdravotní služby a poskytovatele zdravotní péče", }; export const LinkConfig = { - site: "https://discovai.io", - github: "https://github.com/DiscovAI/DiscovAI-search", - twitter: "https://x.com/ruiyanghim", + site: "https://www.doktornadohled-digimedic.cz", + github: "https://github.com/DigiMedic/Doktor-Na-Dohled", }; diff --git a/src/db/init.sql b/src/db/init.sql index 551efb9..be6b435 100644 --- a/src/db/init.sql +++ b/src/db/init.sql @@ -2,38 +2,36 @@ CREATE EXTENSION IF NOT EXISTS "vector"; CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; -- content table -CREATE TABLE aitools ( +CREATE TABLE healthcare_providers ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - title TEXT NOT NULL, + name TEXT NOT NULL, url TEXT NOT NULL, description TEXT, - content TEXT, - screenshot_url TEXT, - full_content TEXT, - detail TEXT, - cat TEXT, - ext_info JSONB, - total_visits_last_three_months INT, - visits_last_month INT, - bounce_rate DECIMAL, - page_per_visit DECIMAL, - time_on_site DECIMAL, - traffic_detail JSON; + contact_info TEXT, + DruhZarizeni TEXT, + OborPece TEXT, + FormaPece TEXT, + DruhPece TEXT, + OdbornyZastupce TEXT, + address TEXT, + services TEXT, + ext_info JSONB ); + -- chunk table -CREATE TABLE aitools_chunk ( +CREATE TABLE healthcare_providers_chunk ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, chunk_text TEXT, 
metadata JSONB, - tool_id UUID NOT NULL, + provider_id UUID NOT NULL, embedding vector(768), - FOREIGN KEY (tool_id) REFERENCES aitools(id) + FOREIGN KEY (provider_id) REFERENCES healthcare_providers(id) ); -- hnsw index for query performance -create index on aitools_chunk using hnsw (embedding vector_l2_ops); +create index on healthcare_providers_chunk using hnsw (embedding vector_l2_ops); -- rpc function for supabase client create or replace function match_embeddings ( @@ -44,24 +42,47 @@ create or replace function match_embeddings ( returns table ( id UUID, metadata JSONB, - tool_id text, - chunk_text text, + provider_id UUID, + chunk_text TEXT, similarity float, - screenshot_url text + name TEXT, + url TEXT, + description TEXT, + contact_info TEXT, + DruhZarizeni TEXT, + OborPece TEXT, + FormaPece TEXT, + DruhPece TEXT, + OdbornyZastupce TEXT, + address TEXT, + services TEXT ) -language sql stable +language plpgsql as $$ +begin + return query select - aitools_chunk.id, - aitools_chunk.metadata, - aitools_chunk.tool_id, - aitools_chunk.chunk_text, - 1 - (aitools_chunk.embedding <=> query_embedding) as similarity, - aitools.screenshot_url - from aitools_chunk - join aitools on aitools_chunk.tool_id = aitools.id - where 1 - (aitools_chunk.embedding <=> query_embedding) > match_threshold - order by (aitools_chunk.embedding <=> query_embedding) asc + healthcare_providers_chunk.id, + healthcare_providers_chunk.metadata, + healthcare_providers_chunk.provider_id, + healthcare_providers_chunk.chunk_text, + 1 - (healthcare_providers_chunk.embedding <=> query_embedding) as similarity, + healthcare_providers.name, + healthcare_providers.url, + healthcare_providers.description, + healthcare_providers.contact_info, + healthcare_providers.DruhZarizeni, + healthcare_providers.OborPece, + healthcare_providers.FormaPece, + healthcare_providers.DruhPece, + healthcare_providers.OdbornyZastupce, + healthcare_providers.address, + healthcare_providers.services + from 
healthcare_providers_chunk + join healthcare_providers on healthcare_providers_chunk.provider_id = healthcare_providers.id + where 1 - (healthcare_providers_chunk.embedding <=> query_embedding) > match_threshold + order by similarity desc limit match_count; +end; $$; diff --git a/src/lib/chat/prompts.ts b/src/lib/chat/prompts.ts index 28a36a8..09a3146 100644 --- a/src/lib/chat/prompts.ts +++ b/src/lib/chat/prompts.ts @@ -1,33 +1,30 @@ export const CHAT_PROMPT = (contexts: string, query: string) => `\ -As a professional AI tool search expert. Please recommend the best tools for the user based on the search results (Title, URL, ScreenshotUrl, Summary) provided. +Jako profesionální asistent pro zdravotní péči doporučte nejlepší poskytovatele zdravotní péče pro uživatele na základě poskytnutých výsledků vyhledávání (Název, URL, Kontaktní informace, Druh zařízení, Obor péče, Forma péče, Druh péče, Odborný zástupce, Adresa, Služby). -You must only use the information in the search results provided.Use a professional tone. +Používejte pouze informace poskytnuté ve výsledcích vyhledávání. Používejte profesionální tón. -You must introduce each tool in context. -If the summary contains the number of visits to the page, be sure to point it out, otherwise ignore it. +Představte každého poskytovatele v kontextu. +Pokud souhrn obsahuje počet návštěv stránky, nezapomeňte na to upozornit, jinak to ignorujte. -You must cite the answer using [number] notation. You must cite sentences with their relevant citation number. Cite every part of the answer. -Place citations at the end of the sentence. You can do multiple citations in a row with the format [number1][number2]. +Musíte citovat odpověď pomocí notace [číslo]. Musíte citovat věty s příslušným číslem citace. Citujte každou část odpovědi. +Umístěte citace na konec věty. Můžete provést více citací za sebou ve formátu [číslo1][číslo2]. -Only cite the most relevant results that answer the question accurately. 
If different results refer to different entities with the same name, write separate answers for each entity. - -ONLY cite inline. -DO NOT include a reference section, DO NOT include URLs. -DO NOT repeat the question. -You can use markdown formatting. You should include bullets to list the information in your answer. -For each item, you must add the image of screenshotUrl below like this: -
{title} +Citujte pouze nejrelevantnější výsledky, které přesně odpovídají na otázku. Pokud se různé výsledky týkají různých entit se stejným názvem, napište samostatné odpovědi pro každou entitu. +Citujte POUZE v textu. +NEZAHRNUJTE sekci s odkazy, NEZAHRNUJTE URL. +NEOPAKUJTE otázku. +Můžete použít formátování markdown. Pro výpis informací v odpovědi byste měli použít odrážky. ${contexts} --------------------- -Make sure to match the language of the user's question. +Ujistěte se, že odpovídáte v jazyce uživatelovy otázky. -Question: ${query} -Answer (in the language of the user's question): \ +Otázka: ${query} +Odpověď (v jazyce uživatelovy otázky): \ `; export const RELATED_QUESTION_PROMPT = (context: string, query: string) => ` @@ -49,6 +46,6 @@ related_questions: A list of EXACTLY three concise, simple follow-up questions `; export const TRANSLATE = (query: string) => ` -Directly translate it to english, no other words. +Přeložte to přímo do češtiny. Question: ${query} `; diff --git a/tailwind.config.ts b/tailwind.config.ts index 0e9e5fe..61afd1d 100644 --- a/tailwind.config.ts +++ b/tailwind.config.ts @@ -87,6 +87,11 @@ const config = { "accordion-down": "accordion-down 0.2s ease-out", "accordion-up": "accordion-up 0.2s ease-out", }, + fontFamily: { + sans: ['Raleway', 'sans-serif'], + mono: ['"Space Mono"', 'monospace'], + alt: ['"Open Sans"', 'sans-serif'], + }, }, }, plugins: [ From 0991f73990eb9dae8d5429cd96028b2a3a6e6da9 Mon Sep 17 00:00:00 2001 From: petrsovadina Date: Sun, 22 Sep 2024 23:54:17 +0200 Subject: [PATCH 2/3] 1 --- package.json | 1 + src/app/api/chat/route.ts | 202 +++++++++++++++++++++++++++--- src/components/nav.tsx | 9 +- src/components/search-results.tsx | 94 ++++---------- src/db/init.sql | 142 +++++++++++---------- src/lib/chat/prompts.ts | 5 +- 6 files changed, 288 insertions(+), 165 deletions(-) diff --git a/package.json b/package.json index 7069c9c..809eba4 100644 --- a/package.json +++ b/package.json @@ -2,6 +2,7 @@ "name": 
"discovai-search", "version": "0.1.0", "private": true, + "type": "module", "scripts": { "dev": "next dev", "build": "next build", diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index ae1debb..3aa2291 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -1,7 +1,7 @@ // app/api/stream/route.ts import { embeddingVectorCacheKey, llmResultCacheKey, redis } from "@/db/redis"; -import { supabase } from "@/db/supabase"; +import { createClient } from '@supabase/supabase-js' import { generateQueyEmbedding } from "@/lib/chat/embedding"; import { genLLMTextChunk, translate } from "@/lib/chat/llm"; import { addRefToUrl, genStream, sleep } from "@/lib/utils"; @@ -9,6 +9,10 @@ import { StreamEvent } from "@/schema/chat"; import { PostgrestError } from "@supabase/supabase-js"; import { NextRequest } from "next/server"; +const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL +const supabaseKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY +const supabase = createClient(supabaseUrl, supabaseKey) + export async function POST(req: NextRequest) { const body = await req.json(); const { query } = body; @@ -25,10 +29,10 @@ export async function POST(req: NextRequest) { embeddingVectorCacheKey(query) ); - let documents: any[], queryEmbeddingError: PostgrestError | null = null; + let documents: any[], queryEmbeddingError: PostgrestError; if (cacheResult) { - documents = JSON.parse(cacheResult); + documents = cacheResult; console.log("search result", "cached"); } else { // match documents @@ -36,16 +40,33 @@ export async function POST(req: NextRequest) { await translate({ query }) ); - let result = await supabase.rpc("match_embeddings", { - query_embedding: embedding, - match_threshold: 0.78, - match_count: 15, - }); - - if (result.error) { - queryEmbeddingError = result.error; - } else { - documents = result.data || []; + let result; + try { + console.log("Volání match_embeddings s parametry:", { + query_embedding: embedding, + match_threshold: 
0.78, + match_count: 15, + }); + result = await supabase.rpc("match_embeddings", { + query_embedding: embedding, + match_threshold: 0.78, + match_count: 15, + }); + console.log("Výsledek volání match_embeddings:", JSON.stringify(result, null, 2)); + if (result.error) throw result.error; + documents = result.data; + } catch (error) { + console.error("Chyba při volání match_embeddings:", JSON.stringify(error, null, 2)); + controller.enqueue( + genStream({ + event: StreamEvent.ERROR, + data: { + event_type: StreamEvent.ERROR, + detail: "Chyba při vyhledávání relevantních dokumentů. Prosím, zkuste to znovu později.", + }, + }) + ); + return; // Ukončete funkci zde, aby se nepokračovalo s prázdnými dokumenty } } @@ -56,11 +77,160 @@ export async function POST(req: NextRequest) { event: StreamEvent.ERROR, data: { event_type: StreamEvent.ERROR, - detail: "Error on query embeddings: " + queryEmbeddingError.message, + detail: "error on query embeddings", }, }) ); - return; // Don't close the controller here + return; // Místo controller.close() použijte return + } + redis.setex( + embeddingVectorCacheKey(query), + 60 * 60 * 24, // 1 day + JSON.stringify(documents) + ); + // filter for unique docs + const uniqueDocuments = documents && documents.length > 0 + ? [...new Set(documents.map((tool) => tool.metadata.url))] + .map((url) => documents.find((tool) => tool.metadata.url === url)) + : []; + + for (let doc of uniqueDocuments) { + doc.metadata.url = addRefToUrl(doc.metadata.url); } - // Pokračujte se zbytkem kódu... 
\ No newline at end of file + documents = uniqueDocuments.slice(0, 5); + + const searchResult = documents.map((d) => { + return { + title: d.nazevzarizeni, + url: d.poskytovatelweb, + description: `${d.druhzarizeni} - ${d.oborpece}`, + address: `${d.ulice} ${d.cislodomovniorientacni}, ${d.obec}, ${d.psc}`, + contact: { + phone: d.poskytovateltelefon, + email: d.poskytovateljmail, + }, + specialization: { + formapece: d.formapece, + druhpece: d.druhpece, + odbornyzastupce: d.odbornyzastupce, + }, + region: { + kraj: d.kraj, + okres: d.okres, + }, + ico: d.ico, + }; + }); + + controller.enqueue( + genStream({ + event: StreamEvent.SEARCH_RESULTS, + data: { + event_type: StreamEvent.SEARCH_RESULTS, + results: searchResult.map((result) => ({ + ...result, + content: result.description, // Přidání chybějící vlastnosti content + })), + images: uniqueDocuments.map((r) => r.screenshot_url), + }, + }) + ); + // stream llm text chunk + const llmKey = llmResultCacheKey(query); + const llmCache: string | null = await redis.get(llmKey); + let gathered = ""; + if (llmCache) { + console.log("llm result cache", "cached"); + gathered = llmCache; + // simulate stream + let cacheArray = llmCache.split(" "); + for await (const c of cacheArray) { + await sleep(10); + controller.enqueue( + genStream({ + event: StreamEvent.TEXT_CHUNK, + data: { + event_type: StreamEvent.TEXT_CHUNK, + text: c + " ", + }, + }) + ); + } + } else { + const stream = await genLLMTextChunk({ + query, + contexts: documents, + }); + for await (const chunk of stream.textStream) { + controller.enqueue( + genStream({ + event: StreamEvent.TEXT_CHUNK, + data: { + event_type: StreamEvent.TEXT_CHUNK, + text: chunk, + }, + }) + ); + gathered += chunk; + } + } + redis.setex(llmKey, 60 * 60 * 12, gathered); + // more results or related query + const moreTools = uniqueDocuments.slice(5); + controller.enqueue( + genStream({ + event: StreamEvent.MORE_RESULTS, + data: { + event_type: StreamEvent.MORE_RESULTS, + more_results: 
moreTools.map((d) => ({ + title: d.metadata.title, + url: d.metadata.url, + screenshot_url: d.screenshot_url, + })), + }, + }) + ); + + controller.enqueue( + genStream({ + event: StreamEvent.FINAL_RESPONSE, + data: { + event_type: StreamEvent.FINAL_RESPONSE, + message: gathered, + }, + }) + ); + + controller.enqueue( + genStream({ + event: StreamEvent.STREAM_END, + data: { event_type: StreamEvent.STREAM_END, thread_id: null }, + }) + ); + + controller.close(); + } catch (error) { + console.error(error); + controller.enqueue( + genStream({ + event: StreamEvent.ERROR, + data: { + event_type: StreamEvent.ERROR, + detail: "Oops~", + }, + }) + ); + controller.close(); + } + }, + }); + + return new Response(customReadable, { + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }, + }); +} \ No newline at end of file diff --git a/src/components/nav.tsx b/src/components/nav.tsx index 2416019..3725a1e 100644 --- a/src/components/nav.tsx +++ b/src/components/nav.tsx @@ -37,12 +37,9 @@ export function Navbar() { return (
- (location.href = "/")}> - DigiMedic Logo + + Logo + {SiteConfig.name} {onHomePage ? : }
diff --git a/src/components/search-results.tsx b/src/components/search-results.tsx index 1900fa3..a62a4c3 100644 --- a/src/components/search-results.tsx +++ b/src/components/search-results.tsx @@ -39,77 +39,33 @@ export const Logo = ({ url }: { url: string }) => { }; export function SearchResults({ results }: { results: SearchResult[] }) { - const [showAll, setShowAll] = useState(false); - - const displayedResults = showAll ? results : results.slice(0, 3); - const additionalCount = results.length > 3 ? results.length - 3 : 0; - return ( -
- {displayedResults.map(({ title, url, content, description, contact_info, druh_zarizeni, obor_pece, forma_pece, druh_pece, odborny_zastupce, address, services }, index) => { - const formattedUrl = new URL(url).hostname.split(".").slice(-2, -1)[0]; - - return ( - - - - - -
-
-

{title}

-

{druh_zarizeni}

-

{obor_pece}

-

{forma_pece}

-

{druh_pece}

-

Odborný zástupce: {odborny_zastupce}

-

Kontakt: {contact_info}

-

Adresa: {address}

-

Služby: {services}

- - {content} - -
-
-
-
- ); - })} - {!showAll && additionalCount > 0 && ( -
setShowAll(true)} - > - - -

Zobrazit dalších {additionalCount}

-
-
+
+ {results.map((result, index) => ( +
+

{result.title}

+

{result.description}

+

{result.address}

+

+ Telefon: {result.contact.phone}, Email: {result.contact.email} +

+

+ Forma péče: {result.specialization.FormaPece}, Druh péče: {result.specialization.DruhPece} +

+

+ Odborný zástupce: {result.specialization.OdbornyZastupce} +

+

+ Kraj: {result.region.Kraj}, Okres: {result.region.Okres} +

+

IČO: {result.Ico}

+ {result.url && ( + + Webové stránky + + )}
- )} + ))}
); } diff --git a/src/db/init.sql b/src/db/init.sql index be6b435..dba4872 100644 --- a/src/db/init.sql +++ b/src/db/init.sql @@ -1,88 +1,86 @@ CREATE EXTENSION IF NOT EXISTS "vector"; CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; --- content table -CREATE TABLE healthcare_providers ( +-- Upravíme tabulku healthcareprovidors +CREATE TABLE IF NOT EXISTS healthcareprovidors ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - name TEXT NOT NULL, - url TEXT NOT NULL, - description TEXT, - contact_info TEXT, - DruhZarizeni TEXT, - OborPece TEXT, - FormaPece TEXT, - DruhPece TEXT, - OdbornyZastupce TEXT, - address TEXT, - services TEXT, - ext_info JSONB + nazevzarizeni TEXT, + druhzarizeni TEXT, + obec TEXT, + psc BIGINT, + ulice TEXT, + cislodomovniorientacni TEXT, + kraj TEXT, + okres TEXT, + poskytovateltelefon TEXT, + poskytovateljfax TEXT, + poskytovateljmail TEXT, + poskytovatelweb TEXT, + ico BIGINT, + oborpece TEXT, + formapece TEXT, + druhpece TEXT, + odbornyzastupce TEXT, + embedding vector(768) ); --- chunk table -CREATE TABLE healthcare_providers_chunk ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - chunk_text TEXT, - metadata JSONB, - provider_id UUID NOT NULL, - embedding vector(768), - FOREIGN KEY (provider_id) REFERENCES healthcare_providers(id) -); - --- hnsw index for query performance -create index on healthcare_providers_chunk using hnsw (embedding vector_l2_ops); +-- Vytvoříme index pro vektorové vyhledávání +CREATE INDEX IF NOT EXISTS healthcareprovidors_embedding_idx ON healthcareprovidors USING hnsw (embedding vector_l2_ops); --- rpc function for supabase client -create or replace function match_embeddings ( +-- Upravíme funkci match_embeddings +CREATE OR REPLACE FUNCTION match_embeddings ( query_embedding vector(768), match_threshold float, match_count int ) -returns table ( +RETURNS TABLE ( id UUID, - metadata JSONB, - provider_id UUID, - 
chunk_text TEXT, - similarity float, - name TEXT, - url TEXT, - description TEXT, - contact_info TEXT, - DruhZarizeni TEXT, - OborPece TEXT, - FormaPece TEXT, - DruhPece TEXT, - OdbornyZastupce TEXT, - address TEXT, - services TEXT + nazevzarizeni TEXT, + druhzarizeni TEXT, + obec TEXT, + psc BIGINT, + ulice TEXT, + cislodomovniorientacni TEXT, + kraj TEXT, + okres TEXT, + poskytovateltelefon TEXT, + poskytovateljmail TEXT, + poskytovatelweb TEXT, + ico BIGINT, + oborpece TEXT, + formapece TEXT, + druhpece TEXT, + odbornyzastupce TEXT, + similarity float ) -language plpgsql -as $$ -begin - return query - select - healthcare_providers_chunk.id, - healthcare_providers_chunk.metadata, - healthcare_providers_chunk.provider_id, - healthcare_providers_chunk.chunk_text, - 1 - (healthcare_providers_chunk.embedding <=> query_embedding) as similarity, - healthcare_providers.name, - healthcare_providers.url, - healthcare_providers.description, - healthcare_providers.contact_info, - healthcare_providers.DruhZarizeni, - healthcare_providers.OborPece, - healthcare_providers.FormaPece, - healthcare_providers.DruhPece, - healthcare_providers.OdbornyZastupce, - healthcare_providers.address, - healthcare_providers.services - from healthcare_providers_chunk - join healthcare_providers on healthcare_providers_chunk.provider_id = healthcare_providers.id - where 1 - (healthcare_providers_chunk.embedding <=> query_embedding) > match_threshold - order by similarity desc - limit match_count; -end; +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY + SELECT + healthcareprovidors.id, + healthcareprovidors.nazevzarizeni, + healthcareprovidors.druhzarizeni, + healthcareprovidors.obec, + healthcareprovidors.psc, + healthcareprovidors.ulice, + healthcareprovidors.cislodomovniorientacni, + healthcareprovidors.kraj, + healthcareprovidors.okres, + healthcareprovidors.poskytovateltelefon, + healthcareprovidors.poskytovateljmail, + healthcareprovidors.poskytovatelweb, + healthcareprovidors.ico, + 
healthcareprovidors.oborpece, + healthcareprovidors.formapece, + healthcareprovidors.druhpece, + healthcareprovidors.odbornyzastupce, + 1 - (healthcareprovidors.embedding <=> query_embedding) AS similarity + FROM healthcareprovidors + WHERE 1 - (healthcareprovidors.embedding <=> query_embedding) > match_threshold + ORDER BY similarity DESC + LIMIT match_count; +END; $$; diff --git a/src/lib/chat/prompts.ts b/src/lib/chat/prompts.ts index 09a3146..2261f41 100644 --- a/src/lib/chat/prompts.ts +++ b/src/lib/chat/prompts.ts @@ -1,10 +1,11 @@ export const CHAT_PROMPT = (contexts: string, query: string) => `\ -Jako profesionální asistent pro zdravotní péči doporučte nejlepší poskytovatele zdravotní péče pro uživatele na základě poskytnutých výsledků vyhledávání (Název, URL, Kontaktní informace, Druh zařízení, Obor péče, Forma péče, Druh péče, Odborný zástupce, Adresa, Služby). +Jako profesionální asistent pro zdravotní péči doporučte nejlepší poskytovatele zdravotní péče pro uživatele na základě poskytnutých výsledků vyhledávání (NazevZarizeni, DruhZarizeni, OborPece, FormaPece, DruhPece, OdbornyZastupce, Adresa, Kontaktní informace). Používejte pouze informace poskytnuté ve výsledcích vyhledávání. Používejte profesionální tón. Představte každého poskytovatele v kontextu. -Pokud souhrn obsahuje počet návštěv stránky, nezapomeňte na to upozornit, jinak to ignorujte. +Pokud je to relevantní, zmiňte specializaci, formu a druh péče. +Uveďte adresu a kontaktní informace pro každého poskytovatele. Musíte citovat odpověď pomocí notace [číslo]. Musíte citovat věty s příslušným číslem citace. Citujte každou část odpovědi. Umístěte citace na konec věty. Můžete provést více citací za sebou ve formátu [číslo1][číslo2]. 
From 3fefe22cd76fd0e2dc0c3a897e047eec529de771 Mon Sep 17 00:00:00 2001 From: petrsovadina Date: Mon, 23 Sep 2024 00:41:32 +0200 Subject: [PATCH 3/3] 2 --- src/app/api/chat/route.ts | 12 ++++++++++++ src/components/search-results.tsx | 8 ++++---- src/schema/chat.ts | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index 3aa2291..fb443bd 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -13,7 +13,19 @@ const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL const supabaseKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY const supabase = createClient(supabaseUrl, supabaseKey) +// Přidejte tuto funkci pro testování připojení +async function testSupabaseConnection() { + try { + const { data, error } = await supabase.from('healthcareprovidors').select('*').limit(1) + if (error) throw error + console.log('Úspěšné připojení k Supabase:', data) + } catch (error) { + console.error('Chyba při připojení k Supabase:', error) + } +} + export async function POST(req: NextRequest) { + await testSupabaseConnection() const body = await req.json(); const { query } = body; diff --git a/src/components/search-results.tsx b/src/components/search-results.tsx index a62a4c3..cf2eb8a 100644 --- a/src/components/search-results.tsx +++ b/src/components/search-results.tsx @@ -50,15 +50,15 @@ export function SearchResults({ results }: { results: SearchResult[] }) { Telefon: {result.contact.phone}, Email: {result.contact.email}

- Forma péče: {result.specialization.FormaPece}, Druh péče: {result.specialization.DruhPece} + Forma péče: {result.specialization.formapece}, Druh péče: {result.specialization.druhpece}

- Odborný zástupce: {result.specialization.OdbornyZastupce} + Odborný zástupce: {result.specialization.odbornyzastupce}

- Kraj: {result.region.Kraj}, Okres: {result.region.Okres} + Kraj: {result.region.kraj}, Okres: {result.region.okres}

-

IČO: {result.Ico}

+

IČO: {result.ico}

{result.url && ( Webové stránky diff --git a/src/schema/chat.ts b/src/schema/chat.ts index 43d7bc5..d799159 100644 --- a/src/schema/chat.ts +++ b/src/schema/chat.ts @@ -88,6 +88,21 @@ export type SearchResult = { url: string; content: string; description: string; + address: string; + contact: { + phone: string; + email: string; + }; + specialization: { + formapece: string; + druhpece: string; + odbornyzastupce: string; + }; + region: { + kraj: string; + okres: string; + }; + ico: number; }; export type SearchResultStream = {