{"id":243,"date":"2026-03-09T08:22:15","date_gmt":"2026-03-09T08:22:15","guid":{"rendered":"https:\/\/blog.rebalai.com\/ko\/2026\/03\/09\/rag-2\/"},"modified":"2026-03-09T22:30:55","modified_gmt":"2026-03-09T22:30:55","slug":"rag-2","status":"publish","type":"post","link":"https:\/\/blog.rebalai.com\/ko\/2026\/03\/09\/rag-2\/","title":{"rendered":"RAG \uc81c\ub300\ub85c \uc4f0\uae30: \uccad\ud0b9\ubd80\ud130 \uac80\uc0c9 \ucd5c\uc801\ud654\uae4c\uc9c0, 2\uc8fc \uc0bd\uc9c8\uc758 \uae30\ub85d"},"content":{"rendered":"<p>\uc791\ub144 11\uc6d4\uc5d0 \uc0ac\ub0b4 \ubb38\uc11c \uac80\uc0c9 \uc2dc\uc2a4\ud15c\uc744 \ub9cc\ub4e4\uc5c8\ub294\ub370, \ucd08\ubc18\uc5d0 \uc644\uc804\ud788 \ub9dd\ud588\ub2e4. GPT-4\ub97c \uc4f0\ub294\ub370\ub3c4 \ub2f5\ubcc0\uc774 \uc5c9\ud130\ub9ac\uc600\ub2e4. \uc54c\uace0 \ubcf4\ub2c8 \ubb38\uc81c\ub294 \ubaa8\ub378\uc774 \uc544\ub2c8\ub77c \uac80\uc0c9 \uc790\uccb4\uc600\ub2e4 \u2014 \uad00\ub828 \uc5c6\ub294 \uccad\ud06c\uac00 \ucee8\ud14d\uc2a4\ud2b8\uc5d0 \uc794\ub729 \ub4e4\uc5b4\uac00\uace0 \uc788\uc5c8\ub2e4. \uadf8\ub54c\ubd80\ud130 <a href=\"https:\/\/blog.rebalai.com\/ko\/2026\/03\/08\/2026-github-copilot-cursor-codeium-tabnine-amazo\/\" title=\"2\uc8fc \ub3d9\uc548\">2\uc8fc \ub3d9\uc548<\/a> \uccad\ud0b9 \uc804\ub7b5, \ubca1\ud130 DB, \uac80\uc0c9 \ud30c\uc774\ud504\ub77c\uc778\uc744 \ub72f\uc5b4\uace0\uce58\uba74\uc11c \ubc30\uc6b4 \uac83\ub4e4\uc744 \uc5ec\uae30 \uc815\ub9ac\ud574\ubcf8\ub2e4.<\/p>\n<h2>\uccad\ud0b9\uc774 \uac80\uc0c9 \ud488\uc9c8\uc758 70%\ub97c \uacb0\uc815\ud55c\ub2e4<\/h2>\n<p>\uc194\uc9c1\ud788 \ucc98\uc74c\uc5d4 \uccad\ud0b9\uc744 \ub300\ucda9 \uc0dd\uac01\ud588\ub2e4. &#8220;\uadf8\ub0e5 1000 \ud1a0\ud070\uc73c\ub85c \uc790\ub974\uba74 \ub418\uc9c0 \uc54a\ub098?&#8221; \ud588\ub294\ub370, \uc774\uac8c \uac00\uc7a5 \ud070 \ucc29\uac01\uc774\uc5c8\ub2e4.<\/p>\n<p>\uccad\ud0b9\uc758 \ud575\uc2ec \ub51c\ub808\ub9c8\ub294 \uc774\uac70\ub2e4. 
\uccad\ud06c\uac00 \ub108\ubb34 \ud06c\uba74 \uc784\ubca0\ub529\uc774 \uc758\ubbf8\ub97c \ud76c\uc11d\uc2dc\ud0a4\uace0, \ub108\ubb34 \uc791\uc73c\uba74 \ucee8\ud14d\uc2a4\ud2b8\uac00 \ubd80\uc871\ud574\uc11c LLM\uc774 \ub2f5\ubcc0\uc744 \uc81c\ub300\ub85c \ubabb \ub9cc\ub4e0\ub2e4. \uc5ec\uae30\uc11c \ubcf4\ud1b5 \ub450 \uac00\uc9c0 \uc2e4\uc218\ub97c \ud55c\ub2e4 \u2014 \uccad\ud06c \ud06c\uae30\ub9cc \uc870\uc815\ud558\uac70\ub098, overlap\uc744 \uc544\uc608 \uc548 \uc8fc\uac70\ub098.<\/p>\n<p>\ub0b4 \ucf00\uc774\uc2a4\ub294 \uc0ac\ub0b4 \uae30\uc220 \ubb38\uc11c(\ub9c8\ud06c\ub2e4\uc6b4 \ud30c\uc77c\ub4e4, \uc57d 2\ub9cc \ud398\uc774\uc9c0)\uc600\ub2e4. \ucc98\uc74c\uc5d0 <code>RecursiveCharacterTextSplitter<\/code>\ub85c 500\uc790\uc529 \uc798\ub790\ub354\ub2c8 \ubb38\uc7a5 \uc911\uac04\uc5d0\uc11c \uc798\ub9ac\ub294 \uacbd\uc6b0\uac00 \ub9ce\uc558\ub2e4. \ud2b9\ud788 \ucf54\ub4dc \ube14\ub85d\uc774 \uc798\ub9ac\uba74 \uadf8 \uccad\ud06c\ub294 \uac80\uc0c9\uc5d0\uc11c \uc644\uc804\ud788 \uc4f8\ubaa8\uc5c6\uc5b4\uc9c4\ub2e4.<\/p>\n<p>Fixed-size chunking\ubd80\ud130 \uc2dc\uc791\ud588\ub2e4. \ube60\ub974\uae34 \ud55c\ub370 \uc758\ubbf8 \ub2e8\uc704\ub97c \ubb34\uc2dc\ud55c\ub2e4. \ud14c\uc774\ube14\uc774\ub098 \ucf54\ub4dc \ube14\ub85d\uc774 \uc788\ub294 \ubb38\uc11c\uc5d4 \ucd5c\uc545\uc774\uc5c8\ub2e4 \u2014 \ucf54\ub4dc\uac00 \ub531 \uc808\ubc18\uc5d0\uc11c \uc798\ub824\uc11c \uac80\uc0c9 \uacb0\uacfc\uc5d0 \uc62c\ub77c\uc624\uba74 LLM\uc744 \ud63c\ub780\uc2a4\ub7fd\uac8c \ub9cc\ub4e0\ub2e4.<\/p>\n<p>\uadf8 \ub2e4\uc74c\uc5d4 LangChain\uc758 <code>SemanticChunker<\/code>\ub97c \uc368\ubd24\ub2e4. \uc784\ubca0\ub529 \uae30\ubc18\uc73c\ub85c \uc758\ubbf8 \ubcc0\ud654\uac00 \uac10\uc9c0\ub418\ub294 \uc9c0\uc810\uc5d0\uc11c \uc790\ub974\ub294 \ubc29\uc2dd\uc778\ub370, \uacb0\uacfc \ud488\uc9c8\uc740 \ud655\uc2e4\ud788 \uc88b\uc558\ub2e4. 
\ubb38\uc81c\ub294 \uc18d\ub3c4\uc600\ub2e4 \u2014 \uccad\ud06c \ud558\ub098 \ub9cc\ub4e4 \ub54c\ub9c8\ub2e4 \uc784\ubca0\ub529 API\ub97c \uc3d8\ub2e4 \ubcf4\ub2c8 \uc778\ub371\uc2f1\uc774 \ub108\ubb34 \ub290\ub838\ub2e4. \uadf8\ub9ac\uace0 \uccad\ud06c \ud06c\uae30\uac00 \ub4e4\ucb49\ub0a0\ucb49\ud574\uc11c \ubca1\ud130 DB \uc778\ub371\uc2f1\ub3c4 \uc880 \ube44\ud6a8\uc728\uc801\uc774\uc5c8\ub2e4.<\/p>\n<p>\uacb0\uad6d \uc815\ucc29\ud55c \uac74 \uad6c\uc870 \uae30\ubc18 chunking\uc774\ub2e4. \ub9c8\ud06c\ub2e4\uc6b4 \ud5e4\ub354\ub97c \uae30\uc900\uc73c\ub85c \uc790\ub974\uace0, \uadf8 \uc548\uc5d0\uc11c \ub2e4\uc2dc \ucd5c\ub300 \uae38\uc774 \uc81c\ud55c\uc744 \uac70\ub294 \ubc29\uc2dd. \uad6c\ud604\uc774 \ub2e8\uc21c\ud558\uace0 \uc608\uce21 \uac00\ub2a5\ud558\ub2e4\ub294 \uac8c \uc7a5\uc810\uc774\ub2e4.<\/p>\n<pre><code class=\"language-python\">from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter\n\n# 1\ub2e8\uacc4: \ud5e4\ub354 \uae30\ubc18\uc73c\ub85c \ubb38\uc11c \uad6c\uc870 \ubd84\ub9ac\nheader_splitter = MarkdownHeaderTextSplitter(\n    headers_to_split_on=[\n        (&quot;#&quot;, &quot;h1&quot;),\n        (&quot;##&quot;, &quot;h2&quot;),\n        (&quot;###&quot;, &quot;h3&quot;),\n    ],\n    strip_headers=False  # \ud5e4\ub354 \ud14d\uc2a4\ud2b8\ub97c \uccad\ud06c\uc5d0 \ub0a8\uaca8\ub454\ub2e4 \u2014 \ucee8\ud14d\uc2a4\ud2b8\uc5d0 \uc911\uc694\n)\n\n# 2\ub2e8\uacc4: \ub108\ubb34 \ud070 \uc139\uc158\uc740 \ub2e4\uc2dc \uc790\ub974\ub418, \ucf54\ub4dc \ube14\ub85d \uacbd\uacc4\ub97c \uc6b0\uc120\uc2dc\nsecondary_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=800,\n    chunk_overlap=100,\n    separators=[&quot;\\n```\\n&quot;, &quot;\\n\\n&quot;, &quot;\\n&quot;, &quot; &quot;],  # \ucf54\ub4dc \ube14\ub85d \uacbd\uacc4 \uc6b0\uc120\n)\n\ndef chunk_document(text: str, source_metadata: dict) -&gt; list[dict]:\n    header_chunks = header_splitter.split_text(text)\n    final_chunks = []\n\n    for 
chunk in header_chunks:\n        if len(chunk.page_content) &gt; 800:\n            sub_chunks = secondary_splitter.split_text(chunk.page_content)\n            for sub in sub_chunks:\n                final_chunks.append({\n                    &quot;content&quot;: sub,\n                    &quot;metadata&quot;: {**source_metadata, **chunk.metadata}\n                })\n        else:\n            final_chunks.append({\n                &quot;content&quot;: chunk.page_content,\n                &quot;metadata&quot;: {**source_metadata, **chunk.metadata}\n            })\n\n    return final_chunks\n<\/code><\/pre>\n<p>\uc774 \ubc29\uc2dd\uc73c\ub85c \ubc14\uafbc \ud6c4 \uac80\uc0c9 \uad00\ub828\uc131\uc774 \ub208\uc5d0 \ub744\uac8c \uc62c\ub77c\uac14\ub2e4. \ud2b9\ud788 &#8220;XX \uae30\ub2a5\uc758 \uc124\uc815 \ubc29\ubc95&#8221;\ucc98\ub7fc \ud2b9\uc815 \uc139\uc158\uc744 \ucc3e\ub294 \ucffc\ub9ac\uc5d0\uc11c \ud6a8\uacfc\uac00 \ucef8\ub2e4.<\/p>\n<p>\ud55c \uac00\uc9c0 \ud301: <code>strip_headers=False<\/code>\ub85c \ud5e4\ub354\ub97c \uccad\ud06c \uc548\uc5d0 \ub0a8\uaca8\ub450\ub294 \uac8c \uc911\uc694\ud558\ub2e4. \ub098\uc911\uc5d0 \uac80\uc0c9\ud588\uc744 \ub54c &#8220;\uc774 \uccad\ud06c\uac00 \uc5b4\ub5a4 \uc139\uc158 \uc18c\uc18d\uc778\uc9c0&#8221;\uac00 LLM\ud55c\ud14c \ud78c\ud2b8\uac00 \ub41c\ub2e4. \ucc98\uc74c\uc5d4 <code>True<\/code>\ub85c \ub480\ub2e4\uac00 \uac80\uc0c9 \ud488\uc9c8\uc774 \ubbf8\ubb18\ud558\uac8c \ub5a8\uc5b4\uc9c0\ub294 \uac78 \ubcf4\uace0 \ubc14\uafe8\ub2e4 \u2014 \ud5e4\ub354\uac00 \uc5c6\uc73c\uba74 \uccad\ud06c\uac00 \uace0\uc544\ucc98\ub7fc \ub5a0\ub2e4\ub2cc\ub2e4.<\/p>\n<h2>\ubca1\ud130 DB \uc120\ud0dd \u2014 \ub0b4\uac00 \uc0bd\uc9c8\ud55c \uc774\uc57c\uae30<\/h2>\n<p>\ud300\uc774 3\uba85\uc778 \uc2a4\ud0c0\ud2b8\uc5c5 \ud658\uacbd\uc5d0\uc11c \ubca1\ud130 DB\ub97c \uace0\ub97c \ub54c \uace0\ub824\ud560 \uac83\ub4e4\uc774 \uc0dd\uac01\ubcf4\ub2e4 \ub9ce\ub2e4. 
\ub098\ub294 Pinecone \u2192 Weaviate \u2192 pgvector \uc21c\uc11c\ub85c \uac14\ub2e4.<\/p>\n<p><strong>Pinecone<\/strong>: \ucc98\uc74c\uc5d4 \ud3b8\ud588\ub2e4. \uad00\ub9ac\ud615 \uc11c\ube44\uc2a4\ub77c \uc778\ud504\ub77c \uac71\uc815\uc774 \uc5c6\uace0, SDK\ub3c4 \uae54\ub054\ud558\ub2e4. \uadfc\ub370 \ubb38\uc11c\uac00 100\ub9cc \uac1c \ub118\uc5b4\uac00\ub2c8\uae4c \ube44\uc6a9\uc774 \uae09\uaca9\ud788 \uc62c\ub77c\uac14\ub2e4. \uadf8\ub9ac\uace0 \u2014 \uc774\uac74 \ub098\ub9cc \ub290\ub080 \uac74\uc9c0 \ubaa8\ub974\uaca0\ub294\ub370 \u2014 \uba54\ud0c0\ub370\uc774\ud130 \ud544\ud130\ub9c1\uc774 \uc0dd\uac01\ubcf4\ub2e4 \uc81c\ud55c\uc801\uc774\uc5c8\ub2e4. \ubcf5\uc7a1\ud55c \uc870\uac74\uc744 \uac78\uba74 \ucffc\ub9ac\uac00 \ub290\ub824\uc84c\ub2e4.<\/p>\n<p><strong>Weaviate<\/strong>: \uae30\ub2a5\uc774 \ub9ce\ub2e4. Hybrid search\uac00 \ub0b4\uc7a5\ub418\uc5b4 \uc788\uace0, GraphQL API\ub3c4 \uc788\ub2e4. \uadfc\ub370 self-hosted\ub85c \uc6b4\uc601\ud560 \ub54c \uc124\uc815\uc774 \ubcf5\uc7a1\ud558\uace0, \uba54\ubaa8\ub9ac\ub97c \uaf64 \ub9ce\uc774 \uba39\ub294\ub2e4. \ub3c4\ucee4\ub85c \uc62c\ub838\uc744 \ub54c 8GB \ucee8\ud14c\uc774\ub108\uac00 \uae30\ubcf8\uc774\uc5c8\ub2e4. \uc791\uc740 \ud300\ud55c\ud14c\ub294 \uad00\ub9ac \ubd80\ub2f4\uc774 \uc880 \uc788\uc5c8\ub2e4.<\/p>\n<p><strong>pgvector<\/strong>: \uacb0\uad6d \uc5ec\uae30\ub85c \uc654\ub2e4. \uc774\ubbf8 PostgreSQL\uc744 \uc4f0\uace0 \uc788\uc73c\ub2c8\uae4c \ucd94\uac00 \uc778\ud504\ub77c\uac00 \uc5c6\ub2e4\ub294 \uac8c \uacb0\uc815\uc801\uc774\uc5c8\ub2e4. <code>pgvector<\/code> 0.5.0\ubd80\ud130 HNSW \uc778\ub371\uc2a4\uac00 \ub4e4\uc5b4\uc624\uba74\uc11c \uc131\ub2a5\uc774 \ub9ce\uc774 \uc62c\ub77c\uc654\ub2e4. 
\ubc31\ub9cc \uac74 \uc774\ud558 \uaddc\ubaa8\uc5d0\uc11c\ub294 \ucda9\ubd84\ud788 \ube60\ub974\ub2e4.<\/p>\n<p>\ub0b4\uac00 \ud588\ub358 \uc2e4\uc218 \ud558\ub098\ub97c \uacf5\uc720\ud558\uc790\uba74 \u2014 pgvector\uc5d0\uc11c \ucc98\uc74c\uc5d0 IVFFlat \uc778\ub371\uc2a4\ub97c \uc37c\ub294\ub370, \ub370\uc774\ud130\ub97c \ub2e4 \ub123\uae30 \uc804\uc5d0 \uc778\ub371\uc2a4\ub97c \ub9cc\ub4e4\uc5c8\ub2e4. IVFFlat\uc740 \uc778\ub371\uc2a4 \uc0dd\uc131 \uc2dc\uc810\uc758 \ub370\uc774\ud130 \ubd84\ud3ec\ub97c \uae30\ubc18\uc73c\ub85c \ud074\ub7ec\uc2a4\ud130\ub97c \ub9cc\ub4e4\uae30 \ub54c\ubb38\uc5d0, \ub098\uc911\uc5d0 \ub370\uc774\ud130\uac00 \ub9ce\uc774 \ucd94\uac00\ub418\uba74 \uac80\uc0c9 \ud488\uc9c8\uc774 \ub5a8\uc5b4\uc9c4\ub2e4. HNSW\ub294 \uc774 \ubb38\uc81c\uac00 \uc5c6\ub2e4. \uc9c0\uae08\uc740 \ubb34\uc870\uac74 HNSW\ub97c \uc4f4\ub2e4.<\/p>\n<pre><code class=\"language-sql\">-- IVFFlat \ub300\uc2e0 \uc774\uac78 \uc4f0\uc790\nCREATE INDEX ON documents \nUSING hnsw (embedding vector_cosine_ops)\nWITH (m = 16, ef_construction = 64);\n\n-- \uac80\uc0c9 \uc2dc ef_search \ud30c\ub77c\ubbf8\ud130\ub85c \uc815\ud655\ub3c4\/\uc18d\ub3c4 \ud2b8\ub808\uc774\ub4dc\uc624\ud504 \uc870\uc808 \uac00\ub2a5\nSET hnsw.ef_search = 100;  -- \uae30\ubcf8\uac12\uc740 40, \ub192\uc744\uc218\ub85d \uc815\ud655\ud558\uc9c0\ub9cc \ub290\ub9bc\n<\/code><\/pre>\n<p>1\ucc9c\ub9cc \uac74 \uc774\uc0c1\uc774\ub77c\uba74 pgvector\ub9cc\uc73c\ub860 \ubd80\uc871\ud560 \uc218 \uc788\ub2e4. \uadf8 \uaddc\ubaa8\uba74 Qdrant\ub098 Weaviate\uac00 \ub9de\uc744 \uac83 \uac19\uc740\ub370, \ub098\ub294 \uc544\uc9c1 \uadf8 \uaddc\ubaa8\ub97c \uacbd\ud5d8\ud574\ubcf4\uc9c0 \ubabb\ud574\uc11c \ud655\uc2e0\uc740 \uc5c6\ub2e4.<\/p>\n<h2>Hybrid Search\uc640 Reranking\uc774 \uac80\uc0c9\uc744 \ubc14\uafbc \ubc29\uc2dd<\/h2>\n<p>\uc21c\uc218 \ubca1\ud130 \uac80\uc0c9\ub9cc\uc73c\ub85c\ub294 \ud55c\uacc4\uac00 \uc788\ub2e4. 
\ud2b9\ud788 \uace0\uc720\uba85\uc0ac, \uc81c\ud488 \ucf54\ub4dc, \uc624\ud0c0\uac00 \uc11e\uc778 \ucffc\ub9ac\uc5d0\uc11c \ucde8\uc57d\ud558\ub2e4. &#8220;k8s ingress 502 \uc5d0\ub7ec&#8221;\ucc98\ub7fc \ud0a4\uc6cc\ub4dc\uac00 \uba85\ud655\ud55c \ucffc\ub9ac\ub97c \ubca1\ud130 \uac80\uc0c9\uc73c\ub85c\ub9cc \ucc98\ub9ac\ud558\uba74 \uc624\ud788\ub824 \uc5c9\ub6b1\ud55c \uacb0\uacfc\uac00 \ub098\uc628\ub2e4.<\/p>\n<p>Hybrid search\ub294 BM25(\ud0a4\uc6cc\ub4dc \uac80\uc0c9)\uc640 \ubca1\ud130 \uac80\uc0c9 \uacb0\uacfc\ub97c \ud569\uce58\ub294 \ubc29\uc2dd\uc774\ub2e4. \ub450 \uacb0\uacfc\ub97c \ud569\uce60 \ub54c RRF(Reciprocal Rank Fusion)\ub97c \uc8fc\ub85c \uc4f4\ub2e4.<\/p>\n<pre><code class=\"language-python\">from rank_bm25 import BM25Okapi\nimport numpy as np\n\ndef reciprocal_rank_fusion(rankings: list[list[str]], k: int = 60) -&gt; list[tuple[str, float]]:\n    &quot;&quot;&quot;\n    \uc5ec\ub7ec \ub7ad\ud0b9 \ub9ac\uc2a4\ud2b8\ub97c RRF\ub85c \ud569\uce5c\ub2e4.\n    k=60\uc740 Cormack et al. 
2009 \ub17c\ubb38\uc5d0\uc11c \uad8c\uc7a5\ud55c \uac12 \u2014 \uc2e4\ud5d8\ud574\ubcf4\ub2c8 \ub300\ubd80\ubd84 \ucf00\uc774\uc2a4\uc5d0\uc11c \uc798 \ub428.\n    &quot;&quot;&quot;\n    scores: dict[str, float] = {}\n    for ranking in rankings:\n        for rank, doc_id in enumerate(ranking):\n            scores[doc_id] = scores.get(doc_id, 0) + 1 \/ (k + rank + 1)\n\n    return sorted(scores.items(), key=lambda x: x[1], reverse=True)\n\ndef hybrid_search(query: str, top_k: int = 20) -&gt; list[dict]:\n    # \ubca1\ud130 \uac80\uc0c9\n    query_embedding = embed(query)\n    vector_results = vector_db.search(query_embedding, top_k=top_k)\n\n    # BM25 \ud0a4\uc6cc\ub4dc \uac80\uc0c9\n    bm25_results = bm25_index.search(query, top_k=top_k)\n\n    # RRF\ub85c \ud569\uce58\uae30\n    fused = reciprocal_rank_fusion([\n        [r[&quot;id&quot;] for r in vector_results],\n        [r[&quot;id&quot;] for r in bm25_results],\n    ])\n\n    return [fetch_doc(doc_id) for doc_id, _ in fused[:top_k]]\n<\/code><\/pre>\n<p>\uadfc\ub370 hybrid search\ub9cc\uc73c\ub85c \ubd80\uc871\ud560 \ub54c\uac00 \uc788\ub2e4. \uc0c1\uc704 20\uac1c\ub97c \uac00\uc838\uc654\ub294\ub370 \uadf8 \uc911\uc5d0\uc11c \uc2e4\uc81c\ub85c \uad00\ub828 \uc788\ub294 \uac74 3~4\uac1c\uc778 \uacbd\uc6b0 \u2014 \uadf8\ub7fc LLM\uc5d0 20\uac1c \uccad\ud06c\ub97c \ub2e4 \ubcf4\ub0b4\ub294 \uac74 \ub0ad\ube44\ub2e4.<\/p>\n<p>\uc774\ub54c <strong>reranking<\/strong>\uc774 \ub4e4\uc5b4\uc628\ub2e4. Cohere\uc758 rerank API\ub098 <code>cross-encoder\/ms-marco-MiniLM-L-6-v2<\/code> \uac19\uc740 cross-encoder \ubaa8\ub378\uc744 \uc4f0\uba74 \ub41c\ub2e4. Bi-encoder(\uc784\ubca0\ub529 \ubaa8\ub378)\uc640 \ub2ec\ub9ac cross-encoder\ub294 \ucffc\ub9ac-\ubb38\uc11c \uc30d\uc744 \uac19\uc774 \ucc98\ub9ac\ud558\uae30 \ub54c\ubb38\uc5d0 \uad00\ub828\uc131 \ud310\ub2e8\uc774 \ud6e8\uc52c \uc815\ud655\ud558\ub2e4. 
\ub300\uc2e0 \uc18d\ub3c4\uac00 \ub290\ub824\uc11c \ubcf4\ud1b5 top-20 \u2192 rerank \u2192 top-5 \uc2dd\uc73c\ub85c \ud30c\uc774\ud504\ub77c\uc778\uc744 \uad6c\uc131\ud55c\ub2e4.<\/p>\n<p>\ub0b4 \uacbd\ud5d8\uc0c1 reranker \ud558\ub098 \ucd94\uac00\ud588\uc744 \ub54c \uccb4\uac10 \ud488\uc9c8\uc774 \uac00\uc7a5 \ub9ce\uc774 \uc62c\ub77c\uac14\ub2e4. \uccad\ud0b9\uc744 \uc544\ubb34\ub9ac \uc798 \ud574\ub3c4 \uac80\uc0c9 \ub2e8\uacc4\uc5d0\uc11c \ub178\uc774\uc988\uac00 \ub4e4\uc5b4\uc624\uba74 LLM\uc774 \ud5f7\uac08\ub9b0\ub2e4. Reranker\uac00 \uadf8 \ub178\uc774\uc988\ub97c \ub9ce\uc774 \uac78\ub7ec\uc900\ub2e4.<\/p>\n<h2>\uba54\ud0c0\ub370\uc774\ud130 \ud544\ud130\ub9c1\uc73c\ub85c \uac80\uc0c9 \ubc94\uc704\ub97c \uc881\ud788\ub294 \ubc95<\/h2>\n<p>\uc9c1\uc811 \uacaa\uc5b4\ubcf4\ub2c8 \uac80\uc0c9 \ud488\uc9c8 \ubb38\uc81c\uc758 \uc0c1\ub2f9 \ubd80\ubd84\uc774 \uc0ac\uc2e4\uc740 &#8220;\uad00\ub828 \uc5c6\ub294 \ubb38\uc11c \uc720\ud615\uc5d0\uc11c \uac80\uc0c9\uc774 \uc77c\uc5b4\ub098\ub294 \uac83&#8221;\uc774\uc5c8\ub2e4.<\/p>\n<p>\uc608\ub97c \ub4e4\uc5b4 \uc6b0\ub9ac \ubb38\uc11c \ubca0\uc774\uc2a4\uc5d4 API \ub808\ud37c\ub7f0\uc2a4, \ud29c\ud1a0\ub9ac\uc5bc, \ub9b4\ub9ac\uc988 \ub178\ud2b8, \ub0b4\ubd80 \uc124\uacc4 \ubb38\uc11c\uac00 \uc11e\uc5ec \uc788\uc5c8\ub2e4. \uc0ac\uc6a9\uc790\uac00 &#8220;\uc778\uc99d \ud1a0\ud070 \uac31\uc2e0\ud558\ub294 \ubc95&#8221;\uc744 \ubb3c\uc5b4\ubcfc \ub54c \ub9b4\ub9ac\uc988 \ub178\ud2b8\uc5d0\uc11c &#8220;v2.3.0 \u2014 \uc778\uc99d \ud1a0\ud070 \uac31\uc2e0 \ub85c\uc9c1 \ubcc0\uacbd&#8221; \uac19\uc740 \uac8c \uc0c1\uc704\uc5d0 \uc62c\ub77c\uc624\ub294 \ubb38\uc81c\uac00 \uc788\uc5c8\ub2e4. \uc758\ubbf8\uc801\uc73c\ub85c\ub294 \uad00\ub828 \uc788\uc9c0\ub9cc, \uc2e4\uc81c\ub85c \uc6d0\ud558\ub294 \uac74 \ubc29\ubc95\ub860\uc801\uc778 \uc124\uba85\uc774\uc9c0 \ubc84\uc804 \ubcc0\uacbd \uc774\ub825\uc774 \uc544\ub2c8\ub2e4.<\/p>\n<p>\uc774\uac78 \ud574\uacb0\ud55c \ubc29\ubc95\uc740 \uac04\ub2e8\ud588\ub2e4. 
\uccad\ud06c \uc0dd\uc131 \uc2dc\uc810\uc5d0 \ubb38\uc11c \uc720\ud615\uc744 \uba54\ud0c0\ub370\uc774\ud130\ub85c \uc800\uc7a5\ud558\uace0, \ucffc\ub9ac \uc758\ub3c4\uc5d0 \ub530\ub77c \ud544\ud130\ub97c \uc801\uc6a9\ud588\ub2e4.<\/p>\n<pre><code class=\"language-python\"># \ucffc\ub9ac \ub77c\uc6b0\ud305 \u2014 \uac04\ub2e8\ud55c \ubd84\ub958\uae30\ub85c \ucda9\ubd84\ud558\ub2e4\ndef classify_query(query: str) -&gt; dict:\n    # GPT-4o-mini\ub85c \ube60\ub974\uac8c \ubd84\ub958 (\ube44\uc6a9 \ucd5c\uc18c\ud654)\n    response = llm.invoke(\n        f&quot;\ub2e4\uc74c \uc9c8\ubb38\uc758 \uc758\ub3c4\ub97c \ubd84\ub958\ud558\uc138\uc694: '{query}'\\n&quot;\n        &quot;\uc635\uc158: how-to, troubleshooting, reference, changelog\\n&quot;\n        &quot;\ub2e8\uc5b4 \ud558\ub098\ub85c\ub9cc \ub2f5\ud558\uc138\uc694.&quot;\n    )\n    intent = response.content.strip().lower()\n\n    filter_map = {\n        &quot;how-to&quot;: {&quot;doc_type&quot;: {&quot;$in&quot;: [&quot;tutorial&quot;, &quot;guide&quot;]}},\n        &quot;troubleshooting&quot;: {&quot;doc_type&quot;: {&quot;$in&quot;: [&quot;guide&quot;, &quot;faq&quot;]}},\n        &quot;reference&quot;: {&quot;doc_type&quot;: &quot;api-reference&quot;},\n        &quot;changelog&quot;: {&quot;doc_type&quot;: &quot;release-notes&quot;},\n    }\n\n    return filter_map.get(intent, {})  # \ubd84\ub958 \uc2e4\ud328 \uc2dc \ud544\ud130 \uc5c6\uc774\n<\/code><\/pre>\n<p>\uc774 \ubc29\uc2dd\uc758 \ub2e8\uc810\uc740 \ucffc\ub9ac \ubd84\ub958\uac00 \ud2c0\ub9ac\uba74 \uac80\uc0c9 \uc790\uccb4\uac00 \ub9dd\ud55c\ub2e4\ub294 \uac83\uc774\ub2e4. \uadf8\ub798\uc11c \ubd84\ub958 \uc2e0\ub8b0\ub3c4\uac00 \ub0ae\uc744 \ub54c\ub294 \ud544\ud130\ub97c \uc544\uc608 \uc548 \uac70\ub294 \ud3f4\ubc31 \ub85c\uc9c1\uc744 \ub123\uc5c8\ub2e4. 
\uc644\ubcbd\ud55c \ud574\ubc95\uc740 \uc544\ub2c8\uc9c0\ub9cc, \uc801\uc5b4\ub3c4 &#8220;\uc65c \ub9b4\ub9ac\uc988 \ub178\ud2b8\uac00 \ub2f5\ubcc0\uc5d0 \ub098\uc624\uc9c0?&#8221;\ub77c\ub294 \uc2ac\ub799 \uba54\uc2dc\uc9c0\ub294 \ub354 \uc774\uc0c1 \uc548 \ubc1b\ub294\ub2e4.<\/p>\n<h2>\uc2e4\uc81c\ub85c \ub0b4\uac00 \ucd94\ucc9c\ud558\ub294 \uc2a4\ud0dd<\/h2>\n<p>&#8220;\uc0c1\ud669\uc5d0 \ub530\ub77c \ub2e4\ub974\ub2e4&#8221;\ub294 \ub9d0\uc740 \ub108\ubb34 \ubb34\ucc45\uc784\ud558\ub2c8\uae4c, \ub0b4 \uacbd\ud5d8 \uae30\ubc18\uc73c\ub85c \uad6c\uccb4\uc801\uc73c\ub85c \ub9d0\ud558\uaca0\ub2e4.<\/p>\n<p><strong>\ubb38\uc11c \uc218 10\ub9cc \uac74 \uc774\ud558, \ud300 3\uba85 \uc774\ud558<\/strong>: pgvector + \uad6c\uc870 \uae30\ubc18 \uccad\ud0b9 + hybrid search (BM25 + vector) + <code>cross-encoder\/ms-marco-MiniLM-L-6-v2<\/code> reranker. \ucd94\uac00 \uc778\ud504\ub77c \uc5c6\uace0, \uc6b4\uc601 \ube44\uc6a9 \ub0ae\uace0, \uc131\ub2a5\ub3c4 \ucda9\ubd84\ud558\ub2e4. \ub098\ub77c\uba74 Pinecone \uac19\uc740 \uad00\ub9ac\ud615 \uc11c\ube44\uc2a4 \ube44\uc6a9 \ub0b4\ub290\ub2c8 \uc774 \uc870\ud569\uc73c\ub85c \uac04\ub2e4.<\/p>\n<p><strong>\ubb38\uc11c \uc218 100\ub9cc \uac74 \uc774\uc0c1, \ub610\ub294 \ub2e4\uad6d\uc5b4 \uc9c0\uc6d0 \ud544\uc694<\/strong>: Qdrant\ub97c \uc9c4\uc9c0\ud558\uac8c \uace0\ub824\ud55c\ub2e4. Rust\ub85c \ub9cc\ub4e4\uc5b4\uc838\uc11c \uba54\ubaa8\ub9ac \ud6a8\uc728\uc774 \uc88b\uace0, \ud544\ud130\ub9c1 \uae30\ub2a5\uc774 pgvector\ubcf4\ub2e4 \uc720\uc5f0\ud558\ub2e4. \ud55c\uad6d\uc5b4 BM25\ub294 <code>kiwipiepy<\/code> \uac19\uc740 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30\ub85c \ud1a0\ud070\ud654\ud574\uc11c \uc4f0\ub294 \uac8c \uacf5\ubc31 \uae30\uc900\uc73c\ub85c \uc790\ub974\ub294 \uc601\ubb38\uc2dd BM25\ubcf4\ub2e4 \ud655\uc2e4\ud788 \ub0ab\ub2e4.<\/p>\n<p><strong>\uccad\ud0b9 \uc804\ub7b5<\/strong>: \uad6c\uc870\uac00 \uc788\ub294 \ubb38\uc11c(\ub9c8\ud06c\ub2e4\uc6b4, HTML)\uba74 \uad6c\uc870 \uae30\ubc18. 
\uad6c\uc870 \uc5c6\ub294 \uae34 \ud14d\uc2a4\ud2b8\uba74 <code>SemanticChunker<\/code>\uac00 \ud488\uc9c8\uc740 \uc88b\uc9c0\ub9cc \ub290\ub9ac\ub2e4\ub294 \uac70 \uac10\uc548\ud574\uc57c \ud55c\ub2e4. \uc778\ub371\uc2f1\uc744 \uc2e4\uc2dc\uac04\uc73c\ub85c \ud574\uc57c \ud558\ub294 \uc0c1\ud669\uc774\uba74 recursive character splitting\uc5d0 \ucda9\ubd84\ud55c overlap(10~15%)\uc744 \uc8fc\ub294 \uac8c \uc2e4\uc6a9\uc801\uc774\ub2e4.<\/p>\n<p>\uac80\uc0c9 \ucd5c\uc801\ud654\ub294 reranker \ud558\ub098\ub9cc \ucd94\uac00\ud574\ub3c4 \uccb4\uac10 \ucc28\uc774\uac00 \uac00\uc7a5 \ud06c\ub2e4. \uccad\ud0b9\uc774\ub098 \ubca1\ud130 DB\ub97c \ub72f\uc5b4\uace0\uce58\uae30 \uc804\uc5d0 reranker\ubd80\ud130 \ubd99\uc5ec\ubcf4\ub294 \uac78 \uad8c\ud55c\ub2e4. \ub0b4 \uacbd\uc6b0\uc5d4 reranker \ucd94\uac00 \ud6c4 &#8220;\uc774 \ub2f5\ubcc0\uc774 \ub9de\ub098\uc694?&#8221; \uc0ac\uc6a9\uc790 \ud53c\ub4dc\ubc31\uc774 \ub208\uc5d0 \ub744\uac8c \uc904\uc5c8\ub2e4.<\/p>\n<p>\ud3c9\uac00 \uc5c6\uc774\ub294 \ubc29\ud5a5\uc744 \ubaa8\ub978\ub2e4. <a href=\"https:\/\/github.com\/explodinggradients\/ragas\">RAGAS<\/a> \uac19\uc740 \ud504\ub808\uc784\uc6cc\ud06c\ub85c faithfulness, answer relevancy, context recall\uc744 \uc8fc\uae30\uc801\uc73c\ub85c \uce21\uc815\ud558\ub294 \uac8c \uc9c4\uc9dc \uc911\uc694\ud558\ub2e4. 
\ucc98\uc74c \ud55c \ub2ec\uc744 \uac10\uc73c\ub85c \ud29c\ub2dd\ud588\ub294\ub370, \uc9c0\ud45c \ub123\uace0 \ub098\uc11c\uc57c \ub0b4\uac00 \ucc29\uac01\ud588\ub358 \ubd80\ubd84\ub4e4\uc774 \ubcf4\uc600\ub2e4 \u2014 \ud488\uc9c8\uc774 \uc88b\uc544\uc84c\ub2e4\uace0 \ud655\uc2e0\ud588\ub358 \uac83\ub4e4\uc774 \uc2e4\uc81c\ub860 \ubcc4 \ucc28\uc774\uac00 \uc5c6\uc5c8\uace0, \uc9c4\uc9dc \ud6a8\uacfc \uc788\uc5c8\ub358 \uac74 \uc608\uc0c1 \ubc16\uc758 \uac83\ub4e4\uc774\uc5c8\ub2e4.<\/p>\n<p><!-- Reviewed: 2026-03-07 | Status: ready_to_publish | Changes: meta_description expanded to ~150 chars; \"\uc2dc\ub3c4 1\/2\/3\" parallel list converted to narrative flow; pgvector HNSW version corrected 0.7.0\u21920.5.0; strip_headers tip made more personal; metadata section closing line added for voice; generic \"\ub9c8\uc9c0\ub9c9\uc73c\ub85c\" opener removed from conclusion --><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\uc0ac\ub0b4 RAG \uc2dc\uc2a4\ud15c\uc774 GPT-4\ub97c \uc368\ub3c4 \uc5c9\ud130\ub9ac \ub2f5\ubcc0\uc744 \ub0b4\ub1a8\ub358 \uc774\uc720\ub294 \uac80\uc0c9 \uc790\uccb4\uac00 \ubb38\uc81c\uc600\ub2e4. 
\uccad\ud0b9 \uc804\ub7b5, pgvector vs Pinecone vs Weaviate \uc120\ud0dd, hybrid search, reranking\uae4c\uc9c0 \uc2e4\uc81c 2\uc8fc \uc0bd\uc9c8\uc5d0\uc11c \ubc30\uc6b4 \uac83\ub4e4\uc744 \uc194\uc9c1\ud558\uac8c \uacf5\uc720\ud569\ub2c8\ub2e4.<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[1],"tags":[],"class_list":["post-243","post","type-post","status-publish","format-standard","hentry","category-general"],"_links":{"self":[{"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/posts\/243","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/comments?post=243"}],"version-history":[{"count":1,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/posts\/243\/revisions"}],"predecessor-version":[{"id":279,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/posts\/243\/revisions\/279"}],"wp:attachment":[{"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/media?parent=243"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/categories?post=243"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.rebalai.com\/ko\/wp-json\/wp\/v2\/tags?post=243"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}