{"id":4,"date":"2026-03-04T05:49:50","date_gmt":"2026-03-04T05:49:50","guid":{"rendered":"https:\/\/blog.rebalai.com\/ja\/2026\/03\/04\/rag-vector-database-production\/"},"modified":"2026-03-09T03:33:02","modified_gmt":"2026-03-09T03:33:02","slug":"rag-vector-database-production","status":"publish","type":"post","link":"https:\/\/blog.rebalai.com\/ja\/2026\/03\/04\/rag-vector-database-production\/","title":{"rendered":"\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u4f7f\u7528\u3057\u305f\u672c\u756a\u74b0\u5883\u5bfe\u5fdc\u306eRAG\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u69cb\u7bc9"},"content":{"rendered":"<p><script type=\"application\/ld+json\">\n{\n  \"@context\": \"https:\/\/schema.org\",\n  \"@type\": \"BlogPosting\",\n  \"headline\": \"\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u4f7f\u7528\u3057\u305f\u672c\u756a\u74b0\u5883\u5bfe\u5fdc\u306eRAG\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u69cb\u7bc9\",\n  \"description\": \"\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u4f7f\u7528\u3057\u305f\u672c\u756a\u74b0\u5883\u5bfe\u5fdc\u306eRAG\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u69cb\u7bc9 
\u6b63\u76f4\u306a\u3068\u3053\u308d\u3001RAG\u3092\u521d\u3081\u3066\u5b9f\u88c5\u3057\u305f\u3068\u304d\u3001\u300c\u30d9\u30af\u30bf\u30fc\u691c\u7d22\u3055\u3048\u52d5\u3051\u3070\u307b\u307c\u5b8c\u6210\u3060\u300d\u3068\u7518\u304f\u898b\u3066\u3044\u305f\u3002\u5b9f\u969b\u306b\u306f\u3001\u30c1\u30e3\u30f3\u30af\u6226\u7565\u306e\u30df\u30b9\u3067\u691c\u7d22\u7cbe\u5ea6\u304c\u5d29\u58ca\u3057\u3001\u30ea\u30c8\u30e9\u30a4\u30ed\u30b8\u30c3\u30af\u306e\u6b20\u5982\u3067\u672c\u756a\u969c\u5bb3\u3092\u8d77\u3053\u3057\u305f\u3002\u3053\u306e\u8a18\u4e8b\u306f\u305d\u306e\u53cd\u7701\u3082\u8fbc\u3081\u3066\u66f8\u3044\u3066\u3044\u308b\u3002 LLM\u306b\u306f\u300c\",\n  \"url\": \"https:\/\/blog.rebalai.com\/ja\/2026\/03\/04\/rag-vector-database-production\/\",\n  \"datePublished\": \"2026-03-04T05:49:50\",\n  \"dateModified\": \"2026-03-05T17:39:39\",\n  \"inLanguage\": \"ja_JP\",\n  \"author\": {\n    \"@type\": \"Organization\",\n    \"name\": \"RebalAI\",\n    \"url\": \"https:\/\/blog.rebalai.com\/ja\/\"\n  },\n  \"publisher\": {\n    \"@type\": \"Organization\",\n    \"name\": \"RebalAI\",\n    \"logo\": {\n      \"@type\": \"ImageObject\",\n      \"url\": \"https:\/\/blog.rebalai.com\/wp-content\/uploads\/logo.png\"\n    }\n  },\n  \"mainEntityOfPage\": {\n    \"@type\": \"WebPage\",\n    \"@id\": \"https:\/\/blog.rebalai.com\/ja\/2026\/03\/04\/rag-vector-database-production\/\"\n  
}\n}\n<\/script><\/p>\n<h1>\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u4f7f\u7528\u3057\u305f\u672c\u756a\u74b0\u5883\u5bfe\u5fdc\u306eRAG\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u69cb\u7bc9<\/h1>\n<p>\u6b63\u76f4\u306a\u3068\u3053\u308d\u3001RAG\u3092\u521d\u3081\u3066\u5b9f\u88c5\u3057\u305f\u3068\u304d\u3001\u300c\u30d9\u30af\u30bf\u30fc\u691c\u7d22\u3055\u3048\u52d5\u3051\u3070\u307b\u307c\u5b8c\u6210\u3060\u300d\u3068\u7518\u304f\u898b\u3066\u3044\u305f\u3002\u5b9f\u969b\u306b\u306f\u3001\u30c1\u30e3\u30f3\u30af\u6226\u7565\u306e\u30df\u30b9\u3067\u691c\u7d22\u7cbe\u5ea6\u304c\u5d29\u58ca\u3057\u3001\u30ea\u30c8\u30e9\u30a4\u30ed\u30b8\u30c3\u30af\u306e\u6b20\u5982\u3067\u672c\u756a\u969c\u5bb3\u3092\u8d77\u3053\u3057\u305f\u3002\u3053\u306e\u8a18\u4e8b\u306f\u305d\u306e\u53cd\u7701\u3082\u8fbc\u3081\u3066\u66f8\u3044\u3066\u3044\u308b\u3002<\/p>\n<p>LLM\u306b\u306f\u300c\u77e5\u8b58\u306e\u30ab\u30c3\u30c8\u30aa\u30d5\u300d\u3068\u300c\u30cf\u30eb\u30b7\u30cd\u30fc\u30b7\u30e7\u30f3\u300d\u3068\u3044\u3046\u6839\u672c\u7684\u306a\u8ab2\u984c\u304c\u3042\u308b\u3002\u3053\u308c\u3092\u89e3\u6c7a\u3059\u308b\u73fe\u5b9f\u7684\u306a\u30a2\u30d7\u30ed\u30fc\u30c1\u304c<strong>\u691c\u7d22\u62e1\u5f35\u751f\u6210\uff08RAG\uff1aRetrieval-Augmented Generation\uff09<\/strong>\u3060\u3002<strong>\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9<\/strong>\u3092\u6838\u3068\u3057\u305fRAG\u30b7\u30b9\u30c6\u30e0\u3092\u3001\u5b9f\u969b\u306e<a href=\"https:\/\/m.do.co\/c\/06956e5e2802\" title=\"DigitalOcean \u672c\u756a\u74b0\u5883\u30af\u30e9\u30a6\u30c9\" rel=\"nofollow sponsored\" target=\"_blank\">\u672c\u756a\u74b0\u5883<\/a>\u3067\u904b\u7528\u3067\u304d\u308b\u30ec\u30d9\u30eb\u307e\u3067\u6301\u3063\u3066\u3044\u304f\u65b9\u6cd5\u3092\u3001\u5b9f\u88c5\u30b3\u30fc\u30c9\u3092\u4ea4\u3048\u3066\u8aac\u660e\u3059\u308b\u3002<\/p>\n<hr 
\/>\n<h2>RAG\u3068\u306f\u4f55\u304b<\/h2>\n<p><strong>\u691c\u7d22\u62e1\u5f35\u751f\u6210\uff08RAG\uff09<\/strong>\u306f\u3001LLM\u304c\u56de\u7b54\u3092\u751f\u6210\u3059\u308b\u524d\u306b\u3001\u5916\u90e8\u306e\u30ca\u30ec\u30c3\u30b8\u30d9\u30fc\u30b9\u304b\u3089\u95a2\u9023\u60c5\u5831\u3092\u691c\u7d22\u30fb\u53d6\u5f97\u3057\u3001\u305d\u306e\u60c5\u5831\u3092\u30d7\u30ed\u30f3\u30d7\u30c8\u306b\u7d44\u307f\u8fbc\u3080\u6280\u8853\u3060\u3002<\/p>\n<p>\u901a\u5e38\u306eLLM\u306f\u5b66\u7fd2\u30c7\u30fc\u30bf\u3060\u3051\u3067\u56de\u7b54\u3092\u751f\u6210\u3059\u308b\u305f\u3081\u3001\u3053\u3046\u3044\u3063\u305f\u554f\u984c\u304c\u8d77\u304d\u308b\u3002<\/p>\n<ul>\n<li><strong>\u60c5\u5831\u306e\u9673\u8150\u5316<\/strong>\uff1a\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u30c7\u30fc\u30bf\u306e\u30ab\u30c3\u30c8\u30aa\u30d5\u4ee5\u964d\u306e\u60c5\u5831\u3092\u77e5\u3089\u306a\u3044<\/li>\n<li><strong>\u30cf\u30eb\u30b7\u30cd\u30fc\u30b7\u30e7\u30f3<\/strong>\uff1a\u5b58\u5728\u3057\u306a\u3044\u60c5\u5831\u3092\u81ea\u4fe1\u6e80\u3005\u306b\u751f\u6210\u3057\u3066\u3057\u307e\u3046<\/li>\n<li><strong>\u793e\u5185\u60c5\u5831\u3078\u306e\u975e\u5bfe\u5fdc<\/strong>\uff1a\u30d7\u30ed\u30d7\u30e9\u30a4\u30a8\u30bf\u30ea\u306a\u30c7\u30fc\u30bf\u3084\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u6271\u3048\u306a\u3044<\/li>\n<\/ul>\n<p>RAG\u306f\u3053\u308c\u3089\u3092\u6839\u672c\u304b\u3089\u89e3\u6c7a\u3059\u308b\u3002\u30e6\u30fc\u30b6\u30fc\u306e\u8cea\u554f\u306b\u5bfe\u3057\u3066\u3001\u307e\u305a\u95a2\u9023\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u691c\u7d22\u3057\u3001\u305d\u306e\u6587\u66f8\u3092\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u3068\u3057\u3066LLM\u306b\u6e21\u3059\u3053\u3068\u3067\u3001\u6b63\u78ba\u3067\u6700\u65b0\u306e\u56de\u7b54\u3092\u751f\u6210\u3067\u304d\u308b\u3002<\/p>\n<h3>RAG\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u57fa\u672c\u69cb\u9020<\/h3>\n<p>RAG\u30b7\u30b9\u30c6\u30e0\u306f2\u3064\u3
06e\u30d5\u30a7\u30fc\u30ba\u3067\u69cb\u6210\u3055\u308c\u308b\u3002<\/p>\n<p><strong>\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u4f5c\u6210\u30d5\u30a7\u30fc\u30ba\uff08\u30aa\u30d5\u30e9\u30a4\u30f3\u51e6\u7406\uff09<\/strong><br \/>\n1. \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u53ce\u96c6\u3068\u524d\u51e6\u7406<br \/>\n2. \u30c6\u30ad\u30b9\u30c8\u306e\u30c1\u30e3\u30f3\u30af\u5206\u5272<br \/>\n3. \u57cb\u3081\u8fbc\u307f\u30e2\u30c7\u30eb\u306b\u3088\u308b\u30d9\u30af\u30c8\u30eb\u5316<br \/>\n4. \u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3078\u306e\u683c\u7d0d<\/p>\n<p><strong>\u30af\u30a8\u30ea\u51e6\u7406\u30d5\u30a7\u30fc\u30ba\uff08\u30aa\u30f3\u30e9\u30a4\u30f3\u51e6\u7406\uff09<\/strong><br \/>\n1. \u30e6\u30fc\u30b6\u30fc\u306e\u8cea\u554f\u3092\u30d9\u30af\u30c8\u30eb\u5316<br \/>\n2. \u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3067\u985e\u4f3c\u691c\u7d22\u3092\u5b9f\u884c<br \/>\n3. \u95a2\u9023\u30c1\u30e3\u30f3\u30af\u3092\u30d7\u30ed\u30f3\u30d7\u30c8\u306b\u7d44\u307f\u8fbc\u3080<br \/>\n4. 
LLM\u304c\u6587\u8108\u3092\u8e0f\u307e\u3048\u305f\u56de\u7b54\u3092\u751f\u6210<\/p>\n<hr \/>\n<h2>\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u306f\u3069\u308c\u3092\u9078\u3076\u304b\uff1aPinecone\u3068Weaviate\u306e\u6bd4\u8f03<\/h2>\n<p>RAG\u30b7\u30b9\u30c6\u30e0\u306e\u6027\u80fd\u306f\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u306e\u9078\u629e\u306b\u5927\u304d\u304f\u4f9d\u5b58\u3059\u308b\u3002\u79c1\u306f\u3053\u308c\u307e\u3067\u3044\u304f\u3064\u304b\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u4e21\u65b9\u3092\u4f7f\u3063\u3066\u304d\u305f\u304c\u3001\u300c\u3069\u3061\u3089\u304c\u512a\u308c\u3066\u3044\u308b\u304b\u300d\u3088\u308a\u300c\u3069\u3061\u3089\u304c\u81ea\u5206\u305f\u3061\u306e\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u306b\u5408\u3063\u3066\u3044\u308b\u304b\u300d\u3067\u5224\u65ad\u3059\u3079\u304d\u3060\u3068\u601d\u3063\u3066\u3044\u308b\u3002<\/p>\n<h3>Pinecone<\/h3>\n<p><strong>Pinecone<\/strong>\u306f\u30d5\u30eb\u30de\u30cd\u30fc\u30b8\u30c9\u306e\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u30b5\u30fc\u30d3\u30b9\u3060\u3002\u30a4\u30f3\u30d5\u30e9\u7ba1\u7406\u304c\u4e0d\u8981\u3067\u3001API\u3092\u53e9\u3051\u3070\u3059\u3050\u4f7f\u3048\u308b\u3002<\/p>\n<p><strong>\u4e3b\u306a\u7279\u5fb4\uff1a<\/strong><br \/>\n&#8211; 
\u30b5\u30fc\u30d0\u30fc\u30ec\u30b9\u30a2\u30fc\u30ad\u30c6\u30af\u30c1\u30e3\uff08\u30b9\u30bf\u30fc\u30bf\u30fc\u30d7\u30e9\u30f3\u306f\u7121\u6599\u67a0\u3042\u308a\uff09<br \/>\n&#8211; \u81ea\u52d5\u30b9\u30b1\u30fc\u30ea\u30f3\u30b0\u3068\u30ec\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3<br \/>\n&#8211; \u30cd\u30fc\u30e0\u30b9\u30da\u30fc\u30b9\u306b\u3088\u308b\u30c7\u30fc\u30bf\u5206\u96e2<br \/>\n&#8211; \u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u691c\u7d22\uff08\u5bc6\u30d9\u30af\u30c8\u30eb\uff0b\u30b9\u30d1\u30fc\u30b9\u30d9\u30af\u30c8\u30eb\uff09\u306e\u30b5\u30dd\u30fc\u30c8<br \/>\n&#8211; REST API\u304a\u3088\u3073Python\/Node.js SDK\u306e\u5b8c\u5099<\/p>\n<p><strong>\u5411\u3044\u3066\u3044\u308b\u30b1\u30fc\u30b9\uff1a<\/strong> \u958b\u767a\u30b9\u30d4\u30fc\u30c9\u3092\u512a\u5148\u3057\u305f\u3044\u5834\u5408\u3001\u30a4\u30f3\u30d5\u30e9\u7ba1\u7406\u306e\u8ca0\u8377\u3092\u6700\u5c0f\u5316\u3057\u305f\u3044\u7d44\u7e54<\/p>\n<h3>Weaviate<\/h3>\n<p><strong>Weaviate<\/strong>\u306f\u30aa\u30fc\u30d7\u30f3\u30bd\u30fc\u30b9\u306e\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3067\u3001\u30bb\u30eb\u30d5\u30db\u30b9\u30c8\u3068<a href=\"https:\/\/m.do.co\/c\/06956e5e2802\" title=\"DigitalOcean \u30af\u30e9\u30a6\u30c9\u30db\u30b9\u30c6\u30a3\u30f3\u30b0\" rel=\"nofollow sponsored\" target=\"_blank\">\u30af\u30e9\u30a6\u30c9<\/a>\u7248\u306e\u4e21\u65b9\u3092\u9078\u3079\u308b\u3002<\/p>\n<p><strong>\u4e3b\u306a\u7279\u5fb4\uff1a<\/strong><br \/>\n&#8211; GraphQL\u30d9\u30fc\u30b9\u306e\u30af\u30a8\u30ea\u30a4\u30f3\u30bf\u30fc\u30d5\u30a7\u30fc\u30b9<br \/>\n&#8211; \u30e2\u30b8\u30e5\u30fc\u30eb\u578b\u30a2\u30fc\u30ad\u30c6\u30af\u30c1\u30e3\uff08\u57cb\u3081\u8fbc\u307f\u30e2\u30c7\u30eb\u3092\u5185\u8535\u53ef\u80fd\uff09<br \/>\n&#8211; \u30de\u30eb\u30c1\u30c6\u30ca\u30f3\u30b7\u30fc\u306e\u30cd\u30a4\u30c6\u30a3\u30d6\u30b5\u30dd\u30fc\u30c8<br \/>\n&#8211; 
BM25\u3068\u30d9\u30af\u30c8\u30eb\u691c\u7d22\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u691c\u7d22<br \/>\n&#8211; \u30c7\u30fc\u30bf\u306e\u5b8c\u5168\u306a\u5236\u5fa1\u3068\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u6027<\/p>\n<p><strong>\u5411\u3044\u3066\u3044\u308b\u30b1\u30fc\u30b9\uff1a<\/strong> \u30c7\u30fc\u30bf\u4e3b\u6a29\u304c\u91cd\u8981\u306a\u5834\u5408\u3001\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u6027\u3092\u91cd\u8996\u3059\u308b\u958b\u767a\u30c1\u30fc\u30e0<\/p>\n<h3>\u6bd4\u8f03\u30b5\u30de\u30ea\u30fc<\/h3>\n<table>\n<thead>\n<tr>\n<th>\u9805\u76ee<\/th>\n<th>Pinecone<\/th>\n<th>Weaviate<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u30db\u30b9\u30c6\u30a3\u30f3\u30b0<\/td>\n<td>\u30d5\u30eb\u30de\u30cd\u30fc\u30b8\u30c9<\/td>\n<td>\u30bb\u30eb\u30d5\uff0f\u30af\u30e9\u30a6\u30c9<\/td>\n<\/tr>\n<tr>\n<td>\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u96e3\u6613\u5ea6<\/td>\n<td>\u4f4e\u3044<\/td>\n<td>\u4e2d\u7a0b\u5ea6<\/td>\n<\/tr>\n<tr>\n<td>\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3<\/td>\n<td>\u81ea\u52d5<\/td>\n<td>\u624b\u52d5\u8a2d\u5b9a\u53ef<\/td>\n<\/tr>\n<tr>\n<td>\u30b3\u30b9\u30c8<\/td>\n<td>\u5f93\u91cf\u8ab2\u91d1<\/td>\n<td>\u30a4\u30f3\u30d5\u30e9\u30b3\u30b9\u30c8<\/td>\n<\/tr>\n<tr>\n<td>\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u6027<\/td>\n<td>\u9650\u5b9a\u7684<\/td>\n<td>\u9ad8\u3044<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h2>\u5b9f\u88c5\uff1aRAG\u30b7\u30b9\u30c6\u30e0\u3092\u30bc\u30ed\u304b\u3089\u4f5c\u308b<\/h2>\n<p>Pinecone\u3068OpenAI\u3092\u4f7f\u3063\u305f\u57fa\u672c\u7684\u306a\u5b9f\u88c5\u304b\u3089\u59cb\u3081\u3001<a href=\"https:\/\/m.do.co\/c\/06956e5e2802\" title=\"DigitalOcean \u672c\u756a\u74b0\u5883\u30af\u30e9\u30a6\u30c9\" rel=\"nofollow sponsored\" 
target=\"_blank\">\u672c\u756a\u74b0\u5883<\/a>\u3092\u610f\u8b58\u3057\u305f\u8a2d\u8a08\u3078\u3068\u767a\u5c55\u3055\u305b\u308b\u3002<\/p>\n<h3>\u74b0\u5883\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/h3>\n<p>\u307e\u305a\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3002<\/p>\n<pre><code class=\"language-bash\">pip install pinecone-client openai langchain tiktoken python-dotenv redis\n<\/code><\/pre>\n<p>\u74b0\u5883\u5909\u6570\u306e\u8a2d\u5b9a\uff1a<\/p>\n<pre><code class=\"language-bash\"># .env \u30d5\u30a1\u30a4\u30eb\nOPENAI_API_KEY=sk-your-openai-api-key\nPINECONE_API_KEY=your-pinecone-api-key\nPINECONE_ENVIRONMENT=us-east-1\nPINECONE_INDEX_NAME=rag-production\n<\/code><\/pre>\n<h3>\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u57cb\u3081\u8fbc\u307f\u3068\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u767b\u9332<\/h3>\n<pre><code class=\"language-python\">import os\nimport hashlib\nfrom dotenv import load_dotenv\nfrom pinecone import Pinecone, ServerlessSpec\nfrom openai import OpenAI\nfrom langchain.text_splitter import RecursiveCharacterTextSplitter\n\nload_dotenv()\n\n# \u30af\u30e9\u30a4\u30a2\u30f3\u30c8\u306e\u521d\u671f\u5316\nopenai_client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\npc = Pinecone(api_key=os.getenv(\"PINECONE_API_KEY\"))\n\n# \u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306e\u4f5c\u6210\uff08\u5b58\u5728\u3057\u306a\u3044\u5834\u5408\uff09\nindex_name = os.getenv(\"PINECONE_INDEX_NAME\")\nif index_name not in pc.list_indexes().names():\n    pc.create_index(\n        name=index_name,\n        dimension=1536,  # text-embedding-3-small \u306e\u6b21\u5143\u6570\n        metric=\"cosine\",\n        spec=ServerlessSpec(\n            cloud=\"aws\",\n            region=os.getenv(\"PINECONE_ENVIRONMENT\")\n        )\n    )\n\nindex = 
pc.Index(index_name)\n\ndef get_embedding(text: str) -> list[float]:\n    \"\"\"\u30c6\u30ad\u30b9\u30c8\u3092\u30d9\u30af\u30c8\u30eb\u306b\u5909\u63db\u3059\u308b\"\"\"\n    response = openai_client.embeddings.create(\n        model=\"text-embedding-3-small\",\n        input=text\n    )\n    return response.data[0].embedding\n\ndef chunk_and_index_documents(documents: list[dict]) -> None:\n    \"\"\"\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u30c1\u30e3\u30f3\u30af\u5206\u5272\u3057\u3066\u30d9\u30af\u30bf\u30fcDB\u306b\u767b\u9332\u3059\u308b\"\"\"\n    # \u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u306b\u9069\u3057\u305f\u30bb\u30d1\u30ec\u30fc\u30bf\u3092\u6307\u5b9a\n    splitter = RecursiveCharacterTextSplitter(\n        chunk_size=512,\n        chunk_overlap=64,\n        separators=[\"\\n\\n\", \"\\n\", \"\u3002\", \"\u3001\", \" \", \"\"]\n    )\n\n    vectors = []\n    for doc in documents:\n        chunks = splitter.split_text(doc[\"content\"])\n\n        for i, chunk in enumerate(chunks):\n            chunk_id = hashlib.md5(f\"{doc['id']}_{i}\".encode()).hexdigest()\n            embedding = get_embedding(chunk)\n\n            vectors.append({\n                \"id\": chunk_id,\n                \"values\": embedding,\n                \"metadata\": {\n                    \"text\": chunk,\n                    \"source\": doc[\"source\"],\n                    \"doc_id\": doc[\"id\"],\n                    \"chunk_index\": i\n                }\n            })\n\n        # \u30d0\u30c3\u30c1\u30b5\u30a4\u30ba100\u3067\u30a2\u30c3\u30d7\u30b5\u30fc\u30c8\uff08API\u30b3\u30b9\u30c8\u524a\u6e1b\uff09\n        if len(vectors) >= 100:\n            index.upsert(vectors=vectors)\n            vectors = []\n\n    if vectors:\n        index.upsert(vectors=vectors)\n\n    stats = index.describe_index_stats()\n    print(f\"\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u767b\u9332\u5b8c\u4e86: \u7dcf\u30d9\u30af\u30c8\u30eb\u6570 = 
{stats['total_vector_count']}\")\n<\/code><\/pre>\n<h3>\u691c\u7d22\u3068\u751f\u6210\u306e\u7d71\u5408<\/h3>\n<pre><code class=\"language-python\">def retrieve_relevant_chunks(\n    query: str,\n    top_k: int = 5,\n    score_threshold: float = 0.70\n) -> list[dict]:\n    \"\"\"\u30af\u30a8\u30ea\u306b\u95a2\u9023\u3059\u308b\u30c1\u30e3\u30f3\u30af\u3092\u30d9\u30af\u30bf\u30fcDB\u304b\u3089\u691c\u7d22\u3059\u308b\"\"\"\n    query_embedding = get_embedding(query)\n\n    results = index.query(\n        vector=query_embedding,\n        top_k=top_k,\n        include_metadata=True\n    )\n\n    # \u30b3\u30b5\u30a4\u30f3\u985e\u4f3c\u5ea6\u3067\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\n    return [\n        {\n            \"text\": match[\"metadata\"][\"text\"],\n            \"source\": match[\"metadata\"][\"source\"],\n            \"score\": match[\"score\"]\n        }\n        for match in results[\"matches\"]\n        if match[\"score\"] >= score_threshold\n    ]\n\ndef generate_rag_response(query: str) -> dict:\n    \"\"\"\u691c\u7d22\u62e1\u5f35\u751f\u6210\u3092\u4f7f\u3063\u3066\u8cea\u554f\u306b\u56de\u7b54\u3059\u308b\"\"\"\n    chunks = retrieve_relevant_chunks(query)\n\n    if not chunks:\n        return {\n            \"answer\": \"\u95a2\u9023\u3059\u308b\u60c5\u5831\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3067\u3057\u305f\u3002\",\n            \"sources\": []\n        }\n\n    # \u53d6\u5f97\u3057\u305f\u30c1\u30e3\u30f3\u30af\u304b\u3089\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u3092\u69cb\u7bc9\n    context = \"\\n\\n---\\n\\n\".join([\n        f\"\u3010\u51fa\u5178: {c['source']}\u3011\\n{c['text']}\"\n        for c in chunks\n    ])\n\n    system_prompt = 
\"\"\"\u3042\u306a\u305f\u306f\u5c02\u9580\u7684\u306a\u30a2\u30b7\u30b9\u30bf\u30f3\u30c8\u3067\u3059\u3002\n\u63d0\u4f9b\u3055\u308c\u305f\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u306e\u307f\u3092\u4f7f\u7528\u3057\u3066\u8cea\u554f\u306b\u56de\u7b54\u3057\u3066\u304f\u3060\u3055\u3044\u3002\n\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u306b\u542b\u307e\u308c\u3066\u3044\u306a\u3044\u60c5\u5831\u306b\u3064\u3044\u3066\u306f\u3001\u300c\u63d0\u4f9b\u3055\u308c\u305f\u60c5\u5831\u306b\u306f\u542b\u307e\u308c\u3066\u3044\u307e\u305b\u3093\u300d\u3068\u660e\u793a\u3057\u3066\u304f\u3060\u3055\u3044\u3002\n\u56de\u7b54\u306f\u7c21\u6f54\u304b\u3064\u6b63\u78ba\u306b\u307e\u3068\u3081\u3066\u304f\u3060\u3055\u3044\u3002\"\"\"\n\n    user_prompt = f\"\"\"\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8:\n{context}\n\n\u8cea\u554f: {query}\n\n\u4e0a\u8a18\u306e\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u306b\u57fa\u3065\u3044\u3066\u56de\u7b54\u3057\u3066\u304f\u3060\u3055\u3044\u3002\"\"\"\n\n    response = openai_client.chat.completions.create(\n        model=\"gpt-4o-mini\",\n        messages=[\n            {\"role\": \"system\", \"content\": system_prompt},\n            {\"role\": \"user\", \"content\": user_prompt}\n        ],\n        temperature=0.1,  # \u4e00\u8cab\u6027\u306e\u3042\u308b\u56de\u7b54\u306e\u305f\u3081\u4f4e\u3081\u306b\u8a2d\u5b9a\n        max_tokens=1024\n    )\n\n    return {\n        \"answer\": response.choices[0].message.content,\n        \"sources\": list({c[\"source\"] for c in chunks}),\n        \"chunks_used\": len(chunks)\n    }\n<\/code><\/pre>\n<hr 
\/>\n<h2>Weaviate\u3092\u4f7f\u3063\u305f\u4ee3\u66ff\u5b9f\u88c5<\/h2>\n<p>\u5c02\u9580\u7528\u8a9e\u304c\u591a\u3044\u696d\u52d9\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u2015\u6cd5\u5f8b\u6587\u66f8\u3084\u533b\u7642\u8a18\u9332\u306a\u3069\u2015\u3067\u306f\u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u691c\u7d22\u304c\u52b9\u304f\u3002\u30ad\u30fc\u30ef\u30fc\u30c9\u306e\u5b8c\u5168\u4e00\u81f4\uff08BM25\uff09\u3068\u30d9\u30af\u30c8\u30eb\u985e\u4f3c\u5ea6\u3092\u7d44\u307f\u5408\u308f\u305b\u3089\u308c\u308b\u306e\u304cWeaviate\u306e\u5f37\u307f\u3060\u3002\u79c1\u306e\u7d4c\u9a13\u3067\u306f\u3001\u7d14\u7c8b\u306a\u30d9\u30af\u30c8\u30eb\u691c\u7d22\u3060\u3051\u3060\u3068\u56fa\u6709\u540d\u8a5e\u3084\u578b\u756a\u306e\u691c\u7d22\u7cbe\u5ea6\u304c\u843d\u3061\u308b\u3053\u3068\u304c\u3042\u3063\u305f\u3002<\/p>\n<pre><code class=\"language-python\">import weaviate\nfrom weaviate.classes.config import Configure, Property, DataType\n\n# Weaviate Cloud\u30af\u30e9\u30a4\u30a2\u30f3\u30c8\u306e\u521d\u671f\u5316\nclient = weaviate.connect_to_weaviate_cloud(\n    cluster_url=os.getenv(\"WEAVIATE_URL\"),\n    auth_credentials=weaviate.auth.AuthApiKey(os.getenv(\"WEAVIATE_API_KEY\"))\n)\n\n# \u30b3\u30ec\u30af\u30b7\u30e7\u30f3\uff08\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\uff09\u306e\u4f5c\u6210\nclient.collections.create(\n    name=\"Document\",\n    vectorizer_config=Configure.Vectorizer.text2vec_openai(\n        model=\"text-embedding-3-small\"\n    ),\n    generative_config=Configure.Generative.openai(\n        model=\"gpt-4o-mini\"\n    ),\n    properties=[\n        Property(name=\"content\", data_type=DataType.TEXT),\n        Property(name=\"source\", data_type=DataType.TEXT),\n        Property(name=\"doc_id\", data_type=DataType.TEXT),\n    ]\n)\n\ndef hybrid_search(query: str, alpha: float = 0.5, limit: int = 5) -> list:\n    \"\"\"\n    
BM25\u3068\u30d9\u30af\u30c8\u30eb\u691c\u7d22\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u691c\u7d22\n    alpha=0.0: \u7d14\u7c8b\u306a\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\uff08BM25\uff09\n    alpha=1.0: \u7d14\u7c8b\u306a\u30d9\u30af\u30c8\u30eb\u691c\u7d22\n    alpha=0.5: \u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\uff08\u4e00\u822c\u7684\u306a\u7528\u9014\u306b\u63a8\u5968\uff09\n    \"\"\"\n    collection = client.collections.get(\"Document\")\n    results = collection.query.hybrid(\n        query=query,\n        alpha=alpha,\n        limit=limit,\n        return_properties=[\"content\", \"source\"]\n    )\n    return [\n        {\"text\": obj.properties[\"content\"], \"source\": obj.properties[\"source\"]}\n        for obj in results.objects\n    ]\n<\/code><\/pre>\n<hr \/>\n<h2>\u672c\u756a\u74b0\u5883\u3067\u8a70\u307e\u308b\u30dd\u30a4\u30f3\u30c8\u3068\u5bfe\u7b56<\/h2>\n<p>\u958b\u767a\u74b0\u5883\u3067\u306f\u6c17\u3065\u304b\u306a\u3044\u304c\u3001\u672c\u756a\u306b\u51fa\u305f\u9014\u7aef\u306b\u554f\u984c\u306b\u306a\u308b\u3053\u3068\u304c3\u3064\u3042\u308b\u3002\u30ec\u30a4\u30c6\u30f3\u30b7\u3001\u5916\u90e8API\u969c\u5bb3\u3078\u306e\u8010\u6027\u3001\u305d\u3057\u3066\u53ef\u89b3\u6e2c\u6027\u3060\u3002<\/p>\n<h3>\u30ec\u30a4\u30c6\u30f3\u30b7\u306e\u6700\u9069\u5316\uff1a\u57cb\u3081\u8fbc\u307f\u30ad\u30e3\u30c3\u30b7\u30e5<\/h3>\n<p>\u30e6\u30fc\u30b6\u30fc\u306e\u8cea\u554f\u306f\u610f\u5916\u3068\u91cd\u8907\u3059\u308b\u3002\u540c\u3058\u30af\u30a8\u30ea\u3067\u6bce\u56de\u57cb\u3081\u8fbc\u307fAPI\u3092\u53e9\u304f\u306e\u306f\u7121\u99c4\u306a\u306e\u3067\u3001Redis\u3067\u30ad\u30e3\u30c3\u30b7\u30e5\u3059\u308b\u3002TTL\u306f24\u6642\u9593\u306b\u3057\u3066\u3044\u308b\u304c\u3001\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u66f4\u65b0\u983b\u5ea6\u306b\u5408\u308f\u305b\u3066\u8abf\u6574\u3057\u3066\u307b\u3057\u3044\u3002<\/p>\n<pre><code class=\"language-python\">import redis\nimport json\n\nredis_client = 
redis.Redis(host=\"localhost\", port=6379, decode_responses=True)\n\ndef get_embedding_cached(text: str, ttl: int = 86400) -> list[float]:\n    \"\"\"Redis\u30ad\u30e3\u30c3\u30b7\u30e5\u3092\u6d3b\u7528\u3057\u305f\u57cb\u3081\u8fbc\u307f\u53d6\u5f97\uff08TTL: 24\u6642\u9593\uff09\"\"\"\n    cache_key = f\"emb:{hashlib.md5(text.encode()).hexdigest()}\"\n\n    cached = redis_client.get(cache_key)\n    if cached:\n        return json.loads(cached)\n\n    embedding = get_embedding(text)\n    redis_client.setex(cache_key, ttl, json.dumps(embedding))\n    return embedding\n<\/code><\/pre>\n<h3>\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0\u3068\u30ea\u30c8\u30e9\u30a4\u30ed\u30b8\u30c3\u30af<\/h3>\n<p>\u5916\u90e8API\u3078\u306e\u4f9d\u5b58\u304c\u591a\u3044RAG\u30b7\u30b9\u30c6\u30e0\u3067\u3001\u30ea\u30c8\u30e9\u30a4\u306a\u3057\u3067\u672c\u756a\u904b\u7528\u3059\u308b\u306e\u306f\u5371\u967a\u3060\u3002OpenAI\u3082Pinecone\u3082\u4e00\u6642\u7684\u306a\u30a8\u30e9\u30fc\u304c\u8d77\u304d\u308b\u3002\u6307\u6570\u30d0\u30c3\u30af\u30aa\u30d5\u4ed8\u304d\u306e\u30c7\u30b3\u30ec\u30fc\u30bf\u3092\u4e00\u5ea6\u4f5c\u3063\u3066\u304a\u3051\u3070\u4f7f\u3044\u56de\u305b\u308b\u3002<\/p>\n<pre><code class=\"language-python\">import time\nfrom functools import wraps\n\ndef with_retry(max_retries: int = 3, backoff: float = 1.0):\n    \"\"\"\u6307\u6570\u30d0\u30c3\u30af\u30aa\u30d5\u4ed8\u304d\u30ea\u30c8\u30e9\u30a4\u30c7\u30b3\u30ec\u30fc\u30bf\"\"\"\n    def decorator(func):\n        @wraps(func)\n        def wrapper(*args, **kwargs):\n            for attempt in range(max_retries):\n                try:\n                    return func(*args, **kwargs)\n                except Exception as e:\n                    if attempt == max_retries - 1:\n                        raise\n                    wait_time = backoff * (2 ** attempt)\n                    print(f\"\u30a8\u30e9\u30fc\u767a\u751f (\u8a66\u884c {attempt+1}\/{max_retries}): {e}\")\n                    
print(f\"{wait_time:.1f}\u79d2\u5f8c\u306b\u30ea\u30c8\u30e9\u30a4\u3057\u307e\u3059...\")\n                    time.sleep(wait_time)\n        return wrapper\n    return decorator\n\n@with_retry(max_retries=3, backoff=0.5)\ndef get_embedding_safe(text: str) -> list[float]:\n    return get_embedding_cached(text)\n<\/code><\/pre>\n<h3>\u30e2\u30cb\u30bf\u30ea\u30f3\u30b0\u3068\u53ef\u89b3\u6e2c\u6027<\/h3>\n<p>\u300c\u52d5\u3044\u3066\u3044\u308b\u3063\u307d\u3044\u300d\u3067\u306f\u672c\u756a\u904b\u7528\u306f\u6210\u308a\u7acb\u305f\u306a\u3044\u3002\u691c\u7d22\u7cbe\u5ea6\u304c\u9759\u304b\u306b\u52a3\u5316\u3057\u3066\u3044\u3066\u3082\u3001\u30e1\u30c8\u30ea\u30af\u30b9\u304c\u306a\u3051\u308c\u3070\u6c17\u3065\u3051\u306a\u3044\u3002<\/p>\n<pre><code class=\"language-python\">import time\nfrom dataclasses import dataclass\nfrom typing import Optional\n\n@dataclass\nclass RAGMetrics:\n    query: str\n    retrieval_latency_ms: float = 0.0\n    generation_latency_ms: float = 0.0\n    chunks_retrieved: int = 0\n    avg_similarity_score: float = 0.0\n    cache_hit: bool = False\n    error: Optional[str] = None\n\n    @property\n    def total_latency_ms(self) -> float:\n        return self.retrieval_latency_ms + self.generation_latency_ms\n\ndef generate_rag_response_monitored(query: str) -> tuple[dict, RAGMetrics]:\n    \"\"\"\u30e1\u30c8\u30ea\u30af\u30b9\u53ce\u96c6\u4ed8\u304dRAG\u5b9f\u884c\"\"\"\n    metrics = RAGMetrics(query=query)\n\n    # \u691c\u7d22\u30d5\u30a7\u30fc\u30ba\u306e\u8a08\u6e2c\n    t0 = time.time()\n    chunks = retrieve_relevant_chunks(query)\n    metrics.retrieval_latency_ms = (time.time() - t0) * 1000\n    metrics.chunks_retrieved = len(chunks)\n\n    if chunks:\n        metrics.avg_similarity_score = sum(\n            c[\"score\"] for c in chunks\n        ) \/ len(chunks)\n\n    # \u751f\u6210\u30d5\u30a7\u30fc\u30ba\u306e\u8a08\u6e2c\n    t1 = time.time()\n    result = generate_rag_response(query)\n    metrics.generation_latency_ms = 
(time.time() - t1) * 1000\n\n    # \u30ed\u30b0\u51fa\u529b\uff08\u672c\u756a\u3067\u306fDatadog\u7b49\u3078\u9001\u4fe1\uff09\n    print(f\"[RAG Metrics] \u691c\u7d22: {metrics.retrieval_latency_ms:.0f}ms | \"\n          f\"\u751f\u6210: {metrics.generation_latency_ms:.0f}ms | \"\n          f\"\u30c1\u30e3\u30f3\u30af\u6570: {metrics.chunks_retrieved} | \"\n          f\"\u5e73\u5747\u30b9\u30b3\u30a2: {metrics.avg_similarity_score:.3f}\")\n\n    return result, metrics\n<\/code><\/pre>\n<p><strong>\u8ffd\u8de1\u3059\u3079\u304d\u4e3b\u8981\u30e1\u30c8\u30ea\u30af\u30b9\uff1a<\/strong><\/p>\n<table>\n<thead>\n<tr>\n<th>\u30e1\u30c8\u30ea\u30af\u30b9<\/th>\n<th>\u76ee\u6a19\u5024<\/th>\n<th>\u8aac\u660e<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u691c\u7d22\u30ec\u30a4\u30c6\u30f3\u30b7P95<\/td>\n<td>&lt; 300ms<\/td>\n<td>\u30d9\u30af\u30bf\u30fcDB\u691c\u7d22\u6642\u9593<\/td>\n<\/tr>\n<tr>\n<td>\u751f\u6210\u30ec\u30a4\u30c6\u30f3\u30b7P95<\/td>\n<td>&lt; 3,000ms<\/td>\n<td>LLM\u5fdc\u7b54\u751f\u6210\u6642\u9593<\/td>\n<\/tr>\n<tr>\n<td>\u5e73\u5747\u985e\u4f3c\u5ea6\u30b9\u30b3\u30a2<\/td>\n<td>&gt; 0.75<\/td>\n<td>\u691c\u7d22\u7cbe\u5ea6\u306e\u6307\u6a19<\/td>\n<\/tr>\n<tr>\n<td>\u30ad\u30e3\u30c3\u30b7\u30e5\u30d2\u30c3\u30c8\u7387<\/td>\n<td>&gt; 30%<\/td>\n<td>\u57cb\u3081\u8fbc\u307f\u30ad\u30e3\u30c3\u30b7\u30e5\u52b9\u7387<\/td>\n<\/tr>\n<tr>\n<td>\u30a8\u30e9\u30fc\u30ec\u30fc\u30c8<\/td>\n<td>&lt; 0.1%<\/td>\n<td>API\u547c\u3073\u51fa\u3057\u5931\u6557\u7387<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr 
\/>\n<h2>\u30c1\u30e3\u30f3\u30af\u6226\u7565\u306e\u6700\u9069\u5316\u2015\u2015\u3053\u3053\u3067\u5dee\u304c\u3064\u304f<\/h2>\n<p>\u30c1\u30e3\u30f3\u30af\u5206\u5272\u306f\u5730\u5473\u3060\u304c\u3001RAG\u306e\u7cbe\u5ea6\u3092\u5de6\u53f3\u3059\u308b\u6700\u91cd\u8981\u30dd\u30a4\u30f3\u30c8\u3060\u3002\u56fa\u5b9a\u9577\u3067\u6a5f\u68b0\u7684\u306b\u5207\u308b\u3068\u3001\u610f\u5473\u7684\u306b\u95a2\u9023\u3059\u308b\u60c5\u5831\u304c\u9014\u5207\u308c\u3066\u3057\u307e\u3044\u3001\u691c\u7d22\u3067\u30d2\u30c3\u30c8\u3057\u3066\u3082\u6587\u8108\u304c\u4e2d\u9014\u534a\u7aef\u306b\u306a\u308b\u3002<\/p>\n<h3>\u30bb\u30de\u30f3\u30c6\u30a3\u30c3\u30af\u30c1\u30e3\u30f3\u30ad\u30f3\u30b0<\/h3>\n<pre><code class=\"language-python\">from langchain_experimental.text_splitter import SemanticChunker\nfrom langchain_openai import OpenAIEmbeddings\n\nembeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n\n# \u610f\u5473\u7684\u306a\u307e\u3068\u307e\u308a\u3092\u4fdd\u3063\u3066\u30c1\u30e3\u30f3\u30af\u5206\u5272\nsemantic_splitter = SemanticChunker(\n    embeddings=embeddings,\n    breakpoint_threshold_type=\"percentile\",\n    breakpoint_threshold_amount=95  # \u4e0a\u4f4d5%\u306e\u610f\u5473\u7684\u4e56\u96e2\u70b9\u3067\u5206\u5272\n)\n\nchunks = semantic_splitter.create_documents([long_document_text])\n<\/code><\/pre>\n<h3>\u89aa\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u53d6\u5f97\uff08Parent Document 
Retriever\uff09<\/h3>\n<p>\u5c0f\u3055\u3044\u30c1\u30e3\u30f3\u30af\u3067\u7cbe\u5ea6\u306e\u9ad8\u3044\u691c\u7d22\u3092\u884c\u3044\u3001\u30d2\u30c3\u30c8\u3057\u305f\u5834\u5408\u306f\u6587\u8108\u304c\u8c4a\u5bcc\u306a\u89aa\u30c1\u30e3\u30f3\u30af\u3092\u53d6\u5f97\u3059\u308b\u624b\u6cd5\u3060\u3002\u691c\u7d22\u7cbe\u5ea6\u3068\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u306e\u8c4a\u5bcc\u3055\u3092\u4e21\u7acb\u3067\u304d\u308b\u2015\u500b\u4eba\u7684\u306b\u306f\u3053\u308c\u304c\u4e00\u756a\u30b3\u30b9\u30d1\u304c\u826f\u3044\u3068\u611f\u3058\u3066\u3044\u308b\u3002<\/p>\n<pre><code class=\"language-python\">from langchain.retrievers import ParentDocumentRetriever\nfrom langchain.storage import InMemoryStore\nfrom langchain_community.vectorstores import Chroma\n\n# \u5b50\u30c1\u30e3\u30f3\u30af\uff08\u691c\u7d22\u7528\uff09\uff1a\u5c0f\u3055\u304f\u3001\u7cbe\u5ea6\u306e\u9ad8\u3044\u30de\u30c3\u30c1\u30f3\u30b0\nchild_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=200,\n    separators=[\"\\n\\n\", \"\\n\", \"\u3002\", \" \"]\n)\n# \u89aa\u30c1\u30e3\u30f3\u30af\uff08\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u7528\uff09\uff1a\u5927\u304d\u304f\u3001\u60c5\u5831\u304c\u8c4a\u5bcc\nparent_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=1000,\n    separators=[\"\\n\\n\", \"\\n\", \"\u3002\", \" \"]\n)\n\nvectorstore = Chroma(embedding_function=embeddings)\nstore = InMemoryStore()\n\nretriever = ParentDocumentRetriever(\n    vectorstore=vectorstore,\n    docstore=store,\n    child_splitter=child_splitter,\n    parent_splitter=parent_splitter,\n)\n<\/code><\/pre>\n<hr \/>\n<h2>\u672c\u756a\u74b0\u5883\u5bfe\u5fdcRAG\u30b7\u30b9\u30c6\u30e0\u306e\u69cb\u7bc9\u30c1\u30a7\u30c3\u30af\u30ea\u30b9\u30c8<\/h2>\n<p><strong>\u8a2d\u8a08\u30d5\u30a7\u30fc\u30ba<\/strong><br \/>\n&#8211; [ ] 
\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u306b\u5408\u3063\u305f\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\uff08Pinecone\u307e\u305f\u306fWeaviate\uff09\u306e\u9078\u5b9a<br \/>\n&#8211; [ ] \u30c1\u30e3\u30f3\u30af\u30b5\u30a4\u30ba\u3068\u91cd\u8907\u5e45\u306e\u6700\u9069\u5316\uff08\u76ee\u5b89\uff1a512\u301c1024\u30c8\u30fc\u30af\u30f3\u3001\u91cd\u890710\u301c20%\uff09<br \/>\n&#8211; [ ] \u57cb\u3081\u8fbc\u307f\u30e2\u30c7\u30eb\u306e\u9078\u5b9a\uff08\u30b3\u30b9\u30c8\u3068\u7cbe\u5ea6\u306e\u30d0\u30e9\u30f3\u30b9\uff09<br \/>\n&#8211; [ ] \u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u691c\u7d22\u306e\u63a1\u7528\u53ef\u5426\u306e\u691c\u8a0e<\/p>\n<p><strong>\u5b9f\u88c5\u30d5\u30a7\u30fc\u30ba<\/strong><br \/>\n&#8211; [ ] \u30d0\u30c3\u30c1\u51e6\u7406\u306b\u3088\u308b\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u767b\u9332\u306e\u52b9\u7387\u5316<br \/>\n&#8211; [ ] \u57cb\u3081\u8fbc\u307f\u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u5b9f\u88c5\uff08Redis\u306a\u3069\uff09<br \/>\n&#8211; [ ] \u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0\u3068\u30ea\u30c8\u30e9\u30a4\u30ed\u30b8\u30c3\u30af\u306e\u6574\u5099<br \/>\n&#8211; [ ] \u985e\u4f3c\u5ea6\u30b9\u30b3\u30a2\u306b\u3088\u308b\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u306e\u5b9f\u88c5<\/p>\n<p><strong>\u672c\u756a\u74b0\u5883<a href=\"https:\/\/m.do.co\/c\/06956e5e2802\" title=\"DigitalOcean\u3067\u30c7\u30d7\u30ed\u30a4\" rel=\"nofollow sponsored\" target=\"_blank\">\u30c7\u30d7\u30ed\u30a4<\/a>\u30d5\u30a7\u30fc\u30ba<\/strong><br \/>\n&#8211; [ ] \u30ec\u30a4\u30c6\u30f3\u30b7\u76ee\u6a19\u306e\u8a2d\u5b9a\u3068\u76e3\u8996\u30c0\u30c3\u30b7\u30e5\u30dc\u30fc\u30c9\u306e\u6574\u5099<br \/>\n&#8211; [ ] API\u30ec\u30fc\u30c8\u5236\u9650\u3068\u30b3\u30b9\u30c8\u4e0a\u9650\u306e\u8a2d\u5b9a<br \/>\n&#8211; [ ] \u5b9a\u671f\u7684\u306a\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u66f4\u65b0\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u7bc9<br \/>\n&#8211; [ ] 
A\/B\u30c6\u30b9\u30c8\u306b\u3088\u308b\u30c1\u30e3\u30f3\u30af\u6226\u7565\u306e\u7d99\u7d9a\u6539\u5584<\/p>\n<p><strong>\u691c\u7d22\u62e1\u5f35\u751f\u6210<\/strong>\u306fLLM\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u4fe1\u983c\u6027\u3092\u5927\u304d\u304f\u5f15\u304d\u4e0a\u3052\u308b\u3002\u30cf\u30eb\u30b7\u30cd\u30fc\u30b7\u30e7\u30f3\u3092\u6291\u5236\u3057\u3064\u3064\u3001\u4f01\u696d\u56fa\u6709\u306e\u30ca\u30ec\u30c3\u30b8\u3092\u6d3b\u304b\u3057\u305f\u9ad8\u7cbe\u5ea6\u306aAI\u30b7\u30b9\u30c6\u30e0\u304c\u73fe\u5b9f\u7684\u306b\u4f5c\u308c\u308b\u3002\u307e\u305a\u5c0f\u3055\u3044\u30d7\u30ed\u30c8\u30bf\u30a4\u30d7\u3067\u52d5\u304b\u3057\u3001\u672c\u756a\u3067\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3092\u898b\u306a\u304c\u3089\u6bb5\u968e\u7684\u306b\u6539\u5584\u3057\u3066\u3044\u304f\u306e\u304c\u7d50\u5c40\u4e00\u756a\u65e9\u3044\u9053\u3060\u3002<\/p>\n<hr \/>\n<p><em>\u95a2\u9023\u30ad\u30fc\u30ef\u30fc\u30c9: RAG\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb, \u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9, \u691c\u7d22\u62e1\u5f35\u751f\u6210, <a href=\"https:\/\/m.do.co\/c\/06956e5e2802\" title=\"DigitalOcean \u672c\u756a\u74b0\u5883\u30af\u30e9\u30a6\u30c9\" rel=\"nofollow sponsored\" target=\"_blank\">\u672c\u756a\u74b0\u5883<\/a>AI, Pinecone, Weaviate, LangChain, OpenAI, \u57cb\u3081\u8fbc\u307f\u30e2\u30c7\u30eb, \u30c1\u30e3\u30f3\u30ad\u30f3\u30b0<\/em><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u30d9\u30af\u30bf\u30fc\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u4f7f\u7528\u3057\u305f\u672c\u756a\u74b0\u5883\u5bfe\u5fdc\u306eRAG\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u69cb\u7bc9 
\u6b63\u76f4\u306a\u3068\u3053\u308d\u3001RAG\u3092\u521d\u3081\u3066\u5b9f\u88c5\u3057\u305f\u3068\u304d\u3001\u300c\u30d9\u30af\u30bf\u30fc\u691c\u7d22\u3055\u3048\u52d5\u3051\u3070\u307b\u307c\u5b8c\u6210\u3060\u300d\u3068\u7518\u304f\u898b\u3066\u3044\u305f\u3002\u5b9f\u969b\u306b\u306f\u3001\u30c1\u30e3\u30f3\u30af\u6226\u7565\u306e\u30df\u30b9\u3067\u691c\u7d22\u7cbe\u5ea6\u304c\u5d29\u58ca\u3057\u3001\u30ea\u30c8\u30e9\u30a4\u30ed\u30b8\u30c3\u30af\u306e\u6b20\u5982\u3067\u672c\u756a\u969c\u5bb3\u3092\u8d77\u3053\u3057\u305f\u3002\u3053\u306e\u8a18\u4e8b\u306f\u305d\u306e\u53cd\u7701\u3082\u8fbc\u3081\u3066\u66f8\u3044\u3066\u3044\u308b\u3002 LLM\u306b\u306f\u300c<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[2],"tags":[],"class_list":["post-4","post","type-post","status-publish","format-standard","hentry","category-ai"],"_links":{"self":[{"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/posts\/4","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/comments?post=4"}],"version-history":[{"count":12,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/posts\/4\/revisions"}],"predecessor-version":[{"id":124,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/posts\/4\/revisions\/124"}],"wp:attachment":[{"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/media?parent=4"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/categories?post=4"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.rebalai.com\/ja\/wp-json\/wp\/v2\/tags?post=4"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}