From 766661ea97eac2d211f698f4e0f42fddc89850d1 Mon Sep 17 00:00:00 2001 From: Chen Ling <102888045+ChenLingD@users.noreply.github.com> Date: Mon, 6 Apr 2026 14:43:07 -0400 Subject: [PATCH] Add files via upload --- .../rag/main.ipynb" | 458 ++++++++++++++---- 1 file changed, 360 insertions(+), 98 deletions(-) diff --git "a/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main.ipynb" "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main.ipynb" index 7eca110..63aa596 100644 --- "a/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main.ipynb" +++ "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main.ipynb" @@ -2,14 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 18, - "id": "cf723ed9-d8e0-4f1a-911f-887b927f8569", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - } - }, + "execution_count": null, + "id": "01173ef8-faa1-4dfd-9ab0-711b4a9e6008", + "metadata": {}, + "outputs": [], + "source": [ + "# Before User Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4b6d1b90-86f7-494f-af54-3b451ab51ca5", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -42,7 +47,8 @@ "from typing import List\n", "\n", "def split_into_chunks(doc_file: str) -> List[str]:\n", - " with open(doc_file, 'r') as file:\n", + " # Add encoding='utf-8' right here!\n", + " with open(doc_file, 'r', encoding='utf-8') as file:\n", " content = file.read()\n", "\n", " return [chunk for chunk in content.split(\"\\n\\n\")]\n", @@ -55,22 +61,179 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "cfe9bf60-5d21-4696-99a5-7e7f3b94dd06", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, - "scrolled": true - }, + "execution_count": 4, + "id": "624003bb-0a5d-4dcb-a14d-fefee0b43fdf", + "metadata": {}, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2f97cd11c8a24a7ea7f2df4b21686590", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "modules.json: 0%| | 0.00/230 [00:00 str:\n", - " prompt = f\"\"\"你是一位知识助手,请根据用户的问题和下列片段生成准确的回答。\n", - "\n", - "用户问题: {query}\n", - "\n", - "相关片段:\n", - "{\"\\n\\n\".join(chunks)}\n", - "\n", - "请基于上述内容作答,不要编造信息。\"\"\"\n", - "\n", - " print(f\"{prompt}\\n\\n---\\n\")\n", - "\n", - " response = google_client.models.generate_content(\n", - " model=\"gemini-2.5-flash\",\n", - " contents=prompt\n", - " )\n", - "\n", - " return response.text\n", - "\n", - "answer = generate(query, reranked_chunks)\n", - "print(answer)" + "load_dotenv()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81bc5147-a5c5-4b9f-a9ed-f5f0ebc94b09", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -307,7 +569,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.12.11" } }, "nbformat": 4,