diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index 45fadfd..cb2a674 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -70,7 +70,6 @@ class ChatEngineInput:
             chat_history=chat_history,
         )
 
-
 @singleton
 class ChatService:
     settings: Settings
@@ -126,7 +125,7 @@ class ChatService:
                 model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n
             )
            node_postprocessors.append(rerank_postprocessor)
-
+
        return ContextChatEngine.from_defaults(
            system_prompt=system_prompt,
            retriever=vector_index_retriever,
@@ -189,17 +188,19 @@ class ChatService:
        )
        system_prompt = (
            """
-            You are a helpful, respectful and honest question-answering assistant.
-            Your role is to provide accurate and informative responses based solely
-            on the context provided for each query. If the answer cannot be found in
-            the given context, you must state that the answer is not present rather
-            than speculating or making up information. Always follow the user's
-            instructions carefully and answer as helpfully as possible while strictly
-            adhering to the context boundaries. Do not reference the instructions or
-            context you were given when generating responses.
+            You are QuickGPT, a helpful assistant by Quickfox Consulting.
+
+            Responses should be based on the context documents provided
+            and should be relevant, informative, and easy to understand.
+            You should aim to deliver high-quality responses that are
+            respectful and helpful, using clear and concise language.
+            Avoid providing information outside of the context documents unless
+            it is necessary for clarity or completeness. Focus on providing
+            accurate and reliable answers based on the given context.
+            If the answer is not in the context documents, simply say that you
+            don't have the answer, in a respectful way.
            """
        )
-
        chat_history = (
            chat_engine_input.chat_history if chat_engine_input.chat_history else None
        )
diff --git a/private_gpt/server/completions/completions_router.py b/private_gpt/server/completions/completions_router.py
index 45e5280..a0714c6 100644
--- a/private_gpt/server/completions/completions_router.py
+++ b/private_gpt/server/completions/completions_router.py
@@ -195,7 +195,9 @@ async def prompt_completion(
            )
        )
        return history_messages
-    user_message = OpenAIMessage(content=body.prompt, role="user")
+    message = body.prompt
+    # message = body.prompt + 'Only answer if there is answer in the provided documents'
+    user_message = OpenAIMessage(content=message, role="user")
     user_message_json = {
         'text': body.prompt,
     }
diff --git a/private_gpt/users/crud/document_crud.py b/private_gpt/users/crud/document_crud.py
index ad409f0..9fb349b 100644
--- a/private_gpt/users/crud/document_crud.py
+++ b/private_gpt/users/crud/document_crud.py
@@ -28,11 +28,21 @@ class CRUDDocuments(CRUDBase[Document, DocumentCreate, DocumentUpdate]):
     def get_documents_by_departments(
         self, db: Session, *, department_id: int
     ) -> List[Document]:
+        all_department_id = 1  # department ID for "ALL" is 1
         return (
             db.query(self.model)
             .join(document_department_association)
             .join(Department)
-            .filter(document_department_association.c.department_id == department_id)
+            .filter(
+                or_(
+                    and_(
+                        document_department_association.c.department_id == department_id,
+                    ),
+                    and_(
+                        document_department_association.c.department_id == all_department_id,
+                    ),
+                )
+            )
             .order_by(desc(getattr(Document, 'uploaded_at')))
             .all()
         )
diff --git a/settings.yaml b/settings.yaml
index 20f2b86..8e9f30d 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -51,14 +51,14 @@ rag:
   #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score.
   rerank:
     enabled: true
-    model: cross-encoder/ms-marco-MiniLM-L-2-v2
+    model: mixedbread-ai/mxbai-embed-large-v1
     top_n: 2
 
 llamacpp:
   # llm_hf_repo_id: bartowski/Meta-Llama-3-8B-Instruct-GGUF
   # llm_hf_model_file: Meta-Llama-3-8B-Instruct-Q6_K.gguf
-  llm_hf_repo_id: qwp4w3hyb/Hermes-2-Pro-Llama-3-8B-iMat-GGUF
-  llm_hf_model_file: hermes-2-pro-llama-3-8b-imat-Q6_K.gguf
+  llm_hf_repo_id: NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF
+  llm_hf_model_file: Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q6_K.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
@@ -68,11 +68,11 @@ llamacpp:
 embedding:
   # Should be matching the value above in most cases
   mode: huggingface
-  ingest_mode: simple
+  ingest_mode: parallel
   embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
 
 huggingface:
-  embedding_hf_model_name: Snowflake/snowflake-arctic-embed-l
+  embedding_hf_model_name: mixedbread-ai/mxbai-embed-large-v1
   access_token: ${HUGGINGFACE_TOKEN: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}
 
 vectorstore: