from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms import Xinference
from xinference.client import (
    RESTfulChatglmCppChatModelHandle,
    RESTfulChatModelHandle,
    RESTfulGenerateModelHandle,
)

if TYPE_CHECKING:
    # Type-only names for the quoted annotations below; the module paths are
    # assumptions and differ across xinference versions.
    from xinference.model.llm.core import LlamaCppGenerateConfig
    from xinference.model.llm.ggml.chatglm import ChatglmCppGenerateConfig
    from xinference.model.llm.pytorch.core import PytorchGenerateConfig


class XinferenceLLM(Xinference):
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used
                for generation.

        Returns:
            The generated string by the model.
        """
        model = self.client.get_model(self.model_uid)

        # NOTE: the dispatch on the concrete handle type and the
        # generate_config setup are reconstructed scaffolding (assumptions);
        # the streaming blocks themselves are verbatim.
        if isinstance(model, RESTfulChatModelHandle):
            generate_config = kwargs.get("generate_config", {})
            if stop:
                generate_config["stop"] = stop

            if generate_config and generate_config.get("stream"):
                # Streaming mode: drain the token generator and return the
                # concatenated text.
                combined_text_output = ""
                for token in self._stream_generate(
                    model=model,
                    prompt=prompt,
                    run_manager=run_manager,
                    generate_config=generate_config,
                ):
                    combined_text_output += token
                return combined_text_output
            # ... (non-streaming chat completion elided) ...
        elif isinstance(model, RESTfulGenerateModelHandle):
            generate_config = kwargs.get("generate_config", {})
            if stop:
                generate_config["stop"] = stop

            if generate_config and generate_config.get("stream"):
                combined_text_output = ""
                for token in self._stream_generate(
                    model=model,
                    prompt=prompt,
                    run_manager=run_manager,
                    generate_config=generate_config,
                ):
                    combined_text_output += token
                completion = combined_text_output
            # ... (non-streaming completion elided) ...

            return completion
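
    # Usage sketch (illustrative; the server URL and model UID below are
    # assumptions, not values from the original):
    #
    #     llm = XinferenceLLM(
    #         server_url="http://127.0.0.1:9997",
    #         model_uid="<your-model-uid>",
    #     )
    #     text = llm(
    #         "Q: Name the largest planet. A:",
    #         generate_config={"stream": True},
    #     )
    #
    # With "stream" set, _call drains _stream_generate and returns the
    # concatenated tokens, so callers see the same return type in both modes.
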
    def _stream_generate(
        self,
        model: Union[
            "RESTfulGenerateModelHandle",
            "RESTfulChatModelHandle",
            "RESTfulChatglmCppChatModelHandle",
        ],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional[
            Union[
                "LlamaCppGenerateConfig",
                "PytorchGenerateConfig",
                "ChatglmCppGenerateConfig",
            ]
        ] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            model: The model handle used for generation.
            prompt: The prompt to use for generation.
            run_manager: Optional callback manager notified for each new token.
            generate_config: Optional dictionary for the configuration used
                for generation.

        Yields:
            A string token.
        """
        # NOTE: the request/iteration scaffolding below is reconstructed
        # (an assumption); the chunk handling is verbatim. Chat-style handles
        # stream via .chat(), completion-style handles via .generate().
        if isinstance(
            model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)
        ):
            streaming_response = model.chat(
                prompt=prompt, generate_config=generate_config
            )
        else:
            streaming_response = model.generate(
                prompt=prompt, generate_config=generate_config
            )

        for stream_resp in streaming_response:
            if stream_resp:
                choices = stream_resp.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        # Stop once the server reports a terminal finish reason.
                        if choice.get("finish_reason") in ("stop", "length"):
                            break

                        # Completion chunks carry the token in "text";
                        # chat chunks carry it in "delta"/"content".
                        if "text" in choice:
                            token = choice.get("text", "")
                        elif "delta" in choice and "content" in choice["delta"]: