new: remove the confusing 'import_mode' argument; it is now set automatically depending on whether wdoc is imported as a library or launched from the CLI

thiswillbeyourgithub · thiswillbeyourgithub · commit e0cc6f72f7f8 · 2025-03-17T20:17:20.000+01:00
Signed-off-by: thiswillbeyourgithub
<26625900+thiswillbeyourgithub@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -108,7 +108,7 @@ wdoc --path=$link --task=summarize --filetype="online_pdf"
 * **Markdown formatted answers and summaries**: using [rich](https://github.com/Textualize/rich).
 * **Sane embeddings**: By default use sophisticated embeddings like [multi query retrievers](https://python.langchain.com/docs/how_to/MultiQueryRetriever) but also include SVM, KNN, parent retriever etc. Customizable.
 * **Fully documented** Lots of docstrings, lots of in code comments, detailed `--help` etc. Take a look at the [examples.md](https://github.com/thiswillbeyourgithub/wdoc/blob/main/wdoc/docs/examples.md) for a list of shell and python examples. The full help can be found in the file [help.md](https://github.com/thiswillbeyourgithub/wdoc/docs/help.md) or via `python -m wdoc --help`. I work hard to maintain an exhaustive documentation. The complete documentation in a single page is available [on the website](https://wdoc.readthedocs.io/en/latest/all_docs.html).
-* **Scriptable / Extensible**: You can use `wdoc` in other python project using `--import_mode`. Take a look at the scripts [below](#scripts-made-with-wdoc). There is even [an open-webui Tool](https://openwebui.com/t/qqqqqqqqqqqqqqqqqqqq/wdoctool).
+* **Scriptable / Extensible**: You can use `wdoc` as an executable or as a library. Take a look at the scripts [below](#scripts-made-with-wdoc). There is even [an open-webui Tool](https://openwebui.com/t/qqqqqqqqqqqqqqqqqqqq/wdoctool).
 * **Statically typed**: Runtime type checking. Opt out with an environment flag: `WDOC_TYPECHECKING="disabled / warn / crash" wdoc` (by default: `warn`). Thanks to [beartype](https://beartype.readthedocs.io/en/latest/) it shouldn't even slow down the code!
 * **LLM (and embeddings) caching**: speed things up, as well as index storing and loading (handy for large collections).
 * **Good PDF parsing** PDF parsers are notoriously unreliable, so 15 (!) different loaders are used, and the best according to a parsing scorer is kept. Including table support via [openparse](https://github.com/Filimoa/open-parse/) (no GPU needed by default) or via [UnstructuredPDFLoader](https://python.langchain.com/docs/integrations/document_loaders/unstructured_pdfloader/).
@@ -136,7 +136,6 @@ Click to read more
         - add test for each loader
     - the logit bias is wrong for openai models: the token is specific to a given family of model
     - rewrite the python API to make it more useable. (also related to https://github.com/thiswillbeyourgithub/wdoc/issues/13)
-        - be careful to how to use import_mode
         - pay attention to how to modify the init and main.py files
         - pay attention to how the --help flag works
         - pay attention to how the USAGE document is structured
diff --git a/scripts/AnkiFiltered/AnkiFilteredDeckCreator.py b/scripts/AnkiFiltered/AnkiFilteredDeckCreator.py
@@ -76,7 +76,6 @@ def __init__(
         instance = wdoc(
             query_eval_modelname=query_eval_modelname,
             task=task,
-            import_mode=True,
             query=query,
             **kwargs,
         )
diff --git a/scripts/TheFiche/TheFiche.py b/scripts/TheFiche/TheFiche.py
@@ -73,7 +73,6 @@ def run_wdoc(query: str, kwargs2: dict) -> Tuple[wdoc, dict]:
     "call to wdoc, optionaly cached"
     instance = wdoc(
         task="query",
-        import_mode=True,
         query=query,
         **kwargs2,
     )
diff --git a/tests/test_wdoc.py b/tests/test_wdoc.py
@@ -321,7 +321,6 @@ def test_summary_tim_urban():
         filetype="auto",
         debug=False,
         verbose=False,
-        import_mode=True,
     )
     out = inst.summary_task()
     assert "tim urban" in out["summary"].lower()
@@ -382,7 +381,6 @@ def test_query_tim_urban():
         filetype="auto",
         debug=False,
         verbose=False,
-        import_mode=True,
     )
     out = inst.query_task(
         query="What is the allegory used by the speaker",
@@ -407,7 +405,6 @@ def test_whisper_tim_urban():
         whisper_lang="en",
         debug=False,
         verbose=False,
-        import_mode=True,
     )
 
 
diff --git a/wdoc/__main__.py b/wdoc/__main__.py
@@ -18,6 +18,9 @@
 from .wdoc import is_verbose, wdoc, whi, deb
 from .utils.misc import piped_input
 
+# if __main__ is called, then we are using the cli instead of importing the class from python
+wdoc.__import_mode__ = False
+
 
 def cli_launcher() -> None:
     """entry point function, modifies arguments on the fly for easier
diff --git a/wdoc/docs/help.md b/wdoc/docs/help.md
@@ -351,11 +351,6 @@
     information to a remote server, you can use `---private`.
     Note that the values of `llms_api_bases` are whitelisted when using `private`.
 
-* `--import_mode`: bool, default `False`
-    * if True, will return the answer from query instead of printing it.
-    The idea is to use if when you import wdoc instead of running
-    it from the cli.
-
 * `--disable_md_printing`: bool, default `True` if in a pipe and `False` otherwise.
     * if True, instead of using rich to display some information, default to simpler colored prints.
     * Naturally this is disablef if we are in a pipe, for example if you want to
diff --git a/wdoc/wdoc.py b/wdoc/wdoc.py
@@ -122,6 +122,7 @@ class wdoc:
     VERSION: str = "2.8.0"
     allowed_extra_args = extra_args_types
     md_printer = md_printer
+    __import_mode__: bool = True
 
     @optional_typecheck
     @set_func_signature
@@ -154,7 +155,6 @@ def __init__(
         file_loader_n_jobs: int = -1,
         private: Union[bool, int] = False,
         llms_api_bases: Optional[Union[dict, str]] = None,
-        import_mode: Union[bool, int] = False,
         disable_md_printing: bool = is_piped,
         out_file: Optional[Union[str, Path]] = None,
         oneoff: bool = False,
@@ -445,7 +445,6 @@ def print_exception(exc_type, exc_value, exc_traceback):
         self.file_loader_parallel_backend = file_loader_parallel_backend
         self.file_loader_n_jobs = file_loader_n_jobs
         self.llms_api_bases = llms_api_bases
-        self.import_mode = import_mode
         self.oneoff = oneoff
 
         if disable_llm_cache:
@@ -593,7 +592,7 @@ def print_exception(exc_type, exc_value, exc_traceback):
         if self.task in ["query", "search", "summary_then_query"]:
             self.prepare_query_task()
 
-        if self.import_mode:
+        if self.__import_mode__:
             deb(
                 "Ready to query or summarize, call your_instance.query_task(your_question)"
             )
@@ -785,7 +784,7 @@ def summarize_documents(
             if self.summary_n_recursion > 0:
                 for n_recur in range(1, self.summary_n_recursion + 1):
                     summary_text = copy.deepcopy(recursive_summaries[n_recur - 1])
-                    if not self.import_mode:
+                    if not self.__import_mode__:
                         red(f"Doing summary check #{n_recur} of {item_name}")
 
                     # remove any chunk count that is not needed to summarize
@@ -866,7 +865,7 @@ def summarize_documents(
                     )
                     if prev_real_text is not MISSING:
                         if real_text == prev_real_text:
-                            if not self.import_mode:
+                            if not self.__import_mode__:
                                 red(
                                     f"Identical summary after {n_recur} "
                                     "recursion, adding more recursion will not "
@@ -878,7 +877,7 @@ def summarize_documents(
 
                     assert n_recur not in recursive_summaries
                     if summary_text not in recursive_summaries:
-                        if not self.import_mode:
+                        if not self.__import_mode__:
                             red(
                                 f"Identical summary after {n_recur} "
                                 "recursion, adding more recursion will not "
@@ -891,7 +890,7 @@ def summarize_documents(
 
             best_sum_i = max(list(recursive_summaries.keys()))
             doc_total_tokens = doc_total_tokens_in + doc_total_tokens_out
-            if not self.import_mode:
+            if not self.__import_mode__:
                 print("\n\n")
                 md_printer("# Summary")
                 md_printer(f"## {path}")
@@ -915,7 +914,9 @@ def summarize_documents(
 
             # save to output file
             if self.out_file:
-                assert not self.import_mode, "Can't use import_mode with --out_file"
+                assert (
+                    not self.__import_mode__
+                ), "Can't use __import_mode__ with --out_file"
                 for nrecur, sum in recursive_summaries.items():
                     out_file = Path(self.out_file)
                     if len(recursive_summaries) > 1 and nrecur < max(
@@ -958,7 +959,7 @@ def summarize_documents(
             relevant_docs=self.loaded_docs,
         )
 
-        if not self.import_mode:
+        if not self.__import_mode__:
             red(
                 self.ntfy(
                     f"Total cost of those summaries: {results['doc_total_tokens']} tokens for ${results['doc_total_cost']:.5f} (estimate was ${estimate_dol:.5f})"
@@ -1683,7 +1684,7 @@ def retrieve_documents(inputs):
                 if len(docs) < self.interaction_settings["top_k"]:
                     red(f"Only found {len(docs)} relevant documents")
 
-            if self.import_mode:
+            if self.__import_mode__:
                 if "unfiltered_docs" in output:
                     red(
                         f"Number of documents using embeddings: {len(output['unfiltered_docs'])}"

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,6 @@ def __init__(`
`76`	`76`	`instance = wdoc(`
`77`	`77`	`query_eval_modelname=query_eval_modelname,`
`78`	`78`	`task=task,`
`79`		`- import_mode=True,`
`80`	`79`	`query=query,`
`81`	`80`	`**kwargs,`
`82`	`81`	`)`
Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,6 @@ def run_wdoc(query: str, kwargs2: dict) -> Tuple[wdoc, dict]:`
`73`	`73`	`"call to wdoc, optionaly cached"`
`74`	`74`	`instance = wdoc(`
`75`	`75`	`task="query",`
`76`		`- import_mode=True,`
`77`	`76`	`query=query,`
`78`	`77`	`**kwargs2,`
`79`	`78`	`)`