demo completed

2025-09-10 11:17:53 +02:00
parent a227795a1a
commit c899273827
6 changed files with 212 additions and 180 deletions
--- a/main_xai.py
+++ b/main_xai.py
@@ -2,26 +2,32 @@ from pathlib import Path

 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_ollama.llms import OllamaLLM
+from langchain_openai import ChatOpenAI
+from problog import get_evaluatable
+from problog.program import PrologString

 from src.data import LabeledImage, load_data
 from src.img_utils import encode_base64_resized

 TESTING = 1

+# Module-level settings:
+# - TESTING: main() compares this against 1; when equal it reads the saved
+#   intermediate results from .tmp-data instead of calling the models.
+# - EPSILON_PROB: probability prefix that execute_logic_program() puts on
+#   program facts for which no grounding evidence matched.
+EPSILON_PROB = 0.01
+

 def reasoning():
+    """Ask the reasoning model for a comparative cat-vs-dog trait analysis.
+
+    Builds a two-message chat prompt (system role + human question), pipes it
+    through the "qwen3-thinking" model served at http://localhost:8080/v1 and a
+    string output parser, and returns the resulting text.
+    """
    template_reasoning = ChatPromptTemplate.from_messages([
        ("system", "{role_reasoning}"),
        ("human", "Question: {question_reasoning}"),
    ])
-
-    model_reasoning = OllamaLLM(model="hf.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Q4_K_M")
+    model_reasoning = ChatOpenAI(
+        model="qwen3-thinking",
+        base_url="http://localhost:8080/v1",
+    )
    reasoning_chain = template_reasoning | model_reasoning | StrOutputParser()

    description = reasoning_chain.invoke({
-        "role_reasoning": "You are an expert in the classification of whether an animal is a cat or a dog",
-        "question_reasoning": "I want you to do a comparative analysis of cats and dogs. Your analysis must use the inherent traits and biological characteristics of each species. You should list each of these characteristics so that an informed decision can be made about whether a given animal, e.g., in the form of an image, is a cat or a dog. Please provide a detailed analysis, focusing on traits and characteristics that can be extracted from a given image.",
+        "role_reasoning": "You are a scientific expert in the classification of whether an animal is a cat or a dog. If tasked to answer questions, you shall adhere to scientific facts, think step-by-step, and explain your decision-making process. Focus on 'why' something is done, especially for complex logic, rather than *what* is done. Your answer should be concise and direct, and avoid conversational fillers. Format your answer appropriately for better understanding.",
+        "question_reasoning": "I want you to do a comparative analysis of cats and dogs. Your analysis must use the inherent traits and biological characteristics of each species. You should list each of these characteristics so that an informed decision can be made about whether a given animal depicted in an image is a cat or a dog. Please provide a detailed analysis, focusing on traits and characteristics that can be extracted from a given image. For formatting please use a list-like fashion.",
    })
    return description

@@ -31,25 +37,33 @@ def coding(description: str):
        ("system", "{role_coding}"),
        ("human", "Instructions: {instruction}\n Description: {description}"),
    ])
-    model_coding = OllamaLLM(model="hf.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_XL")
+    model_coding = ChatOpenAI(
+        model="qwen3-coder",
+        base_url="http://localhost:8080/v1",
+    )
    coding_chain = template_coding | model_coding | StrOutputParser()

    coding_description = coding_chain.invoke({
-        "role_coding": "You are an expert Prolog programmer with extended knowledge in reasoning and probabilities. Given instructions and a description, you can write a correct Prolog program that expresses the given task as a suitable logical program in Prolog",
-        "instruction": "Write a logical program for the following description",
+        "role_coding": """You are an expert Prolog programmer with extended knowledge in reasoning and probabilities. Given instructions and a description, you can write a correct Prolog program that expresses the given question as a suitable logical program in Prolog. You shall format your answer so that it can be directly used as an input for a Prolog interpreter. Do not incorporate example facts or queries into the knowledge base; these will be added later by the user. If necessary, add comments to your program to provide explanations to the user. The proposed facts should follow the form:
+            - <trait>.
+        There shall only be two rules of the following form present at the end:
+            - dog :- <trait1>; <trait2>; ...; <traitN>.
+            - cat :- <traitA>; <traitB>; ...; <traitX>.
+        Make sure to use ';' and not ',' for these two rules!
+        Lastly, the following two queries should be added:
+            - query(dog).
+            - query(cat).
+        """,
+        "instruction": "Write a logical program for the following description:",
        "description": description,
    })
    return coding_description


 def grounding(coding_description: str, labeled_image: LabeledImage):
-    model_vl = OllamaLLM(model="qwen2.5vl:7b")
-    model_vl_ctx = model_vl.bind(
-        images=[
-            encode_base64_resized(
-                Path(".tmp-data/highlight_spotlight_heatmap.jpg"), max_width=512, max_height=512, quality=70
-            )
-        ]
+    model_vl = ChatOpenAI(
+        model="minicpm-v-45",
+        base_url="http://localhost:8080/v1",
    )

    template_grounding = ChatPromptTemplate.from_messages([
@@ -61,40 +75,112 @@ def grounding(coding_description: str, labeled_image: LabeledImage):
            Description: {description}
            """,
        ),
+        ("placeholder", "{image}"),
    ])
-    grounding_chain = template_grounding | model_vl_ctx | StrOutputParser()
+    grounding_chain = template_grounding | model_vl | StrOutputParser()

    return grounding_chain.invoke(
        {
-            "role_vl": "You are an expert in analyzing an image to extract and match features of a given list.",
-            "instruction": """You are given a logic program in the following description and an image with a heatmap as input. Your task is to do the following steps:
-                1. Extract the list of features from the given Prolog program that contribute to deciding whether the image is a cat or a dog.
-                2. Match only the features highlighted by the heatmap in the given image with the features you retrieved from the Prolog program. Give a likelihood of how sure you are with your matching. Print your result in this format:
-                    - <feature from prolog program>: <likelihood>
+            "role_vl": "You are an expert in analyzing images to extract and match features of a given list. First, you look at the list of given features (facts written in Prolog), and then you analyze the given image for these features. If you are uncertain whether a feature matches, please acknowledge this and inform the user, but do not add the feature to the list of matched features. Please follow the user's instructions precisely.",
+            "instruction": """You are given a logic program in the following description and an image. Your task is to do the following steps:
+                1. Extract the list of features/facts from the given Prolog program that contribute to deciding whether the image is a cat or a dog.
+                2. Match only the features that are highlighted in the given image with the features (Prolog facts) you retrieved from the Prolog program. If no highlighting is visible consider the whole image. Give a likelihood, as decimal number, of how sure you are of your match. Print your result in this format:
+                    - <likelihood>:<trait1>
+                    - <likelihood>:<trait2>
                    - ...
            """,
            "description": coding_description,
+            "image": [
+                (
+                    "human",
+                    [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpg;base64,{encode_base64_resized(Path('.tmp-data/dog.jpg'), max_width=512, max_height=512, quality=70)}"
+                            },
+                        }
+                    ],
+                )
+            ],
        },
    )


+def _strip_code_fence(text: str) -> str:
+    """Return *text* without a surrounding Markdown code fence, if one is present."""
+    lines = text.strip().splitlines()
+    # Only drop the first/last line when it really is a fence marker, so an
+    # unfenced program (or one without a trailing newline) loses no clauses.
+    if lines and lines[0].lstrip().startswith("```"):
+        lines = lines[1:]
+    if lines and lines[-1].strip().startswith("```"):
+        lines = lines[:-1]
+    return "\n".join(lines)
+
+
+def _parse_evidence(grounding_results: str) -> dict[str, str]:
+    """Map fact name -> probability text for every well-formed grounding line."""
+    evidence: dict[str, str] = {}
+    for raw_line in grounding_results.splitlines():
+        entry = raw_line.strip()
+        # Only list items carry evidence, e.g. '- 0.95:pointed_ears'; free-text
+        # remarks from the model are ignored instead of crashing the run.
+        if not entry.startswith("-"):
+            continue
+        prob, separator, fact = entry[1:].partition(":")
+        prob = prob.strip()
+        fact = fact.strip().rstrip(".").strip()
+        if not separator or not fact:
+            continue
+        try:
+            value = float(prob)
+        except ValueError:
+            continue
+        if not 0.0 <= value <= 1.0:
+            continue
+        # A later entry for the same fact overrides an earlier one.
+        evidence[fact] = prob
+    return evidence
+
+
+def _annotate_facts(program: str, evidence: dict[str, str]) -> str:
+    """Prefix every single-line fact of *program* with '<probability>::'."""
+    annotated = []
+    in_clause = False  # True while inside a clause that spans several lines
+    for line in program.splitlines():
+        code = line.split("%", 1)[0].strip()  # ignore trailing Prolog comments
+        if not code:
+            annotated.append(line)
+            continue
+
+        starts_clause = not in_clause
+        in_clause = not code.endswith(".")
+        is_fact = (
+            starts_clause
+            and not in_clause
+            and ":-" not in code  # rules and directives stay unweighted
+            and "::" not in code  # already carries a probability
+            and not code.startswith("query(")
+        )
+        if not is_fact:
+            annotated.append(line)
+            continue
+
+        # Exact match on the fact term; substring matching would let a short
+        # name such as 'ears' weight every fact that merely contains it.
+        prob = evidence.get(code[:-1].strip(), EPSILON_PROB)
+        annotated.append(f"{prob}::{line}")
+    return "\n".join(annotated)
+
+
+def execute_logic_program(coding_description: str, grounding_results: str) -> tuple[float, float]:
+    """Weight the program's facts with the grounding evidence and evaluate it.
+
+    Args:
+        coding_description: Program text from the coding model, optionally
+            wrapped in a Markdown code fence.
+        grounding_results: Grounding output; lines of the form
+            '- <likelihood>:<trait>' are used as evidence, all other lines
+            are ignored.
+
+    Returns:
+        The tuple (p_cat, p_dog). A value stays 0.0 when the evaluation result
+        contains no entry for that query.
+
+    Raises:
+        KeyError: If the evaluation result contains a term other than
+            'cat' or 'dog'.
+    """
+    program = _strip_code_fence(coding_description)
+    evidence = _parse_evidence(grounding_results)
+    program_sanitized = _annotate_facts(program, evidence)
+
+    # evaluate logical program
+    print(program_sanitized)
+    result = get_evaluatable().create_from(PrologString(program_sanitized)).evaluate()
+
+    # get final probabilities
+    p_cat, p_dog = 0.0, 0.0
+    for term, probability in result.items():
+        name = str(term)
+        if name == "dog":
+            p_dog = probability
+        elif name == "cat":
+            p_cat = probability
+        else:
+            raise KeyError("Unknown key encountered!")
+    return p_cat, p_dog
+
+
 def main():
+    """Run the demo end to end and print the cat/dog probabilities.
+
+    With TESTING == 1 the reasoning text, the logic program and the grounding
+    results are read from files under .tmp-data; otherwise they are produced by
+    reasoning(), coding() and grounding() and written to those same files.
+    Either way the program is then evaluated with execute_logic_program().
+    """
    print("Starting Abduction Demo")
    labeled_images = load_data()
    labeled_image = labeled_images[1]
-    # image = labeled_image.image
-    # mask = labeled_image.create_mask([labeled_image.labels[0]])

    if TESTING == 1:
+        # NOTE: reasoning_description is read here but not used again in this
+        # branch; only the program and the grounding results are consumed below.
+        reasoning_description = Path(".tmp-data/reasoning_description").open("r").read()
        coding_description = Path(".tmp-data/coding_description").open("r").read()
+        grounding_results = Path(".tmp-data/grounding_results").open("r").read()
    else:
-        coding_description = coding(reasoning())
+        # Each stage's output is written to .tmp-data so a later run with
+        # TESTING == 1 can read it back instead of calling the models.
+        reasoning_description = reasoning()
+        with open(".tmp-data/reasoning_description", "w") as f:
+            f.write(reasoning_description)
+
+        coding_description = coding(reasoning_description)
        with open(".tmp-data/coding_description", "w") as f:
            f.write(coding_description)

-    result = grounding(coding_description, labeled_image)
-    # TODO: Feed this into the Prolog program and execute to reach final verdict
-    print(result)
+        grounding_results = grounding(coding_description, labeled_image)
+        with open(".tmp-data/grounding_results", "w") as f:
+            f.write(grounding_results)
+        print(grounding_results)
+
+    p_cat, p_dog = execute_logic_program(coding_description, grounding_results)
+    print(f"Cat Probability: {p_cat}")
+    print(f"Dog Probability: {p_dog}")
    print("End Abduction Demo")