From 3d950929a17571a42991d43b38c82ac4c7d93798 Mon Sep 17 00:00:00 2001 From: Niklas Raesalmi Date: Tue, 10 Feb 2026 16:06:19 +0200 Subject: [PATCH 1/5] Modify structure to group together report subcategories --- avise/pipelines/language_model/schema.py | 8 ++-- avise/reportgen/reporters/json_reporter.py | 14 +++++-- avise/sets/languagemodel/prompt_injection.py | 44 ++++++++++++++++---- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/avise/pipelines/language_model/schema.py b/avise/pipelines/language_model/schema.py index 7f6ec4b..5a38251 100644 --- a/avise/pipelines/language_model/schema.py +++ b/avise/pipelines/language_model/schema.py @@ -107,9 +107,9 @@ class ReportData: set_name: str timestamp: str execution_time_seconds: Optional[float] - summary: Dict[str, Any] # total tests ran, passed%, failed%, error% rates - results: List[AnalysisResult] # All analysis results - configuration: Dict[str, Any] = field(default_factory=dict) # Test config + summary: Dict[str, Any] # total tests ran, passed%, failed%, error% rates + results: Optional[List[AnalysisResult]] = field(default_factory=list) # All analysis results, optional + configuration: Dict[str, Any] = field(default_factory=dict) # Test config def to_dict(self) -> Dict[str, Any]: return { @@ -118,5 +118,5 @@ def to_dict(self) -> Dict[str, Any]: "execution_time_seconds": self.execution_time_seconds, "configuration": self.configuration, "summary": self.summary, - "results": [result.to_dict() for result in self.results] + "results": [result.to_dict() for result in self.results] if self.results else [] } \ No newline at end of file diff --git a/avise/reportgen/reporters/json_reporter.py b/avise/reportgen/reporters/json_reporter.py index 237a87a..1e2790a 100644 --- a/avise/reportgen/reporters/json_reporter.py +++ b/avise/reportgen/reporters/json_reporter.py @@ -11,7 +11,6 @@ class JSONReporter(BaseReporter): """ Writes reports in JSON format. - """ format_name = "json" @@ -19,11 +18,18 @@ class JSONReporter(BaseReporter): def write(self, report_data: ReportData, output_path: Path) -> None: """ - Write report data as JSON file. + Write report data as JSON file, including grouped results. 
Args: report_data: The report data to write output_path: Path to the output file / directory """ - with open(output_path, 'w') as f: - json.dump(report_data.to_dict(), f, indent=2) + + data = report_data.to_dict() + + # Add grouped_results + if hasattr(report_data, "grouped_results") and report_data.grouped_results: + data["grouped_results"] = report_data.grouped_results + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2) diff --git a/avise/sets/languagemodel/prompt_injection.py b/avise/sets/languagemodel/prompt_injection.py index 4d99d84..047acb3 100644 --- a/avise/sets/languagemodel/prompt_injection.py +++ b/avise/sets/languagemodel/prompt_injection.py @@ -11,6 +11,7 @@ from pathlib import Path from datetime import datetime from typing import List, Dict, Any, Optional, Tuple +from collections import defaultdict from ...utils import ConfigLoader, ReportFormat from ...pipelines.language_model import ( @@ -277,24 +278,24 @@ def report( if result.set_id in self.elm_evaluations: result.elm_evaluation = self.elm_evaluations[result.set_id] + grouped_results = self._group_results_by_subcategory(results) + # Build ReportData object report_data = ReportData( set_name=self.name, timestamp=datetime.now().strftime("%Y-%m-%d | %H:%M"), - execution_time_seconds=( - round((self.end_time - self.start_time).total_seconds(), 1) - if self.start_time and self.end_time else None - ), + execution_time_seconds=round((self.end_time - self.start_time).total_seconds(), 1) + if self.start_time and self.end_time else None, summary=self.calculate_passrates(results), - results=results, configuration={ "connector_config": Path(self.connector_config_path).name if self.connector_config_path else "", - "set_config": Path(self.set_config_path).name if self.set_config_path else "", - "target_model": self.target_model_name, + "test_config": Path(self.set_config_path).name if self.set_config_path else "", + "testable_model": self.target_model_name, "evaluation_model": self.evaluation_model_name or "", "elm_evaluation_used": self.evaluation_connector is not None } ) + report_data.grouped_results = grouped_results # Create output directory if none exist yet output_file = Path(output_path) @@ -308,3 +309,32 @@ def report( MarkdownReporter().write(report_data, output_file) logger.info(f"Report written to {output_path}") return report_data + + def _group_results_by_subcategory(self, results: List[AnalysisResult]) -> List[Dict[str, Any]]: + """ + Group AnalysisResults by vulnerability_subcategory. + Converts AnalysisResult objects to dicts for JSON serialization. + Returns a list of dicts like: + [ + { + "vulnerability_subcategory": "Direct Injection", + "tests": [ ...AnalysisResult dicts... ] + }, + ... 
+ ] + """ + from collections import defaultdict + + grouped_results: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + for result in results: + subcategory = result.metadata.get("vulnerability_subcategory", "Unknown") + grouped_results[subcategory].append(result.to_dict()) # <- convert to dict here + + # Convert to list of dicts for cleaner JSON + return [ + { + "vulnerability_subcategory": subcategory, + "tests": tests + } + for subcategory, tests in grouped_results.items() + ] From 6e56cfa543ecb97f26c9cec78afd59272b33c00b Mon Sep 17 00:00:00 2001 From: nraesalmi Date: Fri, 13 Feb 2026 11:28:24 +0200 Subject: [PATCH 2/5] rename tests with correct terminology --- avise/configs/connector/ollama.json | 4 ++-- avise/sets/languagemodel/prompt_injection.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/avise/configs/connector/ollama.json b/avise/configs/connector/ollama.json index be60bb5..483fc31 100644 --- a/avise/configs/connector/ollama.json +++ b/avise/configs/connector/ollama.json @@ -2,14 +2,14 @@ "target_model": { "connector": "ollama-lm", "type": "language_model", - "name": "phi3:latest", + "name": "phi4-mini:latest", "api_url": "http://localhost:11434", "api_key": null }, "eval_model": { "connector": "ollama-lm", "type": "language_model", - "name": "phi3:latest", + "name": "phi4-mini:latest", "api_url": "http://localhost:11434", "api_key": null } diff --git a/avise/sets/languagemodel/prompt_injection.py b/avise/sets/languagemodel/prompt_injection.py index 047acb3..de0d959 100644 --- a/avise/sets/languagemodel/prompt_injection.py +++ b/avise/sets/languagemodel/prompt_injection.py @@ -289,8 +289,8 @@ def report( summary=self.calculate_passrates(results), configuration={ "connector_config": Path(self.connector_config_path).name if self.connector_config_path else "", - "test_config": Path(self.set_config_path).name if self.set_config_path else "", - "testable_model": self.target_model_name, + "set_config": Path(self.set_config_path).name if self.set_config_path else "", + "target_model": self.target_model_name, "evaluation_model": self.evaluation_model_name or "", "elm_evaluation_used": self.evaluation_connector is not None } @@ -318,7 +318,7 @@ def _group_results_by_subcategory(self, results: List[AnalysisResult]) -> List[D [ { "vulnerability_subcategory": "Direct Injection", - "tests": [ ...AnalysisResult dicts... ] + "SETs": [ ...AnalysisResult dicts... ] }, ... 
] @@ -334,7 +334,7 @@ def _group_results_by_subcategory(self, results: List[AnalysisResult]) -> List[D return [ { "vulnerability_subcategory": subcategory, - "tests": tests + "SETs": SETs } - for subcategory, tests in grouped_results.items() + for subcategory, SETs in grouped_results.items() ] From 93f67a59942de1217d8d5ead77726ae81d18428d Mon Sep 17 00:00:00 2001 From: nraesalmi Date: Fri, 13 Feb 2026 12:33:34 +0200 Subject: [PATCH 3/5] restructure JSON to include statistics for each subcategory, add remediations --- avise/cli.py | 6 +-- avise/pipelines/language_model/pipeline.py | 10 ++-- avise/pipelines/language_model/schema.py | 2 +- avise/sets/languagemodel/prompt_injection.py | 57 ++++++++++++++------ 4 files changed, 49 insertions(+), 26 deletions(-) diff --git a/avise/cli.py b/avise/cli.py index a78bb5d..792f160 100644 --- a/avise/cli.py +++ b/avise/cli.py @@ -156,9 +156,9 @@ def main(arguments=[]) -> None: print(f"\nSecurity Evaluation Test completed!") print(f" Format: {report_format.value.upper()}") print(f" Total: {report.summary['total_sets']}") - print(f" Passed: {report.summary['passed']} ({report.summary['pass_rate']}%)") - print(f" Failed: {report.summary['failed']} ({report.summary['fail_rate']}%)") - print(f" Errors: {report.summary['error']}") + print(f" Passed: {report.summary['total_passed']} ({report.summary['total_pass_rate']}%)") + print(f" Failed: {report.summary['total_failed']} ({report.summary['total_fail_rate']}%)") + print(f" Errors: {report.summary['total_error']}") except Exception as e: logger.error(f"Security Evaluation Test run failed: {e}", exc_info=True) diff --git a/avise/pipelines/language_model/pipeline.py b/avise/pipelines/language_model/pipeline.py index 654a38f..bf3640c 100644 --- a/avise/pipelines/language_model/pipeline.py +++ b/avise/pipelines/language_model/pipeline.py @@ -212,11 +212,11 @@ def calculate_passrates(results: List[AnalysisResult]) -> Dict[str, Any]: return { "total_sets": total_sets, - "passed": passed, - "failed": failed, - "error": errors, - "pass_rate": pass_rate, - "fail_rate": fail_rate, + "total_passed": passed, + "total_failed": failed, + "total_error": errors, + "total_pass_rate": pass_rate, + "total_fail_rate": fail_rate, "ci_lower_bound": confidence_interval[1], "ci_upper_bound": confidence_interval[2] } diff --git a/avise/pipelines/language_model/schema.py b/avise/pipelines/language_model/schema.py index 5a38251..f61a1e3 100644 --- a/avise/pipelines/language_model/schema.py +++ b/avise/pipelines/language_model/schema.py @@ -118,5 +118,5 @@ def to_dict(self) -> Dict[str, Any]: "execution_time_seconds": self.execution_time_seconds, "configuration": self.configuration, "summary": self.summary, - "results": [result.to_dict() for result in self.results] if self.results else [] + "results": [r if isinstance(r, dict) else r.to_dict() for r in self.results] if self.results else [] } \ No newline at end of file diff --git a/avise/sets/languagemodel/prompt_injection.py b/avise/sets/languagemodel/prompt_injection.py index de0d959..58b5471 100644 --- a/avise/sets/languagemodel/prompt_injection.py +++ b/avise/sets/languagemodel/prompt_injection.py @@ -295,7 +295,8 @@ def report( "elm_evaluation_used": self.evaluation_connector is not None } ) - report_data.grouped_results = grouped_results + report_data.results = grouped_results + report_data.remediation_recommendations = [] # empty for now # Create output directory if none exist yet output_file = Path(output_path) @@ -313,28 +314,50 @@ def report( def 
_group_results_by_subcategory(self, results: List[AnalysisResult]) -> List[Dict[str, Any]]: """ Group AnalysisResults by vulnerability_subcategory. - Converts AnalysisResult objects to dicts for JSON serialization. - Returns a list of dicts like: + + Returns structure: [ { - "vulnerability_subcategory": "Direct Injection", - "SETs": [ ...AnalysisResult dicts... ] - }, - ... + "vulnerability_subcategory": "...", + "total_runs": X, + "passed": X, + "failed": X, + "error": X, + "pass_rate": X, + "fail_rate": X, + "recommended_remediation": "", + "SETs": [...] + } ] """ - from collections import defaultdict + grouped_results: Dict[str, List[AnalysisResult]] = defaultdict(list) - grouped_results: Dict[str, List[Dict[str, Any]]] = defaultdict(list) for result in results: subcategory = result.metadata.get("vulnerability_subcategory", "Unknown") - grouped_results[subcategory].append(result.to_dict()) # <- convert to dict here + grouped_results[subcategory].append(result) - # Convert to list of dicts for cleaner JSON - return [ - { + grouped_output = [] + + for subcategory, sub_results in grouped_results.items(): + total_runs = len(sub_results) + passed = sum(1 for r in sub_results if r.status == "passed") + failed = sum(1 for r in sub_results if r.status == "failed") + error = sum(1 for r in sub_results if r.status == "error") + + pass_rate = (passed / total_runs * 100) if total_runs > 0 else 0.0 + fail_rate = (failed / total_runs * 100) if total_runs > 0 else 0.0 + + grouped_output.append({ "vulnerability_subcategory": subcategory, - "SETs": SETs - } - for subcategory, SETs in grouped_results.items() - ] + "total_runs": total_runs, + "passed": passed, + "failed": failed, + "error": error, + "pass_rate": round(pass_rate, 2), + "fail_rate": round(fail_rate, 2), + "recommended_remediation": "", + "SETs": [r.to_dict() for r in sub_results] + }) + + return grouped_output + From fff2c855ac4df1052cfaf9471a79c365abe31bc2 Mon Sep 17 00:00:00 2001 From: Niklas Raesalmi Date: Fri, 27 Feb 2026 14:38:36 +0200 Subject: [PATCH 4/5] Move grouping logic from individual test results to report generation phase --- avise/configs/connector/ollama.json | 4 +- avise/pipelines/language_model/pipeline.py | 62 +++++++++++++++++- avise/pipelines/language_model/schema.py | 29 ++++++++- avise/sets/languagemodel/prompt_injection.py | 67 +++----------------- 4 files changed, 98 insertions(+), 64 deletions(-) diff --git a/avise/configs/connector/ollama.json b/avise/configs/connector/ollama.json index 483fc31..be60bb5 100644 --- a/avise/configs/connector/ollama.json +++ b/avise/configs/connector/ollama.json @@ -2,14 +2,14 @@ "target_model": { "connector": "ollama-lm", "type": "language_model", - "name": "phi4-mini:latest", + "name": "phi3:latest", "api_url": "http://localhost:11434", "api_key": null }, "eval_model": { "connector": "ollama-lm", "type": "language_model", - "name": "phi4-mini:latest", + "name": "phi3:latest", "api_url": "http://localhost:11434", "api_key": null } diff --git a/avise/pipelines/language_model/pipeline.py b/avise/pipelines/language_model/pipeline.py index bf3640c..bb5bcd3 100644 --- a/avise/pipelines/language_model/pipeline.py +++ b/avise/pipelines/language_model/pipeline.py @@ -11,8 +11,9 @@ from typing import List, Dict, Any, Optional from datetime import datetime from math import sqrt +from collections import defaultdict -from .schema import LanguageModelSETCase, OutputData, AnalysisResult, ReportData +from .schema import LanguageModelSETCase, OutputData, AnalysisResult, ReportData, 
SubcategoryReport from ...connectors.languagemodel.base import BaseLMConnector from scipy.special import erfinv @@ -55,6 +56,7 @@ def __init__(self): self.set_config_path: Optional[str] = None self.target_model_name: Optional[str] = None self.evaluation_model_name: Optional[str] = None + self.group_by_metadata_key: Optional[str] = None @abstractmethod def initialize(self, set_config_path: str) -> List[LanguageModelSETCase]: @@ -261,3 +263,61 @@ def _calculate_confidence_interval(passed: int, upper_bound = min(1, upper_bound) return (p, lower_bound, upper_bound) + + def _group_results_by_metadata_key(self, + results: List[AnalysisResult], + key: str + ) -> List[SubcategoryReport]: + """ + Generic grouping utility. + Groups AnalysisResults by metadata[key]. + """ + + grouped = defaultdict(list) + + for result in results: + group_value = result.metadata.get(key, "Unknown") + grouped[group_value].append(result) + + grouped_reports: List[SubcategoryReport] = [] + + for group_name, group_results in grouped.items(): + total_runs = len(group_results) + passed = sum(1 for r in group_results if r.status == "passed") + failed = sum(1 for r in group_results if r.status == "failed") + error = sum(1 for r in group_results if r.status == "error") + + pass_rate = (passed / total_runs * 100) if total_runs else 0.0 + fail_rate = (failed / total_runs * 100) if total_runs else 0.0 + + grouped_reports.append( + SubcategoryReport( + subcategory_name=group_name, + total_runs=total_runs, + passed=passed, + failed=failed, + error=error, + pass_rate=round(pass_rate, 2), + fail_rate=round(fail_rate, 2), + recommended_remediation="", + SETs=group_results + ) + ) + + return grouped_reports + + def _prepare_report_results(self, + results: List[AnalysisResult] + ): + """ + Returns either flat results or grouped results + depending on group_by_metadata_key. 
+ """ + + if self.group_by_metadata_key: + return self._group_results_by_metadata_key( + results, + self.group_by_metadata_key + ) + + return results diff --git a/avise/pipelines/language_model/schema.py b/avise/pipelines/language_model/schema.py index f61a1e3..ca49a2e 100644 --- a/avise/pipelines/language_model/schema.py +++ b/avise/pipelines/language_model/schema.py @@ -108,7 +108,7 @@ class ReportData: timestamp: str execution_time_seconds: Optional[float] summary: Dict[str, Any] # total tests ran, passed%, failed%, error% rates - results: Optional[List[AnalysisResult]] = field(default_factory=list) # All analysis results, optional + results: Optional[List[SubcategoryReport]] = field(default_factory=list) # All analysis results, optional configuration: Dict[str, Any] = field(default_factory=dict) # Test config def to_dict(self) -> Dict[str, Any]: @@ -118,5 +118,30 @@ def to_dict(self) -> Dict[str, Any]: "execution_time_seconds": self.execution_time_seconds, "configuration": self.configuration, "summary": self.summary, - "results": [r if isinstance(r, dict) else r.to_dict() for r in self.results] if self.results else [] + "results": [r.to_dict() for r in self.results] if self.results else [] + } + +@dataclass +class SubcategoryReport: + subcategory_name: str + total_runs: int + passed: int + failed: int + error: int + pass_rate: float + fail_rate: float + recommended_remediation: str + SETs: List[AnalysisResult] + + def to_dict(self) -> Dict[str, Any]: + return { + "subcategory_name": self.subcategory_name, + "total_runs": self.total_runs, + "passed": self.passed, + "failed": self.failed, + "error": self.error, + "pass_rate": self.pass_rate, + "fail_rate": self.fail_rate, + "recommended_remediation": self.recommended_remediation, + "SETs": [s.to_dict() for s in self.SETs] } \ No newline at end of file diff --git a/avise/sets/languagemodel/prompt_injection.py b/avise/sets/languagemodel/prompt_injection.py index 58b5471..1f52606 100644 --- a/avise/sets/languagemodel/prompt_injection.py +++ b/avise/sets/languagemodel/prompt_injection.py @@ -11,7 +11,6 @@ from pathlib import Path from datetime import datetime from typing import List, Dict, Any, Optional, Tuple -from collections import defaultdict from ...utils import ConfigLoader, ReportFormat from ...pipelines.language_model import ( @@ -63,6 +62,8 @@ def __init__(self): self.partial_compliance_evaluator = PartialComplianceEvaluator() self.suspicious_output_evaluator = SuspiciousOutputEvaluator() + self.group_by_metadata_key = "vulnerability_subcategory" + def initialize(self, set_config_path: str) -> List[LanguageModelSETCase]: """ Phase 1 of the test pipeline. Load prompt injection SET cases from configuration files. 
@@ -278,15 +279,16 @@ def report( if result.set_id in self.elm_evaluations: result.elm_evaluation = self.elm_evaluations[result.set_id] - grouped_results = self._group_results_by_subcategory(results) - # Build ReportData object report_data = ReportData( set_name=self.name, timestamp=datetime.now().strftime("%Y-%m-%d | %H:%M"), - execution_time_seconds=round((self.end_time - self.start_time).total_seconds(), 1) - if self.start_time and self.end_time else None, + execution_time_seconds=( + round((self.end_time - self.start_time).total_seconds(), 1) + if self.start_time and self.end_time else None + ), summary=self.calculate_passrates(results), + results=self._prepare_report_results(results), configuration={ "connector_config": Path(self.connector_config_path).name if self.connector_config_path else "", "set_config": Path(self.set_config_path).name if self.set_config_path else "", @@ -295,8 +297,6 @@ def report( "elm_evaluation_used": self.evaluation_connector is not None } ) - report_data.results = grouped_results - report_data.remediation_recommendations = [] # empty for now # Create output directory if none exist yet output_file = Path(output_path) @@ -309,55 +309,4 @@ def report( elif report_format == ReportFormat.MARKDOWN: MarkdownReporter().write(report_data, output_file) logger.info(f"Report written to {output_path}") - return report_data - - def _group_results_by_subcategory(self, results: List[AnalysisResult]) -> List[Dict[str, Any]]: - """ - Group AnalysisResults by vulnerability_subcategory. - - Returns structure: - [ - { - "vulnerability_subcategory": "...", - "total_runs": X, - "passed": X, - "failed": X, - "error": X, - "pass_rate": X, - "fail_rate": X, - "recommended_remediation": "", - "SETs": [...] - } - ] - """ - grouped_results: Dict[str, List[AnalysisResult]] = defaultdict(list) - - for result in results: - subcategory = result.metadata.get("vulnerability_subcategory", "Unknown") - grouped_results[subcategory].append(result) - - grouped_output = [] - - for subcategory, sub_results in grouped_results.items(): - total_runs = len(sub_results) - passed = sum(1 for r in sub_results if r.status == "passed") - failed = sum(1 for r in sub_results if r.status == "failed") - error = sum(1 for r in sub_results if r.status == "error") - - pass_rate = (passed / total_runs * 100) if total_runs > 0 else 0.0 - fail_rate = (failed / total_runs * 100) if total_runs > 0 else 0.0 - - grouped_output.append({ - "vulnerability_subcategory": subcategory, - "total_runs": total_runs, - "passed": passed, - "failed": failed, - "error": error, - "pass_rate": round(pass_rate, 2), - "fail_rate": round(fail_rate, 2), - "recommended_remediation": "", - "SETs": [r.to_dict() for r in sub_results] - }) - - return grouped_output - + return report_data \ No newline at end of file From 0601018818172a73df8e125b5f93d5c09c0a4cf9 Mon Sep 17 00:00:00 2001 From: Niklas Raesalmi Date: Fri, 27 Feb 2026 15:06:57 +0200 Subject: [PATCH 5/5] fix html_reporter to accept grouping --- avise/reportgen/reporters/html_reporter.py | 77 ++++++++++++---------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/avise/reportgen/reporters/html_reporter.py b/avise/reportgen/reporters/html_reporter.py index 72a5492..767aa94 100644 --- a/avise/reportgen/reporters/html_reporter.py +++ b/avise/reportgen/reporters/html_reporter.py @@ -181,47 +181,57 @@ def _get_summary_section(self, report_data: ReportData) -> str:
                     Total Security Evaluation Tests
-                    {summary['passed']}
-                    Passed ({summary['pass_rate']}%)
+                    {summary['total_passed']}
+                    Passed ({summary['total_pass_rate']}%)
-                    {summary['failed']}
-                    Failed ({summary['fail_rate']}%)
+                    {summary['total_failed']}
+                    Failed ({summary['total_fail_rate']}%)
-                    {summary['error']}
+                    {summary['total_error']}
                     Inconclusive
""" def _get_results(self, results: list) -> str: - """Generate list of results.""" - html = """ -
-
-

Security Evaluation Test Results

-
-""" - for result in results: - if isinstance(result, AnalysisResult): - set_ = { - "set_id": result.set_id, - "prompt": result.prompt, - "response": result.response, - "status": result.status, - "reason": result.reason, - "attack_type": result.metadata.get("attack_type", ""), - "detections": result.detections, - "full_conversation": result.metadata.get("full_conversation", []), - "description": result.metadata.get("description", "") + """Generate HTML for grouped subcategory results.""" + html = "" + for group in results: + # Access attributes instead of dictionary keys + subcategory_name = getattr(group, "subcategory_name", "") + total_passed = getattr(group, "passed", 0) + total_failed = getattr(group, "failed", 0) + total_error = getattr(group, "error", 0) + + html += f""" +
+                {self.escape_html(subcategory_name)}
+                Passed: {total_passed}, Failed: {total_failed}, Inconclusive: {total_error}
+            """
+
+            # Loop through the SETs in this subcategory
+            for set_ in getattr(group, "SETs", []):
+                # Build a dict to pass to _get_set_item
+                set_dict = {
+                    "set_id": getattr(set_, "set_id", ""),
+                    "prompt": getattr(set_, "prompt", ""),
+                    "response": getattr(set_, "response", ""),
+                    "status": getattr(set_, "status", ""),
+                    "reason": getattr(set_, "reason", ""),
+                    "detections": getattr(set_, "detections", {}),
+                    "metadata": getattr(set_, "metadata", {}),
+                    "full_conversation": getattr(set_, "full_conversation", []),
+                    "elm_evaluation": getattr(set_, "elm_evaluation", "")
                 }
-                if result.elm_evaluation:
-                    set_["elm_evaluation"] = result.elm_evaluation
-            else:
-                set_ = result
-            html += self._get_set_item(set_)
-        html += "\n"
+                # Add attack_type for _get_set_item
+                set_dict["attack_type"] = set_dict["metadata"].get("attack_type", "")
+                html += self._get_set_item(set_dict)
+
+        html += "\n"
         return html

     def _get_set_item(self, set_: Dict[str, Any]) -> str:
@@ -231,11 +241,12 @@ def _get_set_item(self, set_: Dict[str, Any]) -> str:
             set_label = f" - {set_label}"

         elm_html = ""
-        if "elm_evaluation" in set_:
+        elm_eval = set_.get('elm_evaluation') or ""
+        if elm_eval:
             elm_html = f"""
                ELM Evaluation
-                {self.escape_html(set_['elm_evaluation'])}
+                {self.escape_html(elm_eval)}
            """

         # Check for conversation format (memory test)