Simplify JSON parsing

This commit is contained in:
Dunedan 2024-09-07 06:38:56 +02:00
parent 0e84957979
commit ea4b580527
Signed by untrusted user: Dunedan
GPG Key ID: 885B16854284E0B2

View File

@@ -338,60 +338,32 @@ class JsonExtractor(Extractor):
def extract_from_string(self, string): def extract_from_string(self, string):
json_document = json.loads(string) json_document = json.loads(string)
if isinstance(json_document, list): yield from self.parse(json_document)
for message, context in self.parse_list(json_document):
if message: # Skip empty strings.
yield message, context
elif isinstance(json_document, dict):
for message, context in self.parse_dictionary(json_document):
if message: # Skip empty strings.
yield message, context
else:
raise Exception(
"Unexpected JSON document parent structure (not a list or a dictionary). "
"You must extend the JSON extractor to support it."
)
def parse_list(self, items_list): def parse(self, data, key=None):
for list_item in items_list: """Recursively parse JSON data and extract strings."""
if isinstance(list_item, list): if isinstance(data, list):
for message, context in self.parse_list(list_item): for item in data:
yield message, context yield from self.parse(item)
elif isinstance(list_item, dict): elif isinstance(data, dict):
for message, context in self.parse_dictionary(list_item): for key2, value in data.items():
yield message, context if key2 in self.keywords:
if isinstance(value, str):
def parse_dictionary(self, dictionary): yield self.extract_string(value, key2)
for keyword in dictionary: elif isinstance(value, list):
if keyword in self.keywords: yield from self.extract_list(value, key2)
if isinstance(dictionary[keyword], str): elif isinstance(value, dict):
yield self.extract_string(dictionary[keyword], keyword) if self.keywords[key2].get("extractFromInnerKeys"):
elif isinstance(dictionary[keyword], list): for value2 in value.values():
for message, context in self.extract_list(dictionary[keyword], keyword): yield from self.parse(value2, key2)
yield message, context else:
elif isinstance(dictionary[keyword], dict): yield from self.extract_dictionary(value, key2)
extract = None else:
if ( yield from self.parse(value, key2)
"extractFromInnerKeys" in self.keywords[keyword] elif isinstance(data, str) and key in self.keywords:
and self.keywords[keyword]["extractFromInnerKeys"] yield self.extract_string(data, key)
):
for message, context in self.extract_dictionary_inner_keys(
dictionary[keyword], keyword
):
yield message, context
else:
extract = self.extract_dictionary(dictionary[keyword], keyword)
if extract:
yield extract
elif isinstance(dictionary[keyword], list):
for message, context in self.parse_list(dictionary[keyword]):
yield message, context
elif isinstance(dictionary[keyword], dict):
for message, context in self.parse_dictionary(dictionary[keyword]):
yield message, context
def extract_string(self, string, keyword): def extract_string(self, string, keyword):
context = None
if "tagAsContext" in self.keywords[keyword]: if "tagAsContext" in self.keywords[keyword]:
context = keyword context = keyword
elif "customContext" in self.keywords[keyword]: elif "customContext" in self.keywords[keyword]:
@@ -412,7 +384,6 @@ class JsonExtractor(Extractor):
def extract_dictionary(self, dictionary, keyword): def extract_dictionary(self, dictionary, keyword):
message = dictionary.get("_string", None) message = dictionary.get("_string", None)
if message and isinstance(message, str): if message and isinstance(message, str):
context = None
if "context" in dictionary: if "context" in dictionary:
context = str(dictionary["context"]) context = str(dictionary["context"])
elif "tagAsContext" in self.keywords[keyword]: elif "tagAsContext" in self.keywords[keyword]:
@@ -421,19 +392,7 @@ class JsonExtractor(Extractor):
context = self.keywords[keyword]["customContext"] context = self.keywords[keyword]["customContext"]
else: else:
context = self.context context = self.context
return message, context yield message, context
return None
def extract_dictionary_inner_keys(self, dictionary, keyword):
for inner_keyword in dictionary:
if isinstance(dictionary[inner_keyword], str):
yield self.extract_string(dictionary[inner_keyword], keyword)
elif isinstance(dictionary[inner_keyword], list):
yield from self.extract_list(dictionary[inner_keyword], keyword)
elif isinstance(dictionary[inner_keyword], dict):
extract = self.extract_dictionary(dictionary[inner_keyword], keyword)
if extract:
yield extract
class XmlExtractor(Extractor): class XmlExtractor(Extractor):