Source code for wishbone.module.modify

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  modify.py
#
#  Copyright 2018 Jelle Smet <development@smetj.net>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#

from wishbone.actor import Actor
from wishbone.module import ProcessModule
from copy import deepcopy
import re
import arrow

VALID_EXPRESSIONS = ["add_item",
                     "copy",
                     "del_item",
                     "delete",
                     "extract",
                     "join",
                     "lowercase",
                     "merge",
                     "replace",
                     "set",
                     "template",
                     "uppercase",
                     "time",
                     ]


[docs]class Modify(ProcessModule): '''**Modify and manipulate datastructures.** This module modifies the data of an event using a **sequential** list of expressions. Expressions are dictionaries containing 1 item. The key is a string and the value is a list of parameters accepted by the expression. For example:: {"set": ["hi", "data.one"]} Sets the value *"hi"* to key *"data.one"*. In the the YAML formatted bootstrap file that would look like:: module: wishbone.function.modify arguments: expressions: - set: [ "hi", "data.one" ] Valid expressions are: - **add_item**:: add_item: [<item>, <key>] Adds *<item>* to the list stored under *<key>*. - **copy**:: copy: [<source_key>, <destination_key>, <default_value>] Copies *<source_key>* to *<destination_key>* and overwrites *<destination_key>* when it exists. If <source_key> does not exist, <default_value> is taken instead. - **del_item**:: del_item: [<key>, <item>] Deletes first occurance of *<item>* from the list stored under *<source_key>*. - **delete**:: delete: [<key>] Deletes *<key>* from the event. - **extract**:: extract: [<destination>, <regex>, <source>] Makes use of Python re module to extract named groups from *<source>* using *<regex>* and add the resulting matches to *<destination>*. The following example would extract the words "one" and "two" from "data.test" and add the to data.extract: expression:: extract: ["data.extract", '(?P<first>.*?);(?P<second>.*)', "data.test"] result:: {"data":{"test:"one;two", extract:{"first": "one", "second": "two"}}} - **join**:: join: [<array>, <join>, <destination>] Joins an array into a string using the <join> value. - **lowercase**:: lowercase: [<key>] Turns the string stored under *<key>* to lowercase. - **merge**:: merge: [<object_one>, <object_two>, <destination>] Merges 2 arrays into <destination> - **replace**:: replace: [<regex>, <value>, <key>] replaces every occurance of <regex> of the value stored in <key> with <value> - **set**:: set: [<value>, <key>] Sets *<value>* to the event *<key>*. - **uppercase**:: uppercase: [<key>] Turns the string stored under *<key>* to uppercase. - **template**:: template: [<destination_key>, <template>, <source_key>] Uses the dictionary stored in *<source_key>* to complete *<template>* and stores the results into key *<destination_key>*. The templating language used is Python's builtin string format one. - **time**:: time: [<destination_key>, <format>] Modifies the *<timestamp>* value according the the *<format>* specification and stores it into *<destination_key>*. See http://crsmithdev.com/arrow/#format for the format. Parameters: - expressions(list)([]) | A list of expressions to apply. Queues: - inbox: | Incoming messages - outbox: | Outgoing modified messages ''' def __init__(self, actor_config, expressions=[]): Actor.__init__(self, actor_config) self.pool.createQueue("inbox") self.pool.createQueue("outbox") self.registerConsumer(self.consume, "inbox") def consume(self, event): for expression in self.kwargs.expressions: try: command, args = self.__extractExpr(expression) event = getattr(self, "command_%s" % (command))(event, *args) except Exception as err: self.logging.error("Failed to process expression '%s'. Reason: %s Skipped." % (expression, err)) self.submit(event, "outbox") def command_add_item(self, event, item, key): event.get(key).append(item) return event def command_copy(self, event, source, destination, default_value): try: event.copy(source, destination) except KeyError: event.set(default_value, destination) return event def command_del_item(self, event, item, key): event.get(key).remove(item) return event def command_delete(self, event, key): event.delete(key) return event def command_extract(self, event, destination, regex, source): matches = re.match(regex, event.get(source)) event.set(matches.groupdict(), destination) return event def command_join(self, event, array, j, destination): result = j.join(event.get(array)) event.set(result, destination) return event def command_lowercase(self, event, key): event.set(event.get(key).lower(), key) return event def command_merge(self, event, one, two, destination): result = event.get(one) + event.get(two) event.set(result, destination) return event def command_replace(self, event, regex, value, key): result = re.sub("{}".format(regex), value, str(event.get(key))) event.set(str(result), key) return event def command_set(self, event, value, key): event.set(deepcopy(value), key) return event def command_template(self, event, destination, template, key): result = template.format(**event.get(key)) event.set(result, destination) return event def command_time(self, event, destination_key, f): result = arrow.get(event.get("timestamp")).format(f) event.set(result, destination_key) return event def command_uppercase(self, event, key): event.set(event.get(key).upper(), key) return event def __extractExpr(self, e): assert isinstance(e, dict), "The expression should be a dict." assert len(list(e.keys())) == 1, "The expression should only contain 1 value" c = list(e.keys())[0] assert c in VALID_EXPRESSIONS, "'%s' is an invalid expression." % c assert isinstance(e[c], list), "The expression value must be a list." return c, e[c]