#!/usr/bin/python
#
# Copyright (C) 2005 Todd Troxell
#
# Logcheck Rulefiles Analyzer - Get statistics about rule effectiveness

# Logcheck Rulefiles Analyzer is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# Logcheck is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Logcheck; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import os
import subprocess
import getopt
import sys


class ruleAnalyzer:
    """The main application.

    Walks a directory of rulefiles, treats every line of every rulefile
    as an egrep pattern, runs each pattern against a single log file and
    prints how many log lines each pattern matched.
    """

    def __init__(self):
        # Root rulefiles directory
        self.ruleDir = "rulefiles/"
        # Display debug output
        self.debugOutput = False
        # File to scan
        self.logfile = "/var/log/syslog"

    def run(self):
        """Run the full pipeline: load rules, scan the log, print the report."""
        self.makeDataStructures()
        self.readRuleFiles()
        for x in self.ruleFiles:
            self.readRuleFile(x)
        self.countLines()
        self.analyzeLogFile(self.logfile)
        self.report()

    def debug(self, output):
        """Print ``output`` when ``self.debugOutput`` is true."""
        if self.debugOutput:
            print(output)

    def countLines(self):
        """Store the number of lines of ``self.logfile`` in ``self.lineCount``."""
        # Binary mode: only newlines are counted, so no text decoding is
        # needed.  Streaming avoids holding the whole log in memory.
        with open(self.logfile, 'rb') as f:
            self.lineCount = sum(1 for _ in f)

    def makeDataStructures(self):
        """Reset all counters and the rulefile table."""
        # Maps rulefile path -> {rule line: number of matching log lines}.
        self.ruleFiles = dict()  # This is the big one
        self.lineCount = 0
        self.egrepCallCount = 0
        self.ruleCount = 0
        self.numRuleFiles = 0

    def readRuleFiles(self):
        """Register every file below ``self.ruleDir`` in ``self.ruleFiles``.

        Each file found gets an empty rule table; the rules themselves are
        loaded later by ``readRuleFile``.
        """
        for root, dirs, files in os.walk(self.ruleDir):
            if 'CVS' in dirs:
                # Don't scan CVS dirs; pruning in place stops os.walk
                # from descending into them.
                dirs.remove('CVS')
            for name in files:
                path = os.path.join(root, name)
                self.debug(path)
                self.ruleFiles[path] = dict()
        self.numRuleFiles = len(self.ruleFiles)

    def readRuleFile(self, file):
        """Read a single rulefile into ``self.ruleFiles[file]``.

        Every line of the file becomes a rule with a match count of 0 and
        increments ``self.ruleCount``.  ``file`` must already have an
        entry in ``self.ruleFiles``.  If the file cannot be opened, an
        error is printed and the process exits with status -1.
        """
        self.debug("reading rulefile: %s" % (file))
        try:
            f = open(file, "r")
        except (IOError, OSError):
            print("Error opening %s" % (file))
            sys.exit(-1)
        with f:
            for line in f:
                # Strip only the line terminator, so a final rule without
                # a trailing newline keeps its last character.
                rule = line.rstrip("\n")
                # NOTE(review): blank and '#' comment lines are stored as
                # rules too; confirm whether they should be skipped.
                self.ruleFiles[file][rule] = 0
                self.ruleCount += 1

    def analyzeLogFile(self, path):
        """Check every rule against every line of the log file ``path``.

        Adds the number of matching lines to each rule's counter and
        counts the egrep invocations in ``self.egrepCallCount``.
        """
        # We call egrep directly here, many many times.
        # This is because python's re is not 100% compatible
        # with GNU grep, and we have user-contributed rules.
        for ruleFile, rules in self.ruleFiles.items():
            for r in rules:
                self.debug("trying regex %s\n\n\n\n\n" % (r))
                p = subprocess.Popen(
                    ("/bin/egrep", "--text", "-c", "-e", r, path),
                    shell=False, stdout=subprocess.PIPE)
                # communicate() drains and closes the pipe while waiting,
                # unlike wait() followed by read().
                output = p.communicate()[0]
                # Only a zero exit status is counted; any other status
                # leaves the rule's counter untouched.
                if p.returncode == 0:
                    rules[r] += int(output.strip())
                self.egrepCallCount += 1
            print("analyzed %s." % (ruleFile))

    def report(self):
        """Print the per-file match counts followed by a summary."""
        # TODO:
        #mostMatched = self.calculateMostMatched()
        #leastMatched = self.calculateLeastMatched()
        self.printList(self.ruleFiles)
        print("*** Summary ***")
        print("Total rulefiles:\t%i" % (self.numRuleFiles))
        print("Total rules\t\t%i" % (self.ruleCount))
        print("Egrep invocations:\t%i" % (self.egrepCallCount))
        print("Total comparisons made:\t%i" %
              (self.egrepCallCount * self.lineCount))
        print("Line count:\t\t%i" % (self.lineCount))

    def printList(self, list):
        """Rudimentary output function.

        ``list`` is a mapping of rulefile path -> {rule: match count};
        for each file its path and the list of match counts are printed.
        The parameter name shadows the builtin and is kept only so that
        existing callers keep working.
        """
        for ruleFile, rules in list.items():
            print("file: %s:" % (ruleFile))
            # Build a real list so the output looks the same whether
            # values() returns a list or a view.
            print([count for count in rules.values()])


if __name__ == "__main__":
    print("Note: this is beta code.")
    app = ruleAnalyzer()
    app.run()