#!/usr/bin/python
#
# Copyright (C) 2005 Todd Troxell <ttroxell@debian.org>
#
# Logcheck Rulefiles Analyzer - Get statistics about rule effectiveness

# Logcheck Rulefiles Analyzer is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# Logcheck is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Logcheck; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import getopt
import os
import subprocess
import sys

class ruleAnalyzer:
	""" The Main Application

	Walks a directory of logcheck rulefiles, runs every rule (one
	extended regular expression per line) against a logfile with egrep,
	and prints how many lines each rule matched.

	State lives in self.ruleFiles, a dict mapping
	rulefile path -> {rule regex -> number of matching log lines}.
	"""

	def __init__(self):
		# Root rulefiles directory
		self.ruleDir = "rulefiles/"
		# Display debug output
		self.debugOutput = False
		# File to scan
		self.logfile = "/var/log/syslog"

	def run(self):
		""" Load all rules, scan the logfile and print the report """
		self.makeDataStructures()
		self.readRuleFiles()
		for x in self.ruleFiles:
			self.readRuleFile(x)

		self.countLines()
		self.analyzeLogFile(self.logfile)
		self.report()

	def debug(self, output):
		""" Print debug output (only when self.debugOutput is set) """
		if self.debugOutput:
			print(output)

	def countLines(self):
		""" Store the number of lines of self.logfile in self.lineCount """
		# Binary mode: the log may contain non-text bytes (egrep is run
		# with --text for the same reason).  Iterating the file counts
		# lines without loading the whole log into memory.
		with open(self.logfile, 'rb') as f:
			self.lineCount = sum(1 for _ in f)

	def makeDataStructures(self):
		""" Reset the rule table and all counters """
		self.ruleFiles = dict() # This is the big one
		self.lineCount = 0
		self.egrepCallCount = 0
		self.ruleCount = 0
		self.numRuleFiles = 0

	def readRuleFiles(self):
		""" Registers every file below self.ruleDir in self.ruleFiles

		Only the paths are recorded here (each mapped to an empty dict);
		the rules themselves are loaded by readRuleFile().
		"""
		for root, dirs, files in os.walk(self.ruleDir):
			if 'CVS' in dirs: # Don't scan CVS dirs
				# Pruning dirs in place stops os.walk from descending.
				dirs.remove('CVS')
			for name in files:
				path = os.path.join(root, name)
				self.debug(path)
				self.ruleFiles[path] = dict()
		self.numRuleFiles = len(self.ruleFiles)

	def readRuleFile(self, file):
		""" Reads a single rulefile into self.ruleFiles[file]

		Each rule line becomes a key with a match count of 0 and
		increments self.ruleCount.  Blank lines and lines starting
		with '#' are skipped: an empty pattern would match every log
		line, and logcheck reportedly drops both kinds of line when it
		loads rulefiles (TODO confirm against the logcheck docs).

		Exits the program with status -1 if the file cannot be opened.
		"""
		self.debug("reading rulefile: %s" % (file))
		try:
			f = open(file, "r")
		except (IOError, OSError):
			print("Error opening %s" % (file))
			sys.exit(-1)
		with f:
			for line in f:
				# Strip only the line terminator; a final line without
				# one must not lose its last character.
				rule = line.rstrip("\n")
				if not rule.strip() or rule.startswith("#"):
					continue
				self.ruleFiles[file][rule] = 0
				self.ruleCount += 1

	def analyzeLogFile(self, path):
		""" Checks every rule against every logline

		Runs one egrep per rule over the logfile at path, adds the
		reported match count to the rule's entry in self.ruleFiles and
		counts the invocation in self.egrepCallCount.
		"""
		# We call egrep directly here, many many times.
		# This is because python's re is not 100% compatible
		# with GNU grep, and we have user-contributed rules.
		for file, regex in self.ruleFiles.items():
			for r in sorted(regex):
				self.debug("trying regex %s\n\n\n\n\n" % (r))
				p = subprocess.Popen(
					("/bin/egrep", "--text", "-c", "-e", r, path),
					shell=False, stdout=subprocess.PIPE)
				# communicate() drains the pipe before waiting, so the
				# child can never block on a full stdout pipe.
				output = p.communicate()[0]
				if p.returncode == 0:
					regex[r] += int(output.strip())
				elif p.returncode > 1:
					# 1 just means "no match"; anything above is an
					# egrep failure (bad regex, unreadable file, ...).
					self.debug("egrep failed (status %i) for regex %s"
						% (p.returncode, r))
				self.egrepCallCount += 1

			print("analyzed %s." % (file))

	def report(self):
		""" Reports on analyzed data. """
		# TODO: report the most and least matched rules.

		self.printList(self.ruleFiles)
		print("*** Summary ***")
		print("Total rulefiles:\t%i" % (self.numRuleFiles))
		print("Total rules\t\t%i" % (self.ruleCount))
		print("Egrep invocations:\t%i" % (self.egrepCallCount))
		print("Total comparisons made:\t%i" % (self.egrepCallCount * self.lineCount))
		print("Line count:\t\t%i" % (self.lineCount))

	def printList(self, list):
		""" Rudimentary output function

		Prints, for every rulefile in the given mapping
		(path -> {rule -> count}), its path and its match counts.
		"""
		# The parameter keeps its historical name for keyword callers
		# even though it shadows the builtin; hence the comprehension
		# below instead of list(...).
		for file, regex in list.items():
			print("file: %s:" % (file))
			print([count for count in regex.values()])

# Script entry: build the analyzer with its default settings and run it.
# print(...) with a single argument behaves the same on Python 2 and 3.
print("Note: this is beta code.")
app = ruleAnalyzer()
app.run()
