Showing posts with label python. Show all posts
Showing posts with label python. Show all posts

2010/07/07

A small Python script to keep an automated audit trail of WAR files deployed on a JBoss server that runs on Windows

The requirements were:
  1. Must be able to tell if the application war file was changed.
  2. Must run on windows.
  3. Must log to a centralized searchable repository.

For #1, an easy way to identify a file is to hash it; SHA-1 is a safe enough hash algorithm for this purpose.
For #3, we use Splunk as the log server, with syslog-ng as the network log receiver. We could use the Splunk listeners directly, but if Splunk went down for any reason we would lose log entries, so syslog-ng writes to a file and Splunk then indexes that file. Win-win.

The following script runs daily and submits the results (hashes) to a log server. The change management paperwork for each deployment includes the hash of the deployed file, so the logged hashes can be checked against it as a control.

You can find the source in my github

#!/usr/bin/env python
# encoding: utf-8
'''
   do-audit is a small script to create hashes from web applications in a jboss
   server for audit purposes. 
'''
import hashlib
import os
import stat
# time conversions
import time
# for hostname
import platform
# logging facilities
import logging
import logging.handlers
# email facilities
import smtplib
from email.mime.text import MIMEText

def sha1(_filename):
  '''Returns the SHA-1 hex digest of the bytes stored in a file.

  _filename: path of the file to hash.

  Returns the hexadecimal digest string, or the string
  "Cannot hash file: <path>" when the path is not a regular file.
  '''
  if not os.path.isfile(_filename):
    return "Cannot hash file: " + _filename
  digest = hashlib.sha1()
  # Binary mode is required: in text mode Windows translates line endings, so
  # the hash would not describe the bytes that are actually on disk.
  with open(_filename, 'rb') as stream:
    # Feed the hash in fixed-size chunks so a large archive is never held in
    # memory in one piece.
    while True:
      chunk = stream.read(65536)
      if not chunk:
        break
      digest.update(chunk)
  return digest.hexdigest()

def md5(_filename):
  '''Returns the MD5 hex digest of the bytes stored in a file.

  _filename: path of the file to hash.

  Returns the hexadecimal digest string, or the string
  "Cannot hash file: <path>" when the path is not a regular file.
  '''
  if not os.path.isfile(_filename):
    return "Cannot hash file: " + _filename
  digest = hashlib.md5()
  # Binary mode is required: in text mode Windows translates line endings, so
  # the hash would not describe the bytes that are actually on disk.
  with open(_filename, 'rb') as stream:
    # Feed the hash in fixed-size chunks so a large archive is never held in
    # memory in one piece.
    while True:
      chunk = stream.read(65536)
      if not chunk:
        break
      digest.update(chunk)
  return digest.hexdigest()

def get_fileinfo(_filename):
  '''Returns a one-line summary of a file's size and last-modified time.

  _filename: path of the file to describe.

  Returns 'Size=<bytes> LastMod=<YYYYmmdd-HH:MM>' with the modification time
  in local time, or the string "Cannot stat file: <path>" when the path is
  not a regular file.
  '''
  if not os.path.isfile(_filename):
    # Without this guard the final return ran with file_info unbound and
    # raised a NameError; report the problem as a string, like sha1()/md5().
    return "Cannot stat file: " + _filename
  file_stats = os.stat(_filename)
  file_info = {
    'fsize': file_stats.st_size,
    'f_lm': time.strftime("%Y%m%d-%H:%M", time.localtime(file_stats.st_mtime)),
  }
  return 'Size=%(fsize)s LastMod=%(f_lm)s' % file_info

# Jboss profiles (instances) whose deployments are audited
audited_instances = ['APPSRV1', 'APPSRV2', 'APPSRV3', 'APPSRV4']

# Default locations; edit these to match the local installation
jboss_basedir = '/srv/jboss-eap/server'
deployment_directory = 'theappdir'
# Only war files are audited for now; more extensions can be listed here
audited_extensions = ['war']
host = platform.node()

# Logger named after this host; INFO and above go to the syslog server
handler_syslog = logging.handlers.SysLogHandler(address=('syslogserver', 514))
my_logger = logging.getLogger(host)
my_logger.setLevel(logging.INFO)
my_logger.addHandler(handler_syslog)

# Reporting period (YYYYMM, local time) and the lines collected for the email
period = time.strftime("%Y%m")
tmp_email = []

# Walk every audited instance's deployment directory and report one
# APPAUDIT line per matching artifact: to stdout, to the syslog logger and
# to the buffer that later becomes the email body.
for instance in audited_instances:
  print(instance)
  fullsrvdir = os.path.join(jboss_basedir, instance, deployment_directory)
  for root, dirs, files in os.walk(fullsrvdir):
    for f in files:
      # Compare the real extension (the text after the last dot). Slicing the
      # last three characters also matched names with no dot at all, such as
      # "softwar", and could never match an extension of another length.
      extension = os.path.splitext(f)[1][1:].lower()
      if extension not in audited_extensions:
        continue
      filename = os.path.join(root, f)
      msg = "APPAUDIT Host=%s Instance=%s Period=%s Artifact=%s %s SHA1=%s MD5=%s" % \
        (host, instance, period, f, get_fileinfo(filename), sha1(filename), md5(filename))
      print(msg)
      # now we submit the log line to splunk or the syslog server
      my_logger.info(msg)
      tmp_email.append("%s\n" % (msg))

# Email settings
# NOTE(review): both addresses look like placeholders - set real ones before use
email_from = "[email protected]"
email_to = "[email protected]"
# we do unauthenticated smtp delivery
email_server = "smtp.server.name"

# Build the message: the body is every audit line collected above
email = MIMEText(''.join(tmp_email))
email['Subject'] = "App Audit - %s %s %s" % (time.strftime("%Y %B", time.localtime()),
                                             period, host)
email['From'] = email_from
email['To'] = email_to

# Send it, releasing the connection even when delivery raises
s = smtplib.SMTP(email_server)
try:
  s.sendmail(email_from, email_to, email.as_string())
finally:
  s.close()

2010/01/21

So it starts like this

A couple of days ago, I needed to create a list of the third-party libraries provided by JBoss EAP 4.3. The list would let the developers know which versions of the libraries they can configure in their applications, and it was to be published in our wiki for everyone to see.

We have the following requirements:
  • ant 1.6.5+
  • subversion
  • python 2.5+
  • a running version of confluence

The first step is to check out some files from the corresponding tag in the JBoss public repository:
# -p keeps this step repeatable: it does not fail when ~/temp already exists
mkdir -p ~/temp
cd ~/temp
svn co http://anonsvn.jboss.org/repos/jbossas/tags/JBPAPP_4_3_0_GA/build
svn co http://anonsvn.jboss.org/repos/jbossas/tags/JBPAPP_4_3_0_GA/tools
svn co http://anonsvn.jboss.org/repos/jbossas/tags/JBPAPP_4_3_0_GA/thirdparty

The second step is to run the corresponding ant build to generate the components info files
# Run the third-party ant build from the checked-out build directory
cd ~/temp/build
ant -f build-thirdparty.xml

Download this script and replace CHANGEME with the path to the correct location.
import os
import urllib2
import re
from BeautifulSoup import BeautifulSoup


basedir = '/CHANGEME/temp/thirdparty' # this is the directory that needs to be changed
infofile = 'component-info.xml'
ts = '|'
header = '||Component||Library||Version||Description||Comment||'

# Emit a confluence table: the header row first, then one row per artifact
# of every component directory that contains a component-info.xml file.
print(header)

for entry in sorted(os.listdir(basedir)):
    filepath = os.path.join(basedir, entry, infofile)
    if not os.path.isfile(filepath):
        continue

    # Read the file directly instead of through an unquoted file:// URL, and
    # always close the handle.
    info = open(filepath, 'rb')
    try:
        xml = info.read()
    finally:
        info.close()
    soup = BeautifulSoup(xml)

    component = soup.find('component')
    if component is None:
        # No <component> element was found: nothing to report for this entry
        continue

    # Look the attributes up per component so a missing one can never reuse
    # the value left over from the previous component. A single space stands
    # in for missing values, as the Comment column already does - presumably
    # so an empty cell does not collapse into a '||' header delimiter.
    attrs = dict(component.attrs)
    c_attr = attrs.get('id') or ' '
    v_attr = attrs.get('version') or ' '
    d_attr = str(attrs.get('description', '')).replace('\n', '').replace('  ', '') or ' '

    for artifact in soup.findAll('artifact'):
        # Take the id attribute by name rather than the first quoted value
        artifact_id = dict(artifact.attrs).get('id') or ' '
        print(ts + c_attr + ts + artifact_id + ts + v_attr + ts + d_attr + ts + " " + ts)

And finally run the script
python /path/to/the/script/jbjarversion.py > list

The file 'list' now contains the library list, formatted as a Confluence table.