# Source code for lib.MessageNumberBinsCSV
import csv
import errno
import os
import os.path
import re

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pygraphviz as pygraphviz
import pylab

import ext.util
def _is_message_line(line):
    """Return True when *line* does not start with '=' and contains both
    ``"] <"`` and ``"> "`` (the shape this module counts as a chat message)."""
    return not line.startswith('=') and "] <" in line and "> " in line


def _half_hour_bin(line):
    """Return the half-hour bin index for *line*.

    The hour is read from ``line[1:3]`` and the minute from ``line[4:6]``
    (so a leading ``[HH:MM`` style timestamp is assumed). Minutes below 30
    map to bin ``2 * hour``; the rest map to ``2 * hour + 1``.

    Raises:
        ValueError: if either slice is not an integer.
    """
    hour = int(line[1:3])
    minute = int(line[4:6])
    return hour * 2 + (0 if minute < 30 else 1)


def _collect_nicks(content):
    """Return the list of nicknames found in the lines of one log file.

    Nicks come from the first ``<...>`` tag of every message line and from
    both names sliced out of every '='-prefixed line that is not a topic
    change.
    """
    # De-duplicate on the raw "<nick>" tag first, then strip the brackets and
    # normalise; this mirrors the original order of operations exactly.
    tags = []
    for line in content:
        if _is_message_line(line):
            tag = re.search(r"\<(.*?)\>", line).group(0)
            if tag not in tags:
                tags.append(tag)
    nicks = [ext.util.correctLastCharCR(tag[1:-1]) for tag in tags]

    for line in content:
        if line.startswith('=') and "changed the topic of" not in line:
            # NOTE(review): the slicing presumably targets lines shaped like
            # "=-= X is now known as Y" -- confirm against real logs.
            first = ext.util.correctLastCharCR(
                line[line.find("=") + 1:line.find(" is")][3:])
            second = ext.util.correctLastCharCR(
                line[line.find("wn as") + 1:line.find("\n")][5:])
            for nick in (first, second):
                if nick not in nicks:
                    nicks.append(nick)
    return nicks


def _count_addressed_nicks(line, sender, nicks):
    """Return how many recipient fields of *line* equal a nick other than *sender*.

    The recipient fields do not depend on which nick is being tested, so they
    are built once per line instead of once per nick.
    NOTE(review): this assumes ext.util.correctLastCharCR is a pure string
    transform -- confirm.
    """
    if not nicks:
        return 0

    # Splitting on ':' also splits the timestamp, e.g. "[12:34] <a> b: hi"
    # gives fields[1] == "34] <a> b"; what follows "> " is the addressee text.
    fields = [part.strip() for part in line.split(':')]
    fields[1] = fields[1][fields[1].find(">") + 1:][1:]
    if not fields[1]:
        return 0
    fields = [ext.util.correctLastCharCR(f) if f else f for f in fields]

    if "," in fields[1]:
        # Several comma-separated addressees before the colon.
        extra = [ext.util.correctLastCharCR(name) if name else name
                 for name in (piece.strip() for piece in fields[1].split(','))]
    else:
        # Text between the first '>' and the first ", " (or up to the last
        # character of the line when ", " is absent, since find() gives -1).
        single = line[line.find(">") + 1:line.find(", ")][1:]
        extra = [ext.util.correctLastCharCR(single)]

    candidates = fields + extra
    return sum(candidates.count(nick) for nick in nicks if nick != sender)


def createMessageNumberBinsCSV(log_directory, channel_name, output_directory, startingDate, startingMonth, endingDate, endingMonth):
    """Create a CSV file tracking the number of directed messages exchanged in
    a channel, in 48 half-hour bins spread over the day.

    One CSV row of 48 counters is appended per log file found, and the grand
    total over all processed files is printed at the end. The output file is
    opened in append mode, so running the function again adds further rows.

    Args:
        log_directory (str): Location of the logs. It is concatenated directly
            with ``<MM>/<DD>/<channel_name>.txt``, so it must end with a path
            separator and point at the directory that holds the month folders.
        channel_name (str): Channel to perform the analysis on.
        output_directory (str): Location of the output directory; concatenated
            directly with the file name, so it must end with a path separator.
        startingDate (int): Day of month to start the analysis (in conjunction with startingMonth).
        startingMonth (int): Month to start the analysis (in conjunction with startingDate).
        endingDate (int): Day of month to end the analysis (in conjunction with endingMonth).
        endingMonth (int): Month to end the analysis (in conjunction with endingDate).

    Returns:
        None

    Raises:
        OSError: if the output directory cannot be created for a reason other
            than it already existing.
    """
    # The year "2013" is hard-coded in the output file name.
    output_file = (output_directory + channel_name + "_2013_" + str(startingMonth)
                   + "_" + str(endingMonth) + "_output-parser-bins.csv")
    output_dir = os.path.dirname(output_file)
    # An empty dirname means "current directory": nothing to create.
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    totals = [0] * 48
    for month in range(startingMonth, endingMonth + 1):
        first_day = startingDate if month == startingMonth else 1
        last_day = endingDate if month == endingMonth else 31
        for day in range(first_day, last_day + 1):
            file_path = "%s%02d/%02d/%s.txt" % (log_directory, month, day, channel_name)
            if not os.path.exists(file_path):
                # Days that cannot exist in the calendar are skipped silently.
                impossible_day = ((month == 2 and day in (29, 30, 31))
                                  or (month in (4, 6, 9, 11) and day == 31))
                if not impossible_day:
                    print("[Error] Path " + file_path + " doesn't exist")
                continue

            with open(file_path) as f:
                content = f.readlines()

            bins = [0] * 48
            nicks = _collect_nicks(content)
            for line in content:
                # Only message lines are counted, so only they need a valid
                # timestamp; other lines are ignored without being parsed.
                if not _is_message_line(line):
                    continue
                bin_index = _half_hour_bin(line)
                sender = ext.util.correctLastCharCR(
                    re.search(r"\<(.*?)\>", line).group(0)[1:-1])
                bins[bin_index] += _count_addressed_nicks(line, sender, nicks)

            with open(output_file, 'a+') as myfile:
                wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
                wr.writerow(bins)
            totals = [total + count for total, count in zip(totals, bins)]

    print(sum(totals))