Source code for lib.NickChangeGraph

import os.path
import re
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pylab
import pygraphviz as pygraphviz
import os
import ext.util

def createNickChangesGraph(log_directory, channel_name, output_directory,
                           startingDate, startingMonth, endingDate, endingMonth):
    """Draw one nick-change graph per daily log file of a channel.

    For every day in the requested range the channel log is read line by
    line.  Every line that starts with '=' and is not a topic change is
    treated as a nick change and becomes a directed edge ``old -> new`` in a
    fresh ``networkx.MultiDiGraph``.  Each edge carries, as both ``weight``
    and ``label``, the time stamp of the closest preceding ordinary message
    (characters 1..5 of that line -- presumably "HH:MM" inside "[HH:MM]";
    confirm against the log format).  When a nick change happens before any
    message of the day, the time of the last message seen in previously
    parsed files is used instead (``None`` if there was none).

    The graph of each day is rendered with Graphviz ``dot`` to
    ``<output_directory><channel_name>_<month>_<day>_nick_change.png``.
    A graph (possibly empty) is written for every log file found; missing
    log files are reported on stdout and skipped.

    Args:
        log_directory (str): Location of the logs, used as a plain string
            prefix (so it should end with a path separator).  Logs are
            expected at ``<log_directory><MM>/<DD>/<channel_name>.txt``.
        channel_name (str): Channel to perform the analysis on.
        output_directory (str): Prefix of the output files (normally a
            directory path ending with a separator).  Its directory part is
            created when it does not exist.
        startingDate (int): Day of month to start the analysis
            (in conjunction with startingMonth).
        startingMonth (int): Month to start the analysis
            (in conjunction with startingDate).
        endingDate (int): Day of month to end the analysis
            (in conjunction with endingMonth).
        endingMonth (int): Month to end the analysis
            (in conjunction with endingDate).

    Returns:
        None

    Raises:
        OSError: if the output directory cannot be created for a reason
            other than it already existing.
    """
    out_dir_nick_change = output_directory
    _ensure_output_dir(out_dir_nick_change)

    # Time stamp of the most recent ordinary message seen so far.  It is kept
    # across files on purpose: a nick change at the very top of a file is
    # stamped with the last message time of the previously parsed file.
    rem_time = None

    for month in range(startingMonth, endingMonth + 1):
        first_day = startingDate if month == startingMonth else 1
        last_day = endingDate if month == endingMonth else 31

        for day in range(first_day, last_day + 1):
            file_path = (log_directory + "%02d" % month + "/" + "%02d" % day
                         + "/" + channel_name + ".txt")

            if not os.path.exists(file_path):
                # Days that never exist in the calendar are skipped silently.
                if not _is_nonexistent_date(month, day):
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print("[Error] Path " + file_path + " doesn't exist")
                continue

            graph_nickchanges = nx.MultiDiGraph()
            with open(file_path) as log_file:
                for line in log_file:
                    if not line.startswith("="):
                        # Ordinary message: remember its time stamp for the
                        # nick changes that follow it.
                        rem_time = line[1:6]
                        continue
                    if "changed the topic of" in line:
                        # Topic changes also start with '=' but are not
                        # nick changes.
                        continue
                    old_nick, new_nick = _parse_nick_change(line)
                    graph_nickchanges.add_edge(old_nick, new_nick,
                                               weight=rem_time, label=rem_time)

            output_file = (out_dir_nick_change + channel_name + "_" + str(month)
                           + "_" + str(day) + "_nick_change.png")
            # Graphviz converts the graph to a PNG for visualisation.
            agraph = nx.drawing.nx_agraph.to_agraph(graph_nickchanges)
            agraph.layout(prog='dot')
            agraph.draw(output_file)
            print("Generated " + output_file)


def _ensure_output_dir(path_prefix):
    """Create the directory part of *path_prefix* if it does not exist yet."""
    import errno  # local import: the module header does not import errno

    parent = os.path.dirname(path_prefix)
    # An empty dirname means "current directory": nothing to create.
    if not parent or os.path.exists(parent):
        return
    try:
        os.makedirs(parent)
    except OSError as exc:
        # Guard against a race: another process may have created the
        # directory between the exists() check and makedirs().
        if exc.errno != errno.EEXIST:
            raise


def _is_nonexistent_date(month, day):
    """Return True for day/month pairs the caller never expects a log for.

    February 29 is included unconditionally, so a missing log on a leap day
    is never reported.
    """
    return ((month == 2 and day in (29, 30, 31))
            or (month in (4, 6, 9, 11) and day == 31))


def _parse_nick_change(line):
    """Return ``(old_nick, new_nick)`` sliced out of a nick-change line.

    The old nick is the text between the leading marker (first '=' plus the
    three following characters) and " is"; the new nick is the text after
    "wn as " up to the end of the line -- i.e. presumably a line of the form
    "=== old is now known as new".  Both nicks are passed through
    ``ext.util.correctLastCharCR`` (assumed to be a pure normalisation of
    the last character -- confirm in ext.util).
    """
    old_nick = line[line.find("=") + 1:line.find(" is")][3:]

    line_end = line.find("\n")
    if line_end == -1:
        # Last line of a file without trailing newline: keep the whole tail
        # instead of dropping the final character of the nick.
        line_end = len(line)
    new_nick = line[line.find("wn as") + 1:line_end][5:]

    return (ext.util.correctLastCharCR(old_nick),
            ext.util.correctLastCharCR(new_nick))