import csv
import errno
import os
import os.path
import re
import sys

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pygraphviz as pygraphviz
import pylab

import ext.util
# Matches the first "<nick>" token of a chat line; group(1) is the bare nick.
_SENDER_PATTERN = re.compile(r"\<(.*?)\>")

# Every CSV always carries at least the buckets "deg0" .. "deg999".
_MIN_DEGREE_BUCKETS = 1000


def _two_digit(number):
    """Return *number* as text, prefixed with "0" when it is below 10."""
    return ("0" if number < 10 else "") + str(number)


def _is_message_line(line):
    """Tell whether *line* looks like a chat message ("[hh:mm] <nick> text")."""
    return not line.startswith('=') and "] <" in line and "> " in line


def _is_nick_change_line(line):
    """Tell whether *line* is a '='-prefixed status line other than a topic change."""
    # NOTE(review): every '=' line except topic changes is parsed as a nick
    # change, exactly as before; confirm the logs hold no other '=' events.
    return line.startswith('=') and "changed the topic of" not in line


def _parse_nick_change(line):
    """Return the (old, new) nick pair sliced out of a nick-change line."""
    line_end = line.find("\n")
    if line_end == -1:
        # Last line of a file without a trailing newline: do not let the
        # -1 from find() chop the final character off the new nick.
        line_end = len(line)
    old_nick = line[line.find("=") + 1:line.find(" is")][3:]
    new_nick = line[line.find("wn as") + 1:line_end][5:]
    return (ext.util.correctLastCharCR(old_nick),
            ext.util.correctLastCharCR(new_nick))


def _collect_nicks(content):
    """Return every nick seen as a message sender or in a nick change."""
    raw_senders = []
    seen = set()
    for line in content:
        if not _is_message_line(line):
            continue
        match = _SENDER_PATTERN.search(line)
        if match is None:
            # "] <" and "> " can both occur without a "<...>" pair.
            continue
        sender = match.group(1)
        if sender not in seen:
            seen.add(sender)
            raw_senders.append(sender)

    nicks = [ext.util.correctLastCharCR(sender) for sender in raw_senders]
    for line in content:
        if _is_nick_change_line(line):
            for nick in _parse_nick_change(line):
                if nick not in nicks:
                    nicks.append(nick)
    return nicks


def _group_nick_aliases(content):
    """Return lists of nicks that nick-change lines tie to the same person."""
    groups = []
    for line in content:
        if not _is_nick_change_line(line):
            continue
        old_nick, new_nick = _parse_nick_change(line)
        for group in groups:
            if old_nick in group or new_nick in group:
                break
        else:
            # Neither nick is known yet: start a new alias group.
            group = []
            groups.append(group)
        group.append(old_nick)
        group.append(new_nick)
    return groups


def _canonical_nick(nick, alias_groups):
    """Return the first nick of the alias group holding *nick*, else *nick*."""
    for group in alias_groups:
        if nick in group:
            return group[0]
    return nick


def _add_message_edges(graph, line, nicks, alias_groups):
    """Add one sender->receiver edge to *graph* per nick addressed in *line*."""
    match = _SENDER_PATTERN.search(line)
    if match is None:
        return
    sender = ext.util.correctLastCharCR(match.group(1))

    # Split about ':'; index 0 holds the hour part of the timestamp and
    # index 1 starts with the rest of the prefix, e.g. "02] <Dhruv> Rohan".
    colon_parts = [part.strip() for part in line.split(':')]
    if len(colon_parts) < 2:
        return
    first_part = colon_parts[1]
    colon_parts[1] = first_part[first_part.find(">") + 1:][1:]
    if not colon_parts[1]:
        return
    colon_parts = [ext.util.correctLastCharCR(part) if part else part
                   for part in colon_parts]

    # Receivers may also be comma separated: "<Dhruv> Rohan, Ram: hi".
    comma_parts = None
    lone_receiver = None
    if "," in colon_parts[1]:
        comma_parts = [part.strip() for part in colon_parts[1].split(',')]
        comma_parts = [ext.util.correctLastCharCR(part) if part else part
                       for part in comma_parts]
    else:
        # Fallback form "<Dhruv> Rohan, Hi!": text between '>' and ", ".
        lone_receiver = line[line.find(">") + 1:line.find(", ")][1:]
        lone_receiver = ext.util.correctLastCharCR(lone_receiver)

    nick_sender = _canonical_nick(sender, alias_groups)
    timestamp = line[1:6]
    for nick in nicks:
        if nick == sender:
            # A user addressing their own nick produces no edge.
            continue
        hits = colon_parts.count(nick)
        if comma_parts is not None:
            hits += comma_parts.count(nick)
        elif lone_receiver == nick:
            hits += 1
        if not hits:
            continue
        nick_receiver = _canonical_nick(nick, alias_groups)
        for _ in range(hits):
            graph.add_edge(nick_sender, nick_receiver, weight=timestamp)


def _build_conversation_graph(content):
    """Build the directed multigraph of who addressed whom in one log file."""
    nicks = _collect_nicks(content)
    alias_groups = _group_nick_aliases(content)
    graph = nx.MultiDiGraph()
    for line in content:
        if _is_message_line(line):
            _add_message_edges(graph, line, nicks, alias_groups)
    return graph


def _is_impossible_calendar_day(month, day):
    """Tell whether month/day is a date the original code never reported."""
    # February 29th is included here, so a missing leap-day log is silent.
    if month == 2 and day in (29, 30, 31):
        return True
    return month in (4, 6, 9, 11) and day == 31


def _histogram_row(label, degrees, bucket_count):
    """Return [label, node total, count of degree 0, count of degree 1, ...]."""
    counts = [0] * bucket_count
    for degree in degrees:
        counts[degree] += 1
    return [label, sum(counts)] + counts


def _open_csv_for_writing(path):
    """Open *path* the way the csv module expects on the running Python."""
    if sys.version_info[0] < 3:
        return open(path, 'wb')
    return open(path, 'w', newline='')


def _write_degree_csv(path, title, days, bucket_count):
    """Write one degree table with a column per day and a row per bucket."""
    header = [title, 'total'] + ['deg' + str(i) for i in range(bucket_count)]
    table = [header]
    for label, degrees in days:
        table.append(_histogram_row(label, degrees, bucket_count))
    with _open_csv_for_writing(path) as handle:
        writer = csv.writer(handle, quoting=csv.QUOTE_ALL)
        # Transpose: each day was built as a row but is stored as a column.
        for column in zip(*table):
            writer.writerow(column)


def degreeMessageNumberCSV(log_directory, channel_name, output_directory, startingDate, startingMonth, endingDate, endingMonth):
    """ creates three csv files having no. of nodes with a certain out, in and total degree for number of messages, one column per day

    For every day in the range a conversation graph is built from that day's
    log (an edge for each message that addresses another nick) and the nodes
    are counted per degree. The files written are ``msg_no_out_degree.csv``,
    ``msg_no_in_degree.csv`` and ``msg_no_total_degree.csv``. Each has a first
    column of row titles followed by one column per processed day, holding the
    day label ``<month>-<day>``, the node total, and the count for every
    degree bucket. There are at least 1000 buckets; more are added when a
    larger degree is observed.

    Days whose log file is missing are skipped; a message is printed for them
    unless the date is Feb 29-31 or the 31st of a 30-day month. The degree
    data of each processed day is also printed to stdout.

    Args:
        log_directory (str): Location of the logs (Assumed to be arranged in directory structure as : <year>/<month>/<day>/<log-file-for-channel>.txt).
            The month directory name is appended directly to this string.
        channel_name (str): Channel to perform analysis on
        output_directory (str): Location of output directory. File names are
            appended directly to this string, so it should end with a path
            separator.
        startingDate (int): Date to start the analysis (in conjunction with startingMonth)
        startingMonth (int): Month to start the analysis (in conjunction with startingDate)
        endingDate (int): Date to end the analysis (in conjunction with endingMonth)
        endingMonth (int): Month to end the analysis (in conjunction with endingDate)

    Returns:
        None

    Raises:
        OSError: if the output directory cannot be created.
    """
    output_file_out_degree = output_directory + "msg_no_out_degree.csv"
    output_file_in_degree = output_directory + "msg_no_in_degree.csv"
    output_file_total_degree = output_directory + "msg_no_total_degree.csv"

    target_dir = os.path.dirname(output_directory)
    if target_dir and not os.path.exists(target_dir):
        try:
            os.makedirs(target_dir)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    out_degree_days = []
    in_degree_days = []
    total_degree_days = []
    highest_degree = 0

    for month in range(startingMonth, endingMonth + 1):
        first_day = startingDate if month == startingMonth else 1
        stop_day = endingDate + 1 if month == endingMonth else 32
        for day in range(first_day, stop_day):
            file_path = (log_directory + _two_digit(month) + "/"
                         + _two_digit(day) + "/" + channel_name + ".txt")
            if not os.path.exists(file_path):
                if not _is_impossible_calendar_day(month, day):
                    print("[Error] Path " + file_path + " doesn't exist")
                continue
            with open(file_path) as log_file:
                content = log_file.readlines()

            graph = _build_conversation_graph(content)

            # dict(...) accepts both the plain dict returned by older
            # networkx releases and the degree views of newer ones.
            out_degree = dict(graph.out_degree())
            in_degree = dict(graph.in_degree())
            total_degree = dict(graph.degree())

            # Diagnostic output kept from the original implementation.
            print("%s %s %s" % (out_degree, in_degree, total_degree))
            print(list(out_degree.values()))
            print(list(in_degree.values()))
            print(list(total_degree.values()))
            print("\n")

            label = str(month) + "-" + str(day)
            out_degree_days.append((label, list(out_degree.values())))
            in_degree_days.append((label, list(in_degree.values())))
            total_degree_days.append((label, list(total_degree.values())))
            if total_degree:
                # Total degree is never below the in- or out-degree.
                highest_degree = max(highest_degree, max(total_degree.values()))

    # Grow past the default size only when some node needs a higher bucket.
    bucket_count = max(_MIN_DEGREE_BUCKETS, highest_degree + 1)
    _write_degree_csv(output_file_out_degree, 'out-degree/day>',
                      out_degree_days, bucket_count)
    _write_degree_csv(output_file_in_degree, 'in-degree/day>',
                      in_degree_days, bucket_count)
    _write_degree_csv(output_file_total_degree, 'degree/day>',
                      total_degree_days, bucket_count)