"""
Flow-betweenness calculation for an interaction network: this module provides
a command-line entry point and the importable ``betweenness`` function.
"""
try:
import argparse
except ImportError:
raise ImportError("require argparse")
#numerical / data packages
try:
import numpy as np
np.set_printoptions(threshold=10)
except ImportError:
raise ImportError("require numpy")
try:
import pandas as pd
except ImportError:
raise ImportError("require pandas")
try:
import scipy
import scipy.sparse
import scipy as sp
except ImportError:
raise ImportError("require scipy")
#utilities
import os
import sys
import gc
import copy
import time
#self defined functions
if __name__ == "__main__":
from functions import betweenness_calc as bt_calc
else:
from .functions import betweenness_calc as bt_calc
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the interaction
    # network from a CSV file, compute the betweenness matrix through
    # bt_calc.getBtwMat and write the requested CSV output files.
    print("Invoking original")
    parser = argparse.ArgumentParser(
        description="Loads the specified GB interaction network and calculates the corresponding flow betweenness network"
    )
    parser.add_argument(
        '-indir', '--inputDirectory', default='.', dest='inDir',
        help='Path to the directory containing the interaction network file. Defaults to currently active directory'
    )
    parser.add_argument(
        '-i', '--interactionFileName',
        help='Name of the GB interaction energy data file to load (required). This data file should contain a'
             '\nsingle interaction network. If there are more networks / interactions present, you will need to'
             '\nuse the "-selectionQueryStrings" argument to specify a pandas.DataFrame.query search query string'
             '\nthat can select a single network from the data. If duplicate edges are present, the program'
             '\nwill crash or give unpredictable results'
    )
    parser.add_argument(
        '-q', '--selectionQueryStrings', nargs='*',
        help='List of query strings to select entries from the interaction data that specify a single network'
             '\nto be analyzed. If multiple strings are provided, the results are concatenated using pd.concat'
    )
    parser.add_argument(
        '-c', '--NodeColumns', default=['Resid_1', 'Resid_2'], dest='nodeColumns', nargs='*',
        help='Names of the columns containing the names of the interacting nodes for each interaction entry'
             '\nexactly two arguments should be given. If not only the first two entries will get used.'
    )
    parser.add_argument(
        '-e', '--energyColumn', default='TOTAL',
        help='Name of the column containing the energy values of each interaction used to compute betweenness weights'
    )
    # NOTE(review): the -s / -t help texts describe DataFrame.query strings,
    # but the values are used below as literal node names looked up in the
    # name -> index map. Confirm which behavior is intended.
    parser.add_argument(
        '-s', '--sourceNodeNames', nargs='+', dest='sourceNodeNames',
        help='string to be fed to the pandas DataFrame.query function to collect a list of source node names'
             '\nif multiple entries are given, each will be fed and the results aggregated into a list of'
             '\nof unique node names'
    )
    parser.add_argument(
        '-t', '--targetNodeNames', nargs='+', dest='targetNodeNames',
        help='string to be fed to the pandas DataFrame.query function to collect a list of target node names'
             '\nif multiple entries are given, each will be fed and the results aggregated into a list of'
             '\nof unique node names. Note: if there is any overlap. I.e. sourceNodes and targetNodes litst'
             '\ncontain some of the same node(s) then a warning will be thrown and the common nodes will be put'
             '\ninto the source node list.'
    )
    parser.add_argument(
        '-outdir', '--outputFileDirectory', default='.', dest='outDir',
        help='Path of the directory to write output files to'
    )
    parser.add_argument(
        '-o', '--outputFileNameBase',
        help='Base of the filenames e.g. edge betweenness would be in "outputFileNameBase.EdgeBetweenness.csv" (required)'
    )
    parser.add_argument(
        '-ft', '--writeFullTable', nargs='?', default=False, const=True,
        help='If this flag is set, the output table will contain all data from each row of the input dataframe'
             'otherwise it will only contain the node columns and betweenness column'
    )
    parser.add_argument(
        '-wnvec', '--writeNodeVector', nargs='?', const=True, default=False,
        help='If flag is given, node betweenness will also be computed written to "outputFileNameBase.NodeBetweenness.csv"'
    )
    parser.add_argument(
        '-windmap', '--writeMatrixIndexToNodeNameMap', nargs='?', const=True, default=False,
        help='If this flag is given, a data frame containging the columns "MatInd" and "NodeName" is written to'
             '\n"outputFileNameBase.IndToNameMap.csv"'
    )
    parser.add_argument(
        '-dryrun', nargs='?', const=True, default=False,
        help='Dont run anything, jsut print out input argument namespace and end program'
    )
    parser.add_argument(
        '-v', '--verbose', nargs='?', const=True, default=False,
        help='controls printing of progress / information to stdout during run'
    )
    parser.add_argument(
        '-vl', '--verboseLevel', default=0,
        help='when verbose flag is given, controls the amount of detail printed'
    )
    args = parser.parse_args()

    if args.verbose or args.dryrun:
        print('Input arguments:', args)

    if not args.dryrun:
        # These options have no usable default. Report them through argparse
        # instead of failing later with a TypeError on None. The check sits
        # after the dry-run test so that -dryrun still works without them.
        requiredOptions = (
            ('-i/--interactionFileName', args.interactionFileName),
            ('-o/--outputFileNameBase', args.outputFileNameBase),
            ('-s/--sourceNodeNames', args.sourceNodeNames),
            ('-t/--targetNodeNames', args.targetNodeNames),
        )
        missingOptions = [flag for flag, value in requiredOptions if value is None]
        if missingOptions:
            parser.error('missing required argument(s): ' + ', '.join(missingOptions))
        if len(args.nodeColumns) < 2:
            parser.error('-c/--NodeColumns requires two column names')

        verbose = args.verbose
        verboseLevel = int(args.verboseLevel)
        outFileBase = args.outputFileNameBase
        inputFile = os.path.join(args.inDir, args.interactionFileName)

        if verbose:
            # Compare the int-converted level: the raw argparse value is a
            # string whenever -vl is passed explicitly.
            print('loading data', end='\n' if verboseLevel == 0 else ",")
        interactionData = pd.read_csv(inputFile)
        if args.selectionQueryStrings:
            if verbose and (verboseLevel > 0):
                print('-filtering loaded data')
            interactionData = pd.concat([
                interactionData.query(selectionQuery)
                for selectionQuery in args.selectionQueryStrings
            ])

        # Only the first two entries are used, as the help text promises.
        nodeColumn_1, nodeColumn_2 = args.nodeColumns[:2]

        if verbose and (verboseLevel > 0):
            print('Building node name to index maps')
        # np.unique returns the sorted unique values, so the matrix index of a
        # node follows the sort order of the raw (pre-string) node labels.
        nodeNames = np.unique(np.concatenate([
            interactionData[nodeColumn_1].unique(),
            interactionData[nodeColumn_2].unique()]))
        nameToIndTable = pd.DataFrame({
            'NodeNames': np.array(nodeNames, dtype=str),
            'NodeInds': np.arange(len(nodeNames))
        })
        # Build the name -> index lookup once instead of on every access.
        nameToInd = nameToIndTable.set_index('NodeNames')['NodeInds']

        if args.writeMatrixIndexToNodeNameMap:
            if verbose and (verboseLevel > 0):
                print('saving node name indexing map')
            nameToIndTable.to_csv(
                os.path.join(args.outDir, outFileBase + '.IndToNameMap.csv'),
                index=False
            )

        if verbose and (verboseLevel > 1):
            print(interactionData.head())

        if verbose and (verboseLevel > 0):
            print('building source node list')
        sourceNodeNames = np.array(args.sourceNodeNames)
        sourceNodes = np.array([
            nameToInd.loc[sourceNodeName] for sourceNodeName in sourceNodeNames
        ])
        if verbose and (verboseLevel > 1):
            print('source nodes:')
            print(pd.DataFrame({'NodeNames': sourceNodeNames, 'MatrixIndices': sourceNodes}))

        if verbose and (verboseLevel > 0):
            print('building target node list')
        targetNodeNames = np.array(args.targetNodeNames)
        targetNodes = np.array([
            nameToInd.loc[targetNodeName] for targetNodeName in targetNodeNames
        ])
        if verbose and (verboseLevel > 1):
            print('target nodes:')
            print(pd.DataFrame({'NodeNames': targetNodeNames, 'MatrixIndices': targetNodes}))

        if verbose:
            print('Constructing network matrix')
        # Matrix row / column index of every interaction entry.
        rowInds = nameToInd.loc[interactionData[nodeColumn_1].map(str)].to_numpy()
        colInds = nameToInd.loc[interactionData[nodeColumn_2].map(str)].to_numpy()
        netMat = np.array(sp.sparse.coo_matrix(
            (interactionData[args.energyColumn].abs().to_numpy(), (rowInds, colInds)),
            shape=(len(nameToIndTable), len(nameToIndTable))
        ).todense())

        btwMat = np.array(bt_calc.getBtwMat(
            mat=netMat, sources=sourceNodes, targets=targetNodes,
            verbose=verbose, verboseLevel=verboseLevel,
            useProgressBar=False, useLegacyAlgorithm=False
        ))

        if verbose:
            print('Compiling betweenness table')
        btwTable = pd.DataFrame({
            nodeColumn_1: interactionData[nodeColumn_1],
            nodeColumn_2: interactionData[nodeColumn_2],
            'Betweenness': btwMat[rowInds, colInds]
        })
        if args.writeFullTable:
            if verbose:
                print('Joining Betweenness data to interaction data')
            # reset_index() moves the node columns back out of the index;
            # otherwise to_csv(index=False) below would drop them.
            btwTable = btwTable.set_index([nodeColumn_1, nodeColumn_2]).join(
                other=interactionData.set_index([nodeColumn_1, nodeColumn_2]),
                how='right').reset_index()
        if verbose:
            print('Saving betweenness data')
        btwTable.to_csv(
            os.path.join(args.outDir, outFileBase + '.EdgeBetweenness.csv'),
            index=False)

        if args.writeNodeVector:
            if verbose:
                print('Computing node betweenness')
            # Node value = half of the corresponding row sum of the matrix.
            nodeBtw = np.sum(btwMat, axis=1) / 2.
            nodeTable = pd.DataFrame({
                'NodeName': nameToIndTable['NodeNames'],
                'Betweenness': nodeBtw[nameToIndTable['NodeInds'].to_numpy()]
            })
            if verbose:
                print('Saving node betweenness data')
            nodeTable.to_csv(
                os.path.join(args.outDir, outFileBase + '.NodeBetweenness.csv'),
                index=False)
def _lookup_node_indices(nameToInd, nodeNames):
    """Return (names as a str array, matrix indices) for the given node names."""
    # str() lets callers pass [14, 240] as well as ['14', '240'];
    # np.atleast_1d lets a single bare name stand for a one-element list.
    labels = np.array([str(name) for name in np.atleast_1d(nodeNames)])
    indices = np.array([nameToInd.loc[label] for label in labels])
    return labels, indices


def betweenness(inDir, outDir, interactionFileName, outputFileNameBase='NO_NAME',
                selectionQueryStrings=None, nodeColumns=['Resid_1', 'Resid_2'],
                energyColumn='TOTAL', sourceNodeNames=None, targetNodeNames=None,
                writeFullTable=False, writeNodeVector=True,
                writeMatrixIndexToNodeNameMap=True, dryrun=False, verbose=True,
                verboseLevel=0):
    """
    Compute the betweenness of an interaction network and write it to CSV.

    The interaction table is read from ``inDir/interactionFileName``,
    optionally filtered, converted into a dense matrix whose entries are the
    absolute values of ``energyColumn`` and handed to ``bt_calc.getBtwMat``
    together with the source and target node indices. The resulting edge
    betweenness is written to ``outDir/outputFileNameBase.EdgeBetweenness.csv``.

    Parameters
    ----------
    inDir : str or None
        Directory containing the interaction file; ``None`` means ``'.'``.
    outDir : str or None
        Directory the output files are written to; ``None`` means ``'.'``.
    interactionFileName : str
        Name of the CSV file to load (required).
    outputFileNameBase : str, default 'NO_NAME'
        Prefix of every output file name.
    selectionQueryStrings : list of str or str, optional
        ``pandas.DataFrame.query`` strings. Each one is applied to the loaded
        table and the results are concatenated. ``None`` or an empty list
        keeps the whole table.
    nodeColumns : sequence of str, default ['Resid_1', 'Resid_2']
        Names of the two node columns; only the first two entries are used.
        The default list is never modified.
    energyColumn : str, default 'TOTAL'
        Column whose absolute values fill the network matrix.
    sourceNodeNames, targetNodeNames : sequence (required)
        Names of the source / target nodes. Entries are converted with
        ``str`` before the lookup, so ``[14, 240]`` and ``['14', '240']``
        are equivalent.
    writeFullTable : bool, default False
        If true, the edge table also carries every column of the input table.
    writeNodeVector : bool, default True
        If true, half of each row sum of the betweenness matrix is written to
        ``outputFileNameBase.NodeBetweenness.csv``.
    writeMatrixIndexToNodeNameMap : bool, default True
        If true, the ``NodeNames`` / ``NodeInds`` table is written to
        ``outputFileNameBase.IndToNameMap.csv``.
    dryrun : bool, default False
        If true, only print the input arguments and return.
    verbose : bool, default True
        Print progress information to stdout.
    verboseLevel : int, default 0
        Amount of detail printed when ``verbose`` is true.

    Returns
    -------
    None
        All results are written to files.

    Raises
    ------
    TypeError
        If a required input is ``None`` on a real (non dry-run) call.
    KeyError
        If a source or target node name does not occur in the network.

    Example
    -------
    current_flow_allostery.betweenness(\
    '{1}',\
    '{2}',\
    '{3}.csv',\
    '{3}.Betweenness',\
    sourceNodeNames=['14','240','466','692','918','1144'],\
    targetNodeNames=['47','273','499','725','951','1177'],\
    writeFullTable=True,\
    verboseLevel=2\
    )

    Other notes
    -----------
    If there are multiple files to be run, this can be ran parallel following
    the example in Step1_Run_GB_Network_Betweenness.slurm.bash.
    """
    # `is None` rather than `== None`: the latter is evaluated element-wise on
    # numpy arrays and makes the `if` raise for array-valued node name lists.
    if inDir is None:
        inDir = '.'
    if outDir is None:
        outDir = '.'
    if interactionFileName is None:
        print('INPUT FILENAME MISSING')
    if sourceNodeNames is None:
        print('CANNOT BE BLANK: check sourceNodeNames input')
    if targetNodeNames is None:
        print('CANNOT BE BLANK: check targetNodeNames input')

    if verbose or dryrun:
        print('Input arguments:', inDir, outDir, interactionFileName, outputFileNameBase,
              selectionQueryStrings, nodeColumns, energyColumn, sourceNodeNames,
              targetNodeNames, writeFullTable, writeNodeVector,
              writeMatrixIndexToNodeNameMap, dryrun, verbose, verboseLevel)
    if dryrun:
        return None

    # Fail before any file is read or written. Previously a missing input only
    # surfaced later as an unrelated-looking TypeError, possibly after partial
    # output had already been produced.
    required = (
        ('interactionFileName', interactionFileName),
        ('outputFileNameBase', outputFileNameBase),
        ('sourceNodeNames', sourceNodeNames),
        ('targetNodeNames', targetNodeNames),
    )
    missing = [name for name, value in required if value is None]
    if missing:
        raise TypeError('betweenness() requires a value for: ' + ', '.join(missing))

    verboseLevel = int(verboseLevel)
    outFileBase = outputFileNameBase
    inputFile = os.path.join(inDir, interactionFileName)

    if verbose:
        print('loading data', end='\n' if verboseLevel == 0 else ",")
    interactionData = pd.read_csv(inputFile)
    # A bare string would otherwise be iterated character by character.
    if isinstance(selectionQueryStrings, str):
        selectionQueryStrings = [selectionQueryStrings]
    if (selectionQueryStrings is not None) and len(selectionQueryStrings) > 0:
        if verbose and (verboseLevel > 0):
            print('-filtering loaded data')
        interactionData = pd.concat([
            interactionData.query(selectionQuery)
            for selectionQuery in selectionQueryStrings
        ])

    nodeColumn_1 = nodeColumns[0]
    nodeColumn_2 = nodeColumns[1]

    if verbose and (verboseLevel > 0):
        print('Building node name to index maps')
    # np.unique returns the sorted unique values, so the matrix index of a
    # node follows the sort order of the raw (pre-string) node labels.
    nodeNames = np.unique(np.concatenate([
        interactionData[nodeColumn_1].unique(),
        interactionData[nodeColumn_2].unique()]))
    nameToIndTable = pd.DataFrame({
        'NodeNames': np.array(nodeNames, dtype=str),
        'NodeInds': np.arange(len(nodeNames))
    })
    # Build the name -> index lookup once instead of on every access.
    nameToInd = nameToIndTable.set_index('NodeNames')['NodeInds']

    if writeMatrixIndexToNodeNameMap:
        if verbose and (verboseLevel > 0):
            print('saving node name indexing map')
        nameToIndTable.to_csv(
            os.path.join(outDir, outFileBase + '.IndToNameMap.csv'),
            index=False
        )

    if verbose and (verboseLevel > 1):
        print(interactionData.head())

    if verbose and (verboseLevel > 0):
        print('building source node list')
    sourceNodeNames, sourceNodes = _lookup_node_indices(nameToInd, sourceNodeNames)
    if verbose and (verboseLevel > 1):
        print('source nodes:')
        print(pd.DataFrame({'NodeNames': sourceNodeNames, 'MatrixIndices': sourceNodes}))

    if verbose and (verboseLevel > 0):
        print('building target node list')
    targetNodeNames, targetNodes = _lookup_node_indices(nameToInd, targetNodeNames)
    if verbose and (verboseLevel > 1):
        print('target nodes:')
        print(pd.DataFrame({'NodeNames': targetNodeNames, 'MatrixIndices': targetNodes}))

    if verbose:
        print('Constructing network matrix')
    # Matrix row / column index of every interaction entry.
    rowInds = nameToInd.loc[interactionData[nodeColumn_1].map(str)].to_numpy()
    colInds = nameToInd.loc[interactionData[nodeColumn_2].map(str)].to_numpy()
    netMat = np.array(sp.sparse.coo_matrix(
        (interactionData[energyColumn].abs().to_numpy(), (rowInds, colInds)),
        shape=(len(nameToIndTable), len(nameToIndTable))
    ).todense())

    btwMat = np.array(bt_calc.getBtwMat(
        mat=netMat, sources=sourceNodes, targets=targetNodes,
        verbose=verbose, verboseLevel=verboseLevel,
        useProgressBar=False, useLegacyAlgorithm=False
    ))

    if verbose:
        print('Compiling betweenness table')
    btwTable = pd.DataFrame({
        nodeColumn_1: interactionData[nodeColumn_1],
        nodeColumn_2: interactionData[nodeColumn_2],
        'Betweenness': btwMat[rowInds, colInds]
    })
    if writeFullTable:
        if verbose:
            print('Joining Betweenness data to interaction data')
        # reset_index() moves the node columns back out of the index;
        # otherwise to_csv(index=False) below would drop them.
        btwTable = btwTable.set_index([nodeColumn_1, nodeColumn_2]).join(
            other=interactionData.set_index([nodeColumn_1, nodeColumn_2]),
            how='right').reset_index()
    if verbose:
        print('Saving betweenness data')
    btwTable.to_csv(
        os.path.join(outDir, outFileBase + '.EdgeBetweenness.csv'),
        index=False)

    if writeNodeVector:
        if verbose:
            print('Computing node betweenness')
        # Node value = half of the corresponding row sum of the matrix.
        nodeBtw = np.sum(btwMat, axis=1) / 2.
        nodeTable = pd.DataFrame({
            'NodeName': nameToIndTable['NodeNames'],
            'Betweenness': nodeBtw[nameToIndTable['NodeInds'].to_numpy()]
        })
        if verbose:
            print('Saving node betweenness data')
        nodeTable.to_csv(
            os.path.join(outDir, outFileBase + '.NodeBetweenness.csv'),
            index=False)
    return None