from ptm_pose import helpers, pose_config, project, flanking_sequences, nease_runner
from ptm_pose.splicing_tools.base import GenericDataset
import pandas as pd
import json
def get_skipped_region(MATS_SE, include_flanking = False):
    """
    Annotate MATS skipped exon (SE) events with the coordinates of the skipped exon and, if requested, of the two regions flanking it.

    The input DataFrame is modified in place and is also returned.

    Parameters
    ----------
    MATS_SE : pandas.DataFrame
        DataFrame containing skipped exon data from MATS. Must contain the 'exonStart_0base' and 'exonEnd' columns.
    include_flanking : bool, optional
        Whether to add the flanking region columns (default is False)

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with 'event_start' and 'event_end' added. When include_flanking is True and a recognised set of flanking columns is present, 'first_flank_start', 'first_flank_end', 'second_flank_start' and 'second_flank_end' are added as well.
    """
    MATS_SE['event_start'] = MATS_SE['exonStart_0base']
    MATS_SE['event_end'] = MATS_SE['exonEnd']
    if not include_flanking:
        return MATS_SE

    flank_targets = ['first_flank_start', 'first_flank_end', 'second_flank_start', 'second_flank_end']
    #the flanking exons may be labelled upstream*/downstream* or firstFlanking*/secondFlanking*; the first scheme found is used
    naming_schemes = [
        ['upstreamES', 'upstreamEE', 'downstreamES', 'downstreamEE'],
        ['firstFlankingES', 'firstFlankingEE', 'secondFlankingES', 'secondFlankingEE'],
    ]
    for source_cols in naming_schemes:
        #only the first column of a scheme is checked to decide whether that scheme is present
        if source_cols[0] in MATS_SE.columns:
            for target, source in zip(flank_targets, source_cols):
                MATS_SE[target] = MATS_SE[source]
            break
    #if neither naming scheme is present, no flanking columns are added
    return MATS_SE
def get_A3SS_region(MATS_A3SS, include_flanking = False):
    """
    Annotate MATS alternative 3' splice site (A3SS) events with the coordinates of the differentially spliced region and, if requested, of the regions flanking it.

    The region and flank assignment depends on the strand of each event:

    - '+' strand: the region runs from 'longExonStart_0base' to 'shortES'. The first flank is the flanking exon ('flankingES'/'flankingEE') and the second flank is the short exon ('shortES'/'shortEE'), which begins where the region ends.
    - any other strand value: the region runs from 'shortEE' to 'longExonEnd'. The first flank is the short exon, which ends where the region begins, and the second flank is the flanking exon.

    The input DataFrame is modified in place and is also returned.

    Parameters
    ----------
    MATS_A3SS : pandas.DataFrame
        DataFrame containing alternative 3' splice site data from MATS
    include_flanking : bool, optional
        Whether to include flanking sequence columns (default is False)

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with 'event_start' and 'event_end' added, plus 'first_flank_start', 'first_flank_end', 'second_flank_start' and 'second_flank_end' when include_flanking is True
    """
    #set the relevant start and end of the spliced region, which differ depending on the strand
    #any strand value other than '+' is handled as the reverse strand
    plus_strand = MATS_A3SS['strand'] == '+'

    def _by_strand(plus_col, other_col):
        #take plus_col for '+' strand events and other_col for all remaining events
        return MATS_A3SS[plus_col].where(plus_strand, MATS_A3SS[other_col])

    #compute everything before writing so the new columns never feed into each other
    event_start = _by_strand('longExonStart_0base', 'shortEE')
    event_end = _by_strand('shortES', 'longExonEnd')
    if include_flanking:
        #on '+' the flanking exon comes first and the short exon second; on the other strand the order is reversed
        first_flank_start = _by_strand('flankingES', 'shortES')
        first_flank_end = _by_strand('flankingEE', 'shortEE')
        second_flank_start = _by_strand('shortES', 'flankingES')
        second_flank_end = _by_strand('shortEE', 'flankingEE')

    #save region info
    MATS_A3SS['event_start'] = event_start
    MATS_A3SS['event_end'] = event_end
    if include_flanking:
        MATS_A3SS['first_flank_start'] = first_flank_start
        MATS_A3SS['first_flank_end'] = first_flank_end
        MATS_A3SS['second_flank_start'] = second_flank_start
        MATS_A3SS['second_flank_end'] = second_flank_end
    return MATS_A3SS
def get_A5SS_region(MATS_A5SS, include_flanking = False):
    """
    Annotate MATS alternative 5' splice site (A5SS) events with the coordinates of the differentially spliced region and, if requested, of the regions flanking it.

    The region and flank assignment depends on the strand of each event:

    - '+' strand: the region runs from 'shortEE' to 'longExonEnd'. The first flank is the short exon ('shortES'/'shortEE') and the second flank is the flanking exon ('flankingES'/'flankingEE').
    - any other strand value: the region runs from 'longExonStart_0base' to 'shortES'. The first flank is the flanking exon and the second flank is the short exon.

    The input DataFrame is modified in place and is also returned.

    Parameters
    ----------
    MATS_A5SS : pandas.DataFrame
        DataFrame containing alternative 5' splice site data from MATS
    include_flanking : bool, optional
        Whether to include flanking sequence columns (default is False)

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with 'event_start' and 'event_end' added, plus 'first_flank_start', 'first_flank_end', 'second_flank_start' and 'second_flank_end' when include_flanking is True
    """
    flank_columns = ('first_flank_start', 'first_flank_end', 'second_flank_start', 'second_flank_end')
    regions = []  #one (start, end) tuple per event
    flanks = []  #one tuple per event, ordered like flank_columns
    for _, event in MATS_A5SS.iterrows():
        if event['strand'] == '+':
            regions.append((event['shortEE'], event['longExonEnd']))
            if include_flanking:
                flanks.append((event['shortES'], event['shortEE'], event['flankingES'], event['flankingEE']))
        else:
            regions.append((event['longExonStart_0base'], event['shortES']))
            if include_flanking:
                flanks.append((event['flankingES'], event['flankingEE'], event['shortES'], event['shortEE']))

    #unpack the per-event tuples back into columns
    MATS_A5SS['event_start'] = [start for start, _ in regions]
    MATS_A5SS['event_end'] = [end for _, end in regions]
    if include_flanking:
        for position, column in enumerate(flank_columns):
            MATS_A5SS[column] = [flank[position] for flank in flanks]
    return MATS_A5SS
def get_RI_region(MATS_RI, include_flanking = False):
    """
    Annotate MATS retained intron (RI) events with the coordinates of the retained intron and, if requested, of the exons flanking it.

    The differentially spliced region is the span between the end of the upstream exon ('upstreamEE') and the start of the downstream exon ('downstreamES'). The first flank is the upstream exon and the second flank is the downstream exon, so both flanks adjoin the region instead of overlapping it.

    The input DataFrame is modified in place and is also returned.

    Parameters
    ----------
    MATS_RI : pandas.DataFrame
        DataFrame containing retained intron data from MATS. Must contain 'upstreamEE' and 'downstreamES', plus 'upstreamES' and 'downstreamEE' when include_flanking is True.
    include_flanking : bool, optional
        Whether to include flanking sequence columns (default is False)

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with 'event_start' and 'event_end' added, plus 'first_flank_start', 'first_flank_end', 'second_flank_start' and 'second_flank_end' when include_flanking is True
    """
    #the retained intron lies between the two exons, not across them
    MATS_RI['event_start'] = MATS_RI['upstreamEE']
    MATS_RI['event_end'] = MATS_RI['downstreamES']
    if include_flanking:
        MATS_RI['first_flank_start'] = MATS_RI['upstreamES']
        MATS_RI['first_flank_end'] = MATS_RI['upstreamEE']
        MATS_RI['second_flank_start'] = MATS_RI['downstreamES']
        MATS_RI['second_flank_end'] = MATS_RI['downstreamEE']
    return MATS_RI
def get_spliced_region(MATS, event_type, include_flanking = False):
    """
    Add spliced region (and optionally flanking region) coordinates to MATS data by dispatching to the function that handles the given event type.

    Parameters
    ----------
    MATS : pandas.DataFrame
        DataFrame containing MATS data for a single event type
    event_type : str
        One of 'SE', 'A3SS', 'A5SS', 'RI' or 'MXE'. 'MXE' is accepted but not processed: a message is printed and the data is returned unchanged.
    include_flanking : bool, optional
        Whether to include flanking sequence columns (default is False)

    Returns
    -------
    pandas.DataFrame
        The result of the event-specific function, or the unmodified input for 'MXE'

    Raises
    ------
    ValueError
        If event_type is not one of the values listed above
    """
    if event_type == 'MXE':
        print('MXE does not currently work')
        return MATS
    if event_type not in ('SE', 'A3SS', 'A5SS', 'RI'):
        raise ValueError("Invalid event type")

    region_finders = {
        'SE': get_skipped_region,
        'A3SS': get_A3SS_region,
        'A5SS': get_A5SS_region,
        'RI': get_RI_region,
    }
    find_region = region_finders[event_type]
    return find_region(MATS, include_flanking = include_flanking)
def process_MATS_data(MATS, event_type, min_junction_counts = None, sig_col = 'FDR', dPSI_col = 'IncLevelDifference', min_dpsi = 0, alpha = 0.05, include_flanking = True):
    """
    Filter MATS events and annotate them with an event ID and the coordinates of the spliced region.

    Parameters
    ----------
    MATS : pandas.DataFrame
        DataFrame containing MATS data for a single event type. When no filter is applied (alpha and min_dpsi falsy, min_junction_counts None) no copy is made, so the new columns are written to this DataFrame directly.
    event_type : str
        Type of the event ('SE', 'A3SS', 'A5SS', 'RI' or 'MXE'); used as the prefix of the 'AS ID' column and to select how the spliced region is found
    min_junction_counts : int, optional
        Minimum value required in both 'TJC_SAMPLE_1' and 'TJC_SAMPLE_2' (default is None, which skips this filter)
    sig_col : str, optional
        Column name for significance values (default is 'FDR')
    dPSI_col : str, optional
        Column name for delta PSI values (default is 'IncLevelDifference')
    min_dpsi : float, optional
        Minimum absolute delta PSI value; a falsy value skips this filter (default is 0)
    alpha : float, optional
        Maximum value of sig_col to keep; a falsy value skips this filter (default is 0.05)
    include_flanking : bool, optional
        Whether to include flanking sequence columns (default is True)

    Returns
    -------
    pandas.DataFrame
        Filtered DataFrame with a cleaned 'chr' column, an 'AS ID' column and the columns added by get_spliced_region
    """
    #restrict to significant events if indicated
    if alpha:
        MATS = MATS[MATS[sig_col] <= alpha].copy()
    if min_dpsi:
        MATS = MATS[MATS[dPSI_col].abs() >= min_dpsi].copy()
    #filter by junction counts in experiment if provided
    if min_junction_counts is not None:
        print(f'Filtering {event_type} events based on minimum junction counts.')
        #NOTE(review): get_junction_counts is presumably what adds the TJC_SAMPLE_* columns used below - confirm in helpers
        MATS = helpers.get_junction_counts(MATS, quant_type = 'MATS')
        enough_counts = (MATS['TJC_SAMPLE_1'] >= min_junction_counts) & (MATS['TJC_SAMPLE_2'] >= min_junction_counts)
        #copy so that the column assignments below do not write to a slice of the unfiltered table
        MATS = MATS[enough_counts].copy()
    #drop a leading 'chr' from chromosome names; values without the prefix, and non-string values, are left as they are
    MATS['chr'] = MATS['chr'].apply(lambda chrom: chrom[3:] if isinstance(chrom, str) and chrom.startswith('chr') else chrom)
    #add ID column
    MATS['AS ID'] = f"{event_type}_" + MATS.index.astype(str)
    #extract information about where splice region is
    MATS = get_spliced_region(MATS, event_type, include_flanking = include_flanking)
    return MATS
def project_on_MATS_data(processed_MATS, event_type, sig_col = 'FDR', dPSI_col = 'IncLevelDifference', coordinate_type = 'hg38', extra_cols = None, separate_modification_types = False, PROCESSES = 1, **kwargs):
    """
    Project PTMs onto the spliced regions of processed MATS events via project.project_ptms_onto_splice_events.

    Parameters
    ----------
    processed_MATS : pandas.DataFrame
        MATS data that already contains the 'chr', 'strand', 'event_start', 'event_end', 'AS ID' and 'geneSymbol' columns passed to the projection call
    event_type : str
        Type of the event; used for the progress bar label and recorded in the 'Event Type' column of the PTM output
    sig_col : str, optional
        Column name for significance values (default is 'FDR')
    dPSI_col : str, optional
        Column name for delta PSI values (default is 'IncLevelDifference')
    coordinate_type : str, optional
        Coordinate system (default is 'hg38')
    extra_cols : list, optional
        Forwarded to the projection call (default is None)
    separate_modification_types : bool, optional
        Forwarded to the projection call (default is False)
    PROCESSES : int, optional
        Requested number of processes (default is 1). A single process is used instead when there are fewer than four events per requested process.
    **kwargs
        Additional keyword arguments forwarded to the projection call

    Returns
    -------
    tuple
        The two objects returned by project.project_ptms_onto_splice_events, the second with an 'Event Type' column added
    """
    #only parallelise when there are at least four events per requested process
    n_events = processed_MATS.shape[0]
    n_workers = PROCESSES if PROCESSES*4 <= n_events else 1

    #fixed settings describing how MATS tables are laid out
    projection_settings = dict(
        chromosome_col = 'chr',
        strand_col = 'strand',
        region_start_col = 'event_start',
        region_end_col = 'event_end',
        event_id_col = 'AS ID',
        dPSI_col = dPSI_col,
        sig_col = sig_col,
        gene_col = 'geneSymbol',
        coordinate_type = coordinate_type,
        start_coordinate_system = '0-based',
        extra_cols = extra_cols,
        taskbar_label = f'{event_type} Events',
        separate_modification_types = separate_modification_types,
        PROCESSES = n_workers,
    )
    annotated_MATS, projected_ptms = project.project_ptms_onto_splice_events(processed_MATS, **projection_settings, **kwargs)

    #record event type
    projected_ptms['Event Type'] = event_type
    return annotated_MATS, projected_ptms
def run_nease_on_mats(processed_MATS, dpsi_col = 'IncLevelDifference'):
    """
    Run NEASE analysis on processed MATS event data and return the result

    Parameters
    ----------
    processed_MATS : pandas.DataFrame
        MATS event data containing the 'event_start', 'event_end' and 'geneSymbol' columns
    dpsi_col : str, optional
        Column name for delta PSI values (default is 'IncLevelDifference')

    Returns
    -------
    nease_output : NEASE object
        Whatever nease_runner.run_nease returns for these events
    """
    #the column names are fixed because they are the ones used for MATS tables in this module
    return nease_runner.run_nease(
        processed_MATS,
        region_start_col = 'event_start',
        region_end_col = 'event_end',
        gene_col = 'geneSymbol',
        dpsi_col = dpsi_col,
    )
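# [docs]  (appears to be a documentation "view source" link marker left over from extraction; not part of the module's code)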
class MATS_Dataset(GenericDataset):
    """
    Class for handling MATS splicing event data, projecting PTMs onto spliced regions, and running NEASE analysis on the events. This includes filtering events based on significance, dPSI, and junction counts, as well as identifying flanking sequence changes and their associated PTMs. The class can handle multiple types of splicing events (SE, A3SS, A5SS, RI, MXE) and store the results for each type separately.

    Parameters
    ----------
    SE : pandas.DataFrame, optional
        DataFrame containing skipped exon (SE) event data from MATS (default is None)
    A3SS : pandas.DataFrame, optional
        DataFrame containing alternative 3' splice site (A3SS) event data from MATS (default is None)
    A5SS : pandas.DataFrame, optional
        DataFrame containing alternative 5' splice site (A5SS) event data from MATS (default is None)
    RI : pandas.DataFrame, optional
        DataFrame containing retained intron (RI) event data from MATS (default is None)
    MXE : pandas.DataFrame, optional
        DataFrame containing mutually exclusive exon (MXE) event data from MATS (default is None)
    min_dpsi : float, optional
        Minimum absolute delta PSI value for filtering events (default is 0)
    alpha : float, optional
        Significance threshold for filtering events based on FDR (default is 0.05)
    min_junction_counts : int, optional
        Minimum junction counts in both conditions for filtering events (default is None, which means no filtering based on junction counts)
    dpsi_col : str, optional
        Column name for delta PSI values (default is 'IncLevelDifference')
    sig_col : str, optional
        Column name for significance values (default is 'FDR')
    coordinate_type : str, optional
        Coordinate system (default is 'hg38')

    Attributes
    ----------
    splice_data : dict
        Dictionary containing processed MATS data for each event type
    """

    def __init__(self, SE = None, A3SS = None, A5SS = None, RI = None, MXE = None, min_dpsi = 0, alpha = 0.05, min_junction_counts = None, dpsi_col = 'IncLevelDifference', sig_col = 'FDR', coordinate_type = 'hg38'):
        """
        Process each provided MATS table with process_MATS_data and hand the results, together with the MATS-specific column names, to the GenericDataset initializer. See the class docstring for the parameters.
        """
        splice_data = {}
        for event_type, data in zip(['SE', 'A3SS', 'A5SS', 'RI', 'MXE'], [SE, A3SS, A5SS, RI, MXE]):
            if data is not None:
                #filter events and add the event ID, spliced region and flanking region columns
                data = process_MATS_data(data, event_type = event_type, min_junction_counts=min_junction_counts, min_dpsi = min_dpsi, alpha = alpha, sig_col = sig_col, dPSI_col = dpsi_col)
                splice_data[event_type] = data.copy()
        super().__init__(splice_data=splice_data, min_dpsi=min_dpsi, alpha=alpha, dpsi_col=dpsi_col, sig_col=sig_col, coordinate_type=coordinate_type, chromosome_col = 'chr', strand_col = 'strand', first_flank_start_col = 'first_flank_start', first_flank_end_col = 'first_flank_end', second_flank_start_col = 'second_flank_start', second_flank_end_col = 'second_flank_end', event_id_col = 'AS ID', gene_col = 'geneSymbol', start_coordinate_system = '0-based')

    def run_pose(self, identify_altered_flanks = True, extra_cols = None, PROCESSES = 1, **kwargs):
        """
        Run PTM projection and, optionally, altered flank identification by calling project_ptms_generic and get_altered_flanks_generic (not defined in this class; presumably inherited from GenericDataset).

        Parameters
        ----------
        identify_altered_flanks : bool, optional
            Whether to also call get_altered_flanks_generic (default is True)
        extra_cols : list, optional
            Forwarded to both calls (default is None)
        PROCESSES : int, optional
            Forwarded to project_ptms_generic only (default is 1)
        **kwargs
            Additional keyword arguments forwarded to both calls
        """
        self.project_ptms_generic(extra_cols = extra_cols, PROCESSES = PROCESSES, **kwargs)
        if identify_altered_flanks:
            self.get_altered_flanks_generic(extra_cols = extra_cols, **kwargs)

    def get_altered_flanks(self, extra_cols = None, **kwargs):
        """
        Identify altered flanking sequences by forwarding all arguments to get_altered_flanks_generic
        """
        self.get_altered_flanks_generic(extra_cols = extra_cols, **kwargs)

    def run_nease(self):
        """
        Run NEASE analysis on the spliced regions of the MATS events for each event type, saving the results in the class for later retrieval. MXE events are skipped.
        """
        self.run_nease_generic(events_to_skip = ['MXE'])
#class MATS:
# """
# Class for handling MATS splicing event data, projecting PTMs onto spliced regions, and running NEASE analysis on the events. This includes filtering events based on significance, dPSI, and junction counts, as well as identifying flanking sequence changes and their associated PTMs. The class can handle multiple types of splicing events (SE, A3SS, A5SS, RI, MXE) and store the results for each type separately.
# Parameters
# ----------
# SE : pandas.DataFrame, optional
# DataFrame containing skipped exon (SE) event data from MATS (default is None)
# A3SS : pandas.DataFrame, optional
# DataFrame containing alternative 3' splice site (A3SS) event data from MATS (default is None)
# A5SS : pandas.DataFrame, optional
# DataFrame containing alternative 5' splice site (A5SS) event data from MATS (default is None)
# RI : pandas.DataFrame, optional
# DataFrame containing retained intron (RI) event data from MATS (default is None)
# MXE : pandas.DataFrame, optional
# DataFrame containing mutually exclusive exon (MXE) event data from MATS (default is None)
# min_dpsi : float, optional
# Minimum absolute delta PSI value for filtering events (default is 0)
# alpha : float, optional
# Significance threshold for filtering events based on FDR (default is 0.05)
# min_junction_counts : int, optional
# Minimum junction counts in both conditions for filtering events (default is None, which means no filtering based on junction counts)
# dpsi_col : str, optional
# Column name for delta PSI values (default is 'IncLevelDifference')
# sig_col : str, optional
# Column name for significance values (default is 'FDR')
# coordinate_type : str, optional
# Coordinate system (default is 'hg38')
#
# Attributes
# ----------
# splice_data : dict
# Dictionary containing processed MATS data for each event type
# """
# def __init__(self, SE = None, A3SS = None, A5SS = None, RI = None, MXE = None, min_dpsi = 0, alpha = 0.05, min_junction_counts = None, dpsi_col = 'IncLevelDifference', sig_col = 'FDR', coordinate_type = 'hg38'):
# self.splice_data = {}
# for event_type, data in zip(['SE', 'A3SS', 'A5SS', 'RI', 'MXE'], [SE, A3SS, A5SS, RI, MXE]):
# if data is not None:
# #filter by significance
# data = process_MATS_data(data, event_type = event_type, min_junction_counts=min_junction_counts, min_dpsi = min_dpsi, alpha = alpha, sig_col = sig_col, dPSI_col = dpsi_col)
# self.splice_data[event_type] = data.copy()
#save different parameters
# self.min_dpsi = min_dpsi
# self.alpha = alpha
# self.min_junction_counts = min_junction_counts
# self.dpsi_col = dpsi_col
# self.sig_col = sig_col
# self.coordinate_type = coordinate_type
# def project_ptms(self, extra_cols = None, separate_modification_types = False, PROCESSES = 1, **kwargs):
# """
# Project PTMs onto the spliced regions of the MATS events for each event type, saving an annotated version of the MATS data and dataframe of PTMs impacted by the splice events
#
# Parameters
# ----------
# extra_cols : list, optional
# List of additional column names from the MATS data to include in the output ptms dataframe (default is None)
# separate_modification_types : bool, optional
# Whether to separate different types of PTMs into different rows in the output dataframe (default is False)
# PROCESSES : int, optional
# Number of processes to use for multiprocessing (default is 1). If the number of events is small, multiprocessing will be automatically disabled to avoid overhead.
# kwargs:
# Additional keyword arguments to pass to the project_ptms_onto_splice_events function, such as filtering parameters to filter PTMs with lower evidence. For example, if you want to filter PTMs based on the number of MS observations, you can add 'min_MS_observations = 2' to the kwargs. This will filter out any PTMs that have less than 2 MS observations. See the project_ptms_onto_splice_events function for more options.
# Postconditions
# --------------
# self.ptms : pandas.DataFrame
# DataFrame containing information about PTMs projected onto the spliced regions of the MATS events, including the type of event, the associated PTMs, and any additional columns specified in extra_cols
# self.annotated_MATS : dict
# Dictionary containing the original MATS data for each event type with additional columns indicating the PTMs that are associated with each event
# """
# #check for any keyword arguments to use for filtering PTMs prior to projection
# if kwargs:
# filter_arguments = helpers.extract_filter_kwargs(**kwargs)
# #check any excess unused keyword arguments, report them
# helpers.check_filter_kwargs(filter_arguments)
# #filter ptm coordinates file to include only ptms with desired evidence
# ptm_coordinates = helpers.filter_ptms(pose_config.ptm_coordinates.copy(), **filter_arguments)
# #save filter arguments to class
# for farg in filter_arguments.keys():
# setattr(self, farg, filter_arguments[farg])
# else:
# ptm_coordinates = pose_config.ptm_coordinates.copy()
# self.ptms = []
# self.annotated_MATS = {}
# for event_type, data in self.splice_data.items():
# #check to make sure there is enough information to do multiprocessing if that is desired
# if PROCESSES*4 > data.shape[0]:
# processes = 1
# else:
# processes = PROCESSES
# # Project the spliced region onto the original MATS data
# processed_MATS, ptms = project.project_ptms_onto_splice_events(data, ptm_coordinates=ptm_coordinates, chromosome_col = 'chr', strand_col = 'strand', region_start_col = 'event_start', region_end_col = 'event_end', event_id_col = 'AS ID', dPSI_col=self.dpsi_col, sig_col = self.sig_col, gene_col = 'geneSymbol', coordinate_type=self.coordinate_type, start_coordinate_system='0-based', extra_cols = extra_cols, taskbar_label = f'{event_type} Events', separate_modification_types=separate_modification_types, PROCESSES = processes, **kwargs)
# self.splice_data[event_type] = processed_MATS
# # Save PTMs for later use
# self.annotated_MATS[event_type] = processed_MATS
# self.ptms.append(ptms)
# self.ptms = pd.concat(self.ptms, axis=0)
# def get_altered_flanks(self, extra_cols = None, separate_modification_types = False, PROCESSES = 1, **kwargs):
# """
# Identify changes to flanking sequences around PTMs resulting from splicing events, saving a dataframe of the altered flanking sequences and their associated PTMs. This function requires that the flanking sequence information be included in the original MATS data (either as upstream/downstream or first/second flanking regions)
# Parameters
# ----------
# extra_cols : list, optional
# List of additional column names from the MATS data to include in the output dataframe (default is None)
# separate_modification_types : bool, optional
# Whether to separate different types of PTMs into different rows in the output dataframe (default is False)
# PROCESSES : int, optional
# Number of processes to use for multiprocessing (default is 1). If the number of events
# is small, multiprocessing will be automatically disabled to avoid overhead.
# kwargs:
# Additional keyword arguments to pass to the get_flanking_changes_from_splice_data function
# Postconditions
# --------------
# self.altered_flanks : pandas.DataFrame
# DataFrame containing information about changes to flanking sequences around PTMs resulting from splicing events, including the type of event, the associated PTMs, and any additional columns specified in extra_cols
# """
# #check for any keyword arguments to use for filtering PTMs prior to projection
# if kwargs:
# filter_arguments = helpers.extract_filter_kwargs(**kwargs)
# #check any excess unused keyword arguments, report them
# helpers.check_filter_kwargs(filter_arguments)
# #filter ptm coordinates file to include only ptms with desired evidence
# ptm_coordinates = helpers.filter_ptms(pose_config.ptm_coordinates.copy(), **filter_arguments)
# #save filter arguments to class
# for farg in filter_arguments.keys():
# setattr(self, farg, filter_arguments[farg])
# else:
# ptm_coordinates = pose_config.ptm_coordinates.copy()
# spliced_flanks = []
# for event_type, data in self.splice_data.items():
# #check to make sure there is enough information to do multiprocessing if that is desired
# if PROCESSES*4 > data.shape[0]:
# processes = 1
# else:
# processes = PROCESSES
# event_flanks = flanking_sequences.get_flanking_changes_from_splice_data(data, ptm_coordinates = ptm_coordinates, chromosome_col = 'chr', strand_col = 'strand', spliced_region_start_col = 'event_start', spliced_region_end_col = 'event_end', first_flank_start_col = 'first_flank_start', first_flank_end_col = 'first_flank_end', second_flank_start_col = 'second_flank_start', second_flank_end_col = 'second_flank_end', dPSI_col=self.dpsi_col, sig_col = self.sig_col, gene_col = 'geneSymbol', event_id_col = 'AS ID', extra_cols = extra_cols, coordinate_type=self.coordinate_type, start_coordinate_system='0-based', **kwargs)
# event_flanks['Event Type'] = event_type
# spliced_flanks.append(event_flanks)
# self.altered_flanks = pd.concat(spliced_flanks, axis=0)
# def run_nease(self):
# """
# Run NEASE analysis on the spliced regions of the MATS events for each event type, saving the results in the class for later retrieval
# """
# nease = {}
# for event_type, data in self.splice_data.items():
# if event_type != 'MXE':
# # Run NEASE on the tmp DataFrame
# nease_output = nease_runner.run_nease(data, region_start_col = 'event_start', region_end_col = 'event_end', gene_col = 'geneSymbol', dpsi_col = self.dpsi_col)
# # Process the NEASE output as needed
# nease[event_type] = nease_output
# #save in class
# self.nease = nease
# def get_nease_edges(self):
# """
# Get the impacted protein-protein interactions from the NEASE analysis, combining results across event types into a single dataframe.
# """
# if not hasattr(self, 'nease'):
# self.run_nease()
# edge_list = []
# for event_type in self.nease.keys():
# # Process the NEASE output as needed
# if isinstance(self.nease[event_type].get_edges(), pd.DataFrame):
# edges = self.nease[event_type].get_edges()
# edges['Event Type'] = event_type
# edge_list.append(edges)
# if len(edge_list) > 0:
# edge_df = pd.concat(edge_list, axis=0)
# else:
# edge_df = pd.DataFrame()
# return edge_df
# def get_nease_domains(self):
# """
# Get the impacted protein domains from the NEASE analysis, combining results across event types into a single dataframe.
# """
# if not hasattr(self, 'nease'):
# self.run_nease()
# domain_list = []
# for event_type in self.nease.keys():
# # Process the NEASE output as needed
# if isinstance(self.nease[event_type].get_domains(), pd.DataFrame):
# domains = self.nease[event_type].get_domains()
# domains['Event Type'] = event_type
# domain_list.append(domains)
# if len(domain_list) == 0:
# return pd.DataFrame()
# domain_df = pd.concat(domain_list, axis=0)
# return domain_df
# def get_nease_motifs(self):
# """
# Get the impacted linear motifs from ELM from the NEASE analysis, combining results across event types into a single dataframe.
# """
# if not hasattr(self, 'nease'):
# self.run_nease()
# motif_list = []
# for event_type in self.nease.keys():
# # Process the NEASE output as needed
# if isinstance(self.nease[event_type].get_elm(), pd.DataFrame):
# motifs = self.nease[event_type].get_elm()
# motifs['Event Type'] = event_type
# motif_list.append(motifs)
# if len(motif_list) == 0:
# return pd.DataFrame()
# motif_df = pd.concat(motif_list, axis=0)
# return motif_df
# def save(self, odir):
# """
# Save the results of the MATS analysis to the specified output directory. This includes the projected PTMs, annotated MATS data for each event type, altered flanking sequences, and NEASE results (impacted domains, PPIs, and motifs).
# Parameters
# ----------
# odir : str
# Output directory where the results will be saved. The function will create CSV files for each type of result in this directory.
# """
# if hasattr(self, 'ptms'):
# self.ptms.to_csv(f'{odir}/spliced_ptms.csv', index=False)
# if hasattr(self, 'annotated_MATS'):
# for event_type, data in self.annotated_MATS.items():
# data.to_csv(f'{odir}/{event_type}_annotated.csv', index=False)
# if hasattr(self, 'altered_flanks'):
# self.altered_flanks.to_csv(f'{odir}/altered_flanking_sequences.csv', index=False)
# if hasattr(self, 'nease'):
# domains = self.get_nease_domains()
# domains.to_csv(f'{odir}/nease_impacted_domains.csv', index=False)
# ppi = self.get_nease_edges()
# ppi.to_csv(f'{odir}/nease_impacted_ppi.csv', index=False)
# motifs = self.get_nease_motifs()
# motifs.to_csv(f'{odir}/nease_impacted_motifs.csv', index=False)
# #save the attributes (non dataframe/list) of the class as a json file for later retrieval
# attributes = {attr: getattr(self, attr) for attr in self.__dict__.keys() if isinstance(getattr(self, attr), (str, int, float, bool))}
# with open(f'{odir}/run_attributes.json', 'w') as f:
# json.dump(attributes, f)