EGF and HRG Stimulation of ERBB2-Overexpressing HMECs (Wolf-Yadlin et al., 2006)

[1]:

#Supporting packages for analysis
import numpy as np
import pandas as pd

#KSTAR imports
from kstar import config, helpers, calculate
from kstar.plot import DotPlot



#Set matplotlib defaults for arial 12 point font
from matplotlib import rcParams
# Apply all three font defaults in one call: sans-serif family, Arial, 12 pt.
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'font.size': 12,
})
import matplotlib.pyplot as plt

# Root folder the KSTAR supplementary data was downloaded into
# (from https://figshare.com/articles/dataset/KSTAR_Supplementary_Data/14919726)
SUPPLEMENTS_DIR = './'

# Folder holding the KSTAR results for this experiment; figures are saved here too
odir = SUPPLEMENTS_DIR+'Supplements/SupplementaryData/Control_Experiments/EGF_184A1_HER2_WolfYadlin2006/'

# Read only the tyrosine Mann Whitney activities and false positive rates:
# faster and far less data than loading every KSTAR output
activities = pd.read_csv(
    odir+'/RESULTS/EGF_HER2_Y_mann_whitney_activities.tsv',
    sep='\t',
    index_col=0,
)
fpr = pd.read_csv(
    odir+'/RESULTS/EGF_HER2_Y_mann_whitney_fpr.tsv',
    sep='\t',
    index_col=0,
)

# Kinase map shipped with the supplementary data
# NOTE(review): this path omits the 'Supplements/' prefix that odir uses above --
# confirm which one matches the downloaded folder layout.
KINASE_MAP = pd.read_csv(
    SUPPLEMENTS_DIR+'SupplementaryData/Map/globalKinaseMap.csv',
    index_col=0,
)

# Map every kinase in the activities table to its preferred display name
kinase_dict = {
    kinase: KINASE_MAP.loc[kinase, 'Preferred Name']
    for kinase in activities.index
}

[4]:

# Build per-sample lookups from the column names, which look like
# 'data:<cell type>_<growth factor>_<time>'
cell_dict = {}
treatment_dict = {}
for col in activities.columns:
    # Drop the 'data:' prefix, then split the remainder on underscores
    parts = col.split(':')[1].split('_')
    cell_dict[col] = parts[0]
    treatment_dict[col] = parts[1]

[5]:

# One single-column frame per annotation, indexed by sample (column) name
temp1 = pd.DataFrame(
    list(cell_dict.values()),
    index=list(cell_dict.keys()),
    columns=['Cell Type'],
)
temp2 = pd.DataFrame(
    list(treatment_dict.values()),
    index=list(treatment_dict.keys()),
    columns=['Growth Factor'],
)

# Join the annotations on sample name and expose that name as an 'ID' column
s1 = temp1.join(temp2).assign(ID=lambda frame: frame.index)

[6]:

# Show the per-sample description table (cell type, growth factor, sample ID)
s1

[6]:

	Cell Type	Growth Factor	ID
data:24H_EGF_0(min)	24H	EGF	data:24H_EGF_0(min)
data:24H_EGF_5(min)	24H	EGF	data:24H_EGF_5(min)
data:24H_EGF_10(min)	24H	EGF	data:24H_EGF_10(min)
data:24H_EGF_30(min)	24H	EGF	data:24H_EGF_30(min)
data:24H_HRG_0(min)	24H	HRG	data:24H_HRG_0(min)
data:24H_HRG_5(min)	24H	HRG	data:24H_HRG_5(min)
data:24H_HRG_10(min)	24H	HRG	data:24H_HRG_10(min)
data:24H_HRG_30(min)	24H	HRG	data:24H_HRG_30(min)
data:P_EGF_0(min)	P	EGF	data:P_EGF_0(min)
data:P_EGF_5(min)	P	EGF	data:P_EGF_5(min)
data:P_EGF_10(min)	P	EGF	data:P_EGF_10(min)
data:P_EGF_30(min)	P	EGF	data:P_EGF_30(min)
data:P_HRG_0(min)	P	HRG	data:P_HRG_0(min)
data:P_HRG_5(min)	P	HRG	data:P_HRG_5(min)
data:P_HRG_10(min)	P	HRG	data:P_HRG_10(min)
data:P_HRG_30(min)	P	HRG	data:P_HRG_30(min)

Plot all samples and kinases

[13]:

# Plot every sample and every kinase: dots are sized by -log10 of the
# Mann Whitney activities, with the FPR table passed as the significance input.
results = -np.log10(activities)
sig = fpr

#Set up a one-row figure: a narrow left axis for the kinase dendrogram and a
#wide right axis for the activity dots
fig, axes = plt.subplots(figsize = (9, 12),
        nrows = 1, ncols = 2,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'width_ratios':[0.1,1]
        },)
fig.subplots_adjust(wspace=0, hspace=0)

dots = DotPlot(results,
               sig,
               figsize = (9,12),
               dotsize = 10,
               legend_title='-log10(p-value)', kinase_dict=kinase_dict)
#All kinases are kept for this overview plot
#dots.drop_kinases_with_no_significance()

#Cluster changes the sorting of the values array, so cluster first and draw
#the dots (and any context strip) afterwards so everything shares one order.
dots.cluster(orientation = 'left', ax = axes[0], method='ward')

dots.dotplot(ax = axes[1])
#Text properties must be lowercase: mixed-case names such as FontSize are
#rejected by current matplotlib releases.
plt.yticks(fontsize=12)
plt.savefig(odir+'WolfYadlin2006_all.pdf', bbox_inches='tight')

../_images/Examples_EGF_HER2_WolfYadlin2006_6_0.png

Plot just EGF HMEC data for comparison to MRM experiment

[10]:

# Plot only the parental (P) EGF time course for comparison to the MRM experiment.

#Columns to keep, mapped to the x tick label shown for each (time in minutes)
colDict = {
    'data:P_EGF_0(min)': '0',
    'data:P_EGF_5(min)': '5',
    'data:P_EGF_10(min)': '10',
    'data:P_EGF_30(min)': '30',
}

#Select with an explicit list rather than a dict_keys view
subset_columns = list(colDict)
results = -np.log10(activities[subset_columns])
sig = fpr[subset_columns]

#Set up a one-row figure: a narrow left axis for the kinase dendrogram and a
#wide right axis for the activity dots
fig, axes = plt.subplots(figsize = (9, 12),
        nrows = 1, ncols = 2,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'width_ratios':[0.1,1]
        },)
fig.subplots_adjust(wspace=0, hspace=0)

dots = DotPlot(results,
               sig,
               figsize = (9,12),
               dotsize = 10,
               legend_title='-log10(p-value)', x_label_dict=colDict, kinase_dict=kinase_dict)
dots.drop_kinases_with_no_significance()

#Cluster changes the sorting of the values array, so cluster first and draw
#the dots (and any context strip) afterwards so everything shares one order.
dots.cluster(orientation = 'left', ax = axes[0], method='ward')

dots.dotplot(ax = axes[1])
#Text properties must be lowercase: mixed-case names such as FontSize are
#rejected by current matplotlib releases.
plt.xlabel('Time (minutes)', fontsize=12)
plt.xticks(rotation = 45, fontsize=12)
plt.yticks(fontsize=12)
plt.savefig(odir+'EGF_HMEC_4timepoint_all.pdf', bbox_inches='tight')

../_images/Examples_EGF_HER2_WolfYadlin2006_8_0.png

Reshape results and plot to compare each kinase across conditions

[14]:

def reshape_results(df, kinase, order, times=None):
    """
    Reshape one kinase's row of ``df`` into a condition-by-time table.

    ``df`` can be the activities or the fpr table. Its columns are expected
    to be named ``'data:<condition>_<time>'`` (for example
    ``'data:P_EGF_5(min)'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by kinase with one column per sample.
    kinase : str
        Index label of the row to reshape.
    order : iterable of str
        Condition names (such as ``'P_EGF'``), in the row order wanted in
        the output.
    times : iterable of str, optional
        Time-point suffixes, in the column order wanted in the output.
        Defaults to ``['0(min)', '5(min)', '10(min)', '30(min)']``.

    Returns
    -------
    pandas.DataFrame
        One row per condition (underscores replaced by spaces in the row
        label) and one column per time point.

    Raises
    ------
    KeyError
        If ``kinase`` is not in ``df.index`` or an expected
        ``'data:<condition>_<time>'`` column is missing.
    """
    if times is None:
        times = ['0(min)', '5(min)', '10(min)', '30(min)']
    else:
        # Materialize once so a generator can be reused for every condition
        times = list(times)

    series = df.loc[kinase]
    rows = {
        name.replace('_', ' '): [series['data:' + name + '_' + time] for time in times]
        for name in order
    }
    return pd.DataFrame.from_dict(rows, orient='index', columns=times)

[17]:

def plot_results(activities, fpr):
    """
    Draw a small (2 x 2 inch) dot plot for one reshaped kinase table.

    Parameters
    ----------
    activities : pandas.DataFrame
        Activity values to plot; they are transformed with -log10 before
        being passed to DotPlot. Columns are expected to be the time labels
        '0(min)', '5(min)', '10(min)' and '30(min)', which are relabelled
        to plain minute values on the x axis.
    fpr : pandas.DataFrame
        Table passed to DotPlot as its second (significance) argument,
        with the same layout as ``activities``.

    Returns
    -------
    None
        The plot is drawn on a newly created figure, which is left as the
        current pyplot figure so the caller can add a title or save it.
    """
    results = -np.log10(activities)

    #A single axis holds the dots; there is no dendrogram or context strip here
    fig, axes = plt.subplots(figsize = (2, 2),
            nrows = 1, ncols = 1)
    fig.subplots_adjust(wspace=0, hspace=0)

    dots = DotPlot(results,
                   fpr,
                   figsize = (2,2),
                   dotsize = 5,
                   legend_title='-log10(p-value)',
                   x_label_dict = {'0(min)': '0', '5(min)': '5', '10(min)': '10', '30(min)': '30'})

    dots.dotplot(ax = axes, max_size=32)
    #Text properties must be lowercase: mixed-case names such as FontSize are
    #rejected by current matplotlib releases.
    plt.xticks(rotation = 45, fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel('Time (min)', fontsize=14)

[18]:

# Kinases to compare across conditions, one small figure each
kinases = ['ERBB2', 'EGFR'] #'ERBB3']

# Row order of the conditions in every per-kinase plot
df = activities
order = ['P_EGF', 'P_HRG', '24H_EGF', '24H_HRG']

for kinase in kinases:
    # Reshape both tables the same way so they stay aligned
    activities_new = reshape_results(df, kinase, order)
    fpr_new = reshape_results(fpr, kinase, order)

    # Draw the dot plot, title it with the kinase name, and save it as a PDF
    plot_results(activities_new, fpr_new)
    plt.title(kinase)
    plt.savefig(odir+'Subset_'+kinase+'.pdf', bbox_inches='tight')

../_images/Examples_EGF_HER2_WolfYadlin2006_12_0.png

../_images/Examples_EGF_HER2_WolfYadlin2006_12_1.png