Analysis of HER2 Positive Patients using Microbiopsies (Satpathy 2021)

[8]:

#Supporting packages for analysis
import numpy as np
import pandas as pd

#KSTAR imports
from kstar import config, helpers, calculate
from kstar.plot import DotPlot


#Set matplotlib defaults for arial 12 point font
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']
rcParams['font.size'] = 12
import matplotlib.pyplot as plt

#where supplementary data was downloaded to (From https://figshare.com/articles/dataset/KSTAR_Supplementary_Data/14919726)
SUPPLEMENTS_DIR = './'

#Directory where KSTAR Supplemental data was set
odir = SUPPLEMENTS_DIR+'Supplements/SupplementaryData/BreastCancer/Microbiopsy_Satpathy2020/'

#load the Mann Whitney activities and FPR for Tyrosine predictions,
#it will be faster and less data than loading all KSTAR outputs
activities = pd.read_csv(odir+'/RESULTS/micro_biopsy_Y_mann_whitney_activities.tsv', sep='\t', index_col=0)
fpr = pd.read_csv(odir+'/RESULTS/micro_biopsy_Y_mann_whitney_fpr.tsv', sep='\t', index_col=0)



#load kinase map from supplementary data
KINASE_MAP =  pd.read_csv(SUPPLEMENTS_DIR+'SupplementaryData/Map/globalKinaseMap.csv', index_col = 0)
#set preferred kinase names from the kinase map (make a kinase_dict)
kinase_dict = {}
for kinase in activities.index:
    kinase_dict[kinase] = KINASE_MAP.loc[kinase,'Preferred Name']

Agglomerative clustering of all patients and kinases, with context

[5]:

s1 = pd.read_csv(odir+'sample_metadata_mirobiopsy.csv')
#add data to the Sample ID column so they can be matched
id_col = []
for index, row in s1.iterrows():
    id_col.append('data:'+row['Sample ID'])
s1['ID'] = id_col

[11]:

# PLot all samples with their context information about treatment and pCR status
fig, axes = plt.subplots(figsize = (12, 15),
        nrows = 3, ncols = 2,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'height_ratios':[.1,0.1, 1],
            'width_ratios':[0.1,1]
        },)
fig.subplots_adjust(wspace=0, hspace=0)

dots = DotPlot(results,
               fpr,
               figsize = (10,15),
               dotsize = 5,
               legend_title='-log10(p-value)',
               kinase_dict=kinase_dict)
#Cluster changes the sorting of the values array, so be sure to plot context last so that it is in the same sort.
#dots.drop_kinases_with_no_significance()

dots.cluster(orientation = 'left', ax = axes[2,0], method='ward')
dots.cluster(orientation = 'top', ax = axes[0,1], method='ward')
dots.context(ax=axes[1,1],info = s1, id_column = 'ID', context_columns = ['Treatment', 'response'], orientation = 'top', dotsize =200, markersize= 15 )

dots.dotplot(ax = axes[2,1])

plt.xticks(rotation = 80, FontSize=12)
plt.yticks(FontSize=12)
plt.savefig(odir+'microbiopsy_all.pdf', bbox_inches='tight')

../_images/Examples_Microbiopsy_Analysis_4_0.png

Agglomerative clustering of all patients and all significant kinases, with context

[12]:

# PLot all samples with their context information about treatment and pCR status
fig, axes = plt.subplots(figsize = (12, 15),
        nrows = 3, ncols = 2,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'height_ratios':[.1,0.1, 1],
            'width_ratios':[0.1,1]
        },)
fig.subplots_adjust(wspace=0, hspace=0)

dots = DotPlot(results,
               fpr,
               figsize = (10,15),
               dotsize = 5,
               legend_title='-log10(p-value)',
               kinase_dict=kinase_dict)
#Cluster changes the sorting of the values array, so be sure to plot context last so that it is in the same sort.
dots.drop_kinases_with_no_significance()

dots.cluster(orientation = 'left', ax = axes[2,0], method='ward')
dots.cluster(orientation = 'top', ax = axes[0,1], method='ward')
dots.context(ax=axes[1,1],info = s1, id_column = 'ID', context_columns = ['Treatment', 'response'], orientation = 'top', dotsize =200, markersize= 15 )

dots.dotplot(ax = axes[2,1])

plt.xticks(rotation = 80, FontSize=12)
plt.yticks(FontSize=12)
plt.savefig(odir+'microbiopsy_sigKinases.pdf', bbox_inches='tight')

../_images/Examples_Microbiopsy_Analysis_6_0.png

Plot only pre-treatment samples and provide treatment response context

[25]:

#Let's look just at pre-treatment samples
#given the metadata, pull pre-treatment only and take subsets for plotting, sort by ERBB2 activity prediction
s1_temp = s1[s1['Treatment']=='pre-treatment']
#sample_ids = list(s1_temp['ID'])

nameDict = {}
for name, group in s1_temp.groupby('Patient ID'):
    #print(name, group['Sample ID'])
    nameDict[group['Sample ID'].values[0]] = name

s2 = s1_temp[s1_temp['Sample ID'].isin(list(nameDict.keys()))]
sample_ids = list(s2['ID'].values)

#create an x_label_dict
x_label_dict = {}
for index, row in s2.iterrows():
    x_label_dict[row['ID']] = row['Patient ID']
# How about we take just one sample of each of the patient IDS, when there are multiple pre-treatment samples


results = activities[sample_ids]
sig = fpr[sample_ids]
results = -np.log10(results)


#Setup a figure with a context strip at the top for HER2 status and activity dots on the below axis
fig, axes = plt.subplots(figsize = (12, 15),
        nrows = 3, ncols = 2,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'height_ratios':[.1,0.1, 1],
            'width_ratios':[0.1,1]
        },)
fig.subplots_adjust(wspace=0, hspace=0)

dots = DotPlot(results,
               sig,
               alpha=0.05,
               figsize = (10,15),
               dotsize = 20,
               legend_title='-log10(p-value)',
               x_label_dict = x_label_dict,
               kinase_dict=kinase_dict)
#Cluster changes the sorting of the values array, so be sure to plot context last so that it is in the same sort.
dots.drop_kinases_with_no_significance()
method = 'ward'
dots.cluster(orientation = 'left', ax = axes[2,0], method=method)
dots.cluster(orientation = 'top', ax = axes[0,1], method=method)
dots.context(ax=axes[1,1], info = s2, id_column = 'ID', context_columns = ['response'], orientation = 'top', dotsize =200, markersize= 15 )

dots.dotplot(ax = axes[2,1])

plt.xticks(rotation = 80, FontSize=12)
plt.yticks(FontSize=12)
plt.savefig(odir+'microbiopsy_PreTreatment_all.pdf', bbox_inches='tight')

../_images/Examples_Microbiopsy_Analysis_8_0.png

Plot specific kinases for each patient (all samples)

[18]:

kinases = list(activities.index)
kinases.remove('ERBB2')
kinases.remove('EGFR')

[22]:

# What happens if we take, for each patient that has pre- and on-treatment, the difference in ERBB2 activity?
#find the paired sample ids
for name, group in s1.groupby('Patient ID'):
    #let's just plot those results (ERBB2 kinase)
    #Let's look just at pre-treatment samples
#given the metadata, pull pre-treatment only and take subsets for plotting, sort by ERBB2 activity prediction
    sample_ids = list(group['ID'])

    results = activities[sample_ids]
    results = results.sort_values('ERBB2', axis=1)
    sig = fpr[sample_ids]
    significance = fpr[sample_ids] <= 0.05
    significant = (results < significance.reindex(results.index)[results.columns]) * 1
    results = -np.log10(results)
#Setup a figure with a context strip at the top for HER2 status and activity dots on the below axis
    fig, axes = plt.subplots(figsize = (2,2),
        nrows = 2, ncols = 1,
        sharex = 'col',
        sharey = 'row',
        gridspec_kw = {
            'height_ratios':[0.2,1],
        },)
    fig.subplots_adjust(wspace=0, hspace=0)

    dots = dotplot.DotPlot(results,
                       sig,
                       figsize = (2,2),
                       dotsize = 20,
                       legend_title='-log10(p-value)')
#Cluster changes the sorting of the values array, so be sure to plot context last so that it is in the same sort.
    dots.drop_kinases(kinases)
    #dots.drop_kinases_with_no_significance()

    dots.context(ax=axes[0], info = group, id_column = 'ID', context_columns = ['Treatment'], orientation = 'top', dotsize =200, markersize= 15 )

    dots.dotplot(ax = axes[1], max_size=25)
    plt.title(name)

    plt.savefig(odir+'microbiopsy_patient_'+ name +'.pdf', bbox_inches='tight')

../_images/Examples_Microbiopsy_Analysis_11_0.png

../_images/Examples_Microbiopsy_Analysis_11_1.png

../_images/Examples_Microbiopsy_Analysis_11_2.png

../_images/Examples_Microbiopsy_Analysis_11_3.png

../_images/Examples_Microbiopsy_Analysis_11_4.png

../_images/Examples_Microbiopsy_Analysis_11_5.png

../_images/Examples_Microbiopsy_Analysis_11_6.png

../_images/Examples_Microbiopsy_Analysis_11_7.png

../_images/Examples_Microbiopsy_Analysis_11_8.png

../_images/Examples_Microbiopsy_Analysis_11_9.png

../_images/Examples_Microbiopsy_Analysis_11_10.png

../_images/Examples_Microbiopsy_Analysis_11_11.png

../_images/Examples_Microbiopsy_Analysis_11_12.png

../_images/Examples_Microbiopsy_Analysis_11_13.png

[163]:

## Create a report by patient of change in ERBB2 values

for name, group in s1.groupby('Patient ID'):
    sample_ids = list(group['ID'])

    results = activities[sample_ids]
    ERBB2 = results.loc['ERBB2']

    print(name)

    ERBB2.values
    for sample in ERBB2.index.values:
        treatment = s1[s1['ID']==sample]['Treatment'].values[0]
        print("%s\t%s\t%.2e"%(treatment, sample, ERBB2[sample]))

BCN 1300
pre-treatment   data:X1401794   5.77e-12
on-treatment    data:X1402967   1.37e-14
pre-treatment   data:X1401795   1.44e-25
on-treatment    data:X1402968   7.01e-21
BCN 1303
pre-treatment   data:X1410753P  4.09e-25
on-treatment    data:X1411848P  1.82e-05
BCN 1325
pre-treatment   data:X1405799P  1.15e-20
on-treatment    data:X1406353P  1.33e-05
BCN 1326
pre-treatment   data:X1407999P  9.61e-01
on-treatment    data:X1410003P  9.36e-01
pre-treatment   data:X1408000P  8.66e-04
pre-treatment   data:X1408001P  6.69e-01
on-treatment    data:X1410004P  6.63e-03
on-treatment    data:X1410005P  7.36e-01
BCN 1331
pre-treatment   data:X1413835P  2.10e-05
on-treatment    data:X1414049P  9.81e-01
BCN 1335
pre-treatment   data:X1420025P  7.67e-03
BCN 1357
pre-treatment   data:X1422184P  1.28e-12
pre-treatment   data:X1422185P  7.23e-23
BCN 1358
pre-treatment   data:X1509146   4.12e-11
on-treatment    data:X1509534   9.51e-01
BCN 1359
pre-treatment   data:X1514458   9.94e-04
on-treatment    data:X1514936   8.47e-17
BCN 1365
pre-treatment   data:X1523169   6.43e-19
on-treatment    data:X1523623   1.10e-03
BCN 1367
pre-treatment   data:X1518280   2.38e-21
on-treatment    data:X1518399   9.99e-01
pre-treatment   data:X1518282   9.72e-12
on-treatment    data:X1518400   3.12e-01
BCN 1368
pre-treatment   data:X1518729   2.10e-13
BCN 1369
pre-treatment   data:X1539756   3.83e-16
pre-treatment   data:X1539757   3.45e-18
on-treatment    data:X1539910   6.61e-19
on-treatment    data:X1539911   3.41e-07
BCN 1371
pre-treatment   data:X1525224   5.12e-20