Plot feature importance

The function below plots the ten most important features as a horizontal bar chart.

def plot_feature_importances(df):
    """Plot the ten largest feature importances as a horizontal bar chart.

    Args:
        df: DataFrame providing 'importance' (bar lengths) and 'feature'
            (bar labels). 'importance' must be a column; 'feature' may be
            a column or the name of the index, since the index is reset
            before plotting.
    """
    # Sort descending and keep the ten most important rows. reset_index()
    # is kept without drop=True so a named index stays available as a column.
    top = df.sort_values('importance', ascending=False).reset_index().head(10)
    # Give figsize to subplots() directly: a separate plt.figure() call opens
    # an extra, empty figure and the size never reaches the axes drawn on.
    _, ax = plt.subplots(figsize=(16, 10))
    ax.barh(top['feature'], top['importance'], align="center", color='green')
    ax.set_title('Feature importance')

Pair plots for EDA

# Copy the data for plotting
plot_data = ext_data.drop(columns = ['DAYS_BIRTH']).copy()
# Add in the age of the client in years
plot_data['YEARS_BIRTH'] = age_data['YEARS_BIRTH']
# Drop na values and limit to first 100000 rows.
# .iloc slices by position; .loc[:100000] would slice by index label, so rows
# removed by dropna() would still count against the limit.
plot_data = plot_data.dropna().iloc[:100000, :]
# Annotate the current axes with the correlation coefficient of two columns
def corr_func(x, y, **kwargs):
    """Write the Pearson correlation of x and y onto the current axes.

    Extra keyword arguments are accepted and ignored.
    """
    coefficient = np.corrcoef(x, y)[0, 1]
    label = "r = {:.2f}".format(coefficient)
    current_axes = plt.gca()
    # The position is expressed in axes coordinates (fractions of the axes).
    current_axes.annotate(
        label,
        xy=(.2, .8),
        xycoords=current_axes.transAxes,
        size=20,
    )
# Create the pairgrid object
# `height` is the current name of the facet-size keyword; seaborn renamed it
# from `size` in 0.9 and newer releases reject the old spelling.
grid = sns.PairGrid(data = plot_data, height = 3, diag_sharey=False,
                    hue = 'TARGET',
                    vars = [x for x in plot_data.columns if x != 'TARGET'])
# Upper is a scatter plot
grid.map_upper(plt.scatter, alpha = 0.2)
Pranay Aryal

Pranay Aryal

Software Developer

Read More