Jupyter notebook for Datatel customer churn analysis

 

Datatel
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
url = 'https://assets.datacamp.com/production/repositories/5952/datasets/6f60d0b204b01be5cb9541b3e4e1dcda02f20868/Databel%20-%20Data.csv'
df = pd.read_csv(url)
print(df.info())
total_customers = len(pd.unique(df['Customer ID']))
print('# of unique Customer ID = ', total_customers)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6687 entries, 0 to 6686
Data columns (total 29 columns):
Customer ID                          6687 non-null object
Churn Label                          6687 non-null object
Account Length (in months)           6687 non-null int64
Local Calls                          6687 non-null int64
Local Mins                           6687 non-null float64
Intl Calls                           6687 non-null float64
Intl Mins                            6687 non-null float64
Intl Active                          6687 non-null object
Intl Plan                            6687 non-null object
Extra International Charges          6687 non-null float64
Customer Service Calls               6687 non-null int64
Avg Monthly GB Download              6687 non-null int64
Unlimited Data Plan                  6687 non-null object
Extra Data Charges                   6687 non-null int64
State                                6687 non-null object
Phone Number                         6687 non-null object
Gender                               6687 non-null object
Age                                  6687 non-null int64
Under 30                             6687 non-null object
Senior                               6687 non-null object
Group                                6687 non-null object
Number of Customers in Group         6687 non-null int64
Device Protection & Online Backup    6687 non-null object
Contract Type                        6687 non-null object
Payment Method                       6687 non-null object
Monthly Charge                       6687 non-null int64
Total Charges                        6687 non-null int64
Churn Category                       1769 non-null object
Churn Reason                         1769 non-null object
dtypes: float64(4), int64(9), object(16)
memory usage: 1.5+ MB
None
# of unique Customer ID =  6687
In [2]:
# create different grouping
# age bins
ranges = [0, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84, np.inf]
age_groups = ['0-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', 
              '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80-84', '85+']
df['Age group'] = pd.cut(df['Age'], bins = ranges, labels=age_groups)
print(df[['Age', 'Age group']].head())

# data usage bins
data_ranges = [-np.inf, 4, 9, np.inf]
data_groups = ['0-4', '5-9', '10+']
df['Data group'] = pd.cut(df['Avg Monthly GB Download'], bins = data_ranges, labels=data_groups)
print(df[['Unlimited Data Plan','Avg Monthly GB Download', 'Data group']].head())
   Age Age group
0   35     35-39
1   49     45-49
2   51     50-54
3   41     40-44
4   51     50-54
  Unlimited Data Plan  Avg Monthly GB Download Data group
0                 Yes                        3        0-4
1                 Yes                        3        0-4
2                 Yes                        3        0-4
3                 Yes                        2        0-4
4                 Yes                        3        0-4
In [3]:
# review data
print(df.describe())
print(df.head())
       Account Length (in months)  Local Calls   Local Mins   Intl Calls  \
count                 6687.000000  6687.000000  6687.000000  6687.000000   
mean                    32.337820   130.974129   322.752864    51.097524   
std                     24.595689   121.893966   288.619931   103.592369   
min                      1.000000     1.000000     4.000000     0.000000   
25%                      9.000000    31.000000    76.900000     0.000000   
50%                     29.000000    98.000000   250.500000     0.000000   
75%                     55.000000   199.000000   498.050000    52.000000   
max                     77.000000   918.000000  1234.200000  1120.000000   

         Intl Mins  Extra International Charges  Customer Service Calls  \
count  6687.000000                  6687.000000             6687.000000   
mean    130.070624                    33.641783                0.915657   
std     243.527828                    76.346828                1.411484   
min       0.000000                     0.000000                0.000000   
25%       0.000000                     0.000000                0.000000   
50%       0.000000                     0.000000                0.000000   
75%     140.400000                    16.400000                2.000000   
max    1372.500000                   585.800000                5.000000   

       Avg Monthly GB Download  Extra Data Charges          Age  \
count              6687.000000         6687.000000  6687.000000   
mean                  6.696276            3.374458    47.448632   
std                   7.454339           12.565309    16.969893   
min                   0.000000            0.000000    19.000000   
25%                   1.000000            0.000000    33.000000   
50%                   5.000000            0.000000    47.000000   
75%                   9.000000            0.000000    60.000000   
max                  43.000000           99.000000    85.000000   

       Number of Customers in Group  Monthly Charge  Total Charges  
count                   6687.000000     6687.000000    6687.000000  
mean                       0.837894       31.030357    1083.755645  
std                        1.700780       16.288147    1127.074863  
min                        0.000000        5.000000       6.000000  
25%                        0.000000       16.000000     181.000000  
50%                        0.000000       31.000000     647.000000  
75%                        0.000000       43.000000    1732.500000  
max                        6.000000       78.000000    5574.000000  
  Customer ID Churn Label  Account Length (in months)  Local Calls  \
0   4444-BZPU          No                           1            3   
1   5676-PTZX          No                          33          179   
2   8532-ZEKQ          No                          44           82   
3   1314-SMPJ          No                          10           47   
4   2956-TXCJ          No                          62          184   

   Local Mins  Intl Calls  Intl Mins Intl Active Intl Plan  \
0         8.0         0.0        0.0          No        no   
1       431.3         0.0        0.0          No        no   
2       217.6         0.0        0.0          No       yes   
3       111.6        60.0       71.0         Yes       yes   
4       621.2       310.0      694.4         Yes       yes   

   Extra International Charges  ...  Number of Customers in Group  \
0                          0.0  ...                             0   
1                          0.0  ...                             0   
2                          0.0  ...                             0   
3                          0.0  ...                             0   
4                          0.0  ...                             0   

   Device Protection & Online Backup   Contract Type  Payment Method  \
0                                 No  Month-to-Month    Direct Debit   
1                                Yes        One Year     Paper Check   
2                                Yes        One Year    Direct Debit   
3                                 No  Month-to-Month     Paper Check   
4                                 No        One Year    Direct Debit   

  Monthly Charge Total Charges Churn Category  Churn Reason Age group  \
0             10            10            NaN           NaN     35-39   
1             21           703            NaN           NaN     45-49   
2             23          1014            NaN           NaN     50-54   
3             17           177            NaN           NaN     40-44   
4             28          1720            NaN           NaN     50-54   

  Data group  
0        0-4  
1        0-4  
2        0-4  
3        0-4  
4        0-4  

[5 rows x 31 columns]
In [4]:
# review what kind of unique category values in columns
print("Churn Label =",pd.unique(df["Churn Label"]))
print("Intl Active =",pd.unique(df["Intl Active"]))
print("Intl Plan =",pd.unique(df["Intl Plan"]))
print("Group =",pd.unique(df["Group"]))
print("Contract Type =",pd.unique(df["Contract Type"]))
print("Payment Method =",pd.unique(df["Payment Method"]))
print("Churn Category =",pd.unique(df["Churn Category"]))
print("Churn Reason =",pd.unique(df["Churn Reason"]))
Churn Label = ['No' 'Yes']
Intl Active = ['No' 'Yes']
Intl Plan = ['no' 'yes']
Group = ['No' 'Yes']
Contract Type = ['Month-to-Month' 'One Year' 'Two Year']
Payment Method = ['Direct Debit' 'Paper Check' 'Credit Card']
Churn Category = [nan 'Competitor' 'Other' 'Attitude' 'Dissatisfaction' 'Price']
Churn Reason = [nan 'Competitor made better offer' 'Moved'
 'Competitor had better devices'
 'Competitor offered higher download speeds' 'Attitude of support person'
 'Network reliability' "Don't know" 'Service dissatisfaction'
 'Product dissatisfaction' 'Poor expertise of online support'
 'Price too high' 'Limited range of services'
 'Lack of affordable download/upload speed' 'Long distance charges'
 'Competitor offered more data' 'Attitude of service provider'
 'Poor expertise of phone support' 'Extra data charges' 'Deceased'
 'Lack of self-service on Website']
In [5]:
# clean up data for consistency
# add Churn column to calculate # of churned customers and churn rate
df['Intl Plan'] = df['Intl Plan'].replace(['no', 'yes'],['No','Yes'])
df['Churn'] = df['Churn Label']
df['Churn'] = df['Churn'].replace(["No", "Yes"],[0, 1])
print('# of Churned customers =', df['Churn'].sum())
print('Total # of Customers = ', total_customers)
print('Churned rate = ', ("{:.2%}".format(df['Churn'].sum() / total_customers)))
# of Churned customers = 1796
Total # of Customers =  6687
Churned rate =  26.86%
In [6]:
# this block of code fixes the issue where some churn customers do not have
# a churn reason or churn category in the original data.  i assigned those
# NaN to "No info given" so the total churn customers will be the same regardless
# if the # is calculated based on churn reason / churn category / churn label
# #learned .loc assignment can change the database in place!
condition = (df['Churn Label'] == "Yes") & (df['Churn Category'].isnull())
df.loc[condition, ['Churn Category', 'Churn Reason']] = 'No info given'
In [7]:
# review churn cateogry using groupby
churn_by_category = df.groupby("Churn Category")['Churn'].sum().reset_index()
print(churn_by_category)
fig, ax = plt.subplots(figsize=(6,6))
ax.pie(churn_by_category['Churn'], labels = churn_by_category['Churn Category'], autopct='%1.1f%%')
ax.set_title('Churn Category')

plt.show()
    Churn Category  Churn
0         Attitude    287
1       Competitor    805
2  Dissatisfaction    286
3    No info given     27
4            Other    191
5            Price    200
In [8]:
fig, ax = plt.subplots(figsize=(12,12))
print(df['Churn Reason'].value_counts())
churn_by_reason = df.groupby("Churn Reason")['Churn'].sum()
churn_by_reason_sorted = churn_by_reason.sort_values(ascending = True)
print(type(churn_by_reason_sorted))
print(churn_by_reason_sorted.sum())
churn_by_reason_sorted = churn_by_reason_sorted / churn_by_reason_sorted.sum() * 100
churn_by_reason_sorted.plot(kind='barh')
ax.set_xlabel('Percentage of total')
#ax.set_xticklabels(churn_by_reason_sorted.index, rotation = 90)
plt.show()
Competitor made better offer                 303
Competitor had better devices                297
Attitude of support person                   203
Don't know                                   123
Competitor offered more data                 110
Competitor offered higher download speeds     95
Attitude of service provider                  84
Price too high                                74
Product dissatisfaction                       73
Network reliability                           69
Long distance charges                         61
Service dissatisfaction                       60
Moved                                         44
Extra data charges                            37
Limited range of services                     35
Poor expertise of online support              30
Lack of affordable download/upload speed      28
No info given                                 27
Lack of self-service on Website               26
Poor expertise of phone support               11
Deceased                                       6
Name: Churn Reason, dtype: int64
<class 'pandas.core.series.Series'>
1796
In [9]:
# this section is to separate out churn and Age group, and add churn rate
# calculate churned customers by age group using pivot table
df_age_group = df.pivot_table(values="Churn", index = "Age group", columns = "Churn Label", aggfunc='sum').reset_index()
df_age_group.columns = ('Age group', 'drop', 'Churn customers')
# df_age_group = df_age_group['Age group', 'Churn customers']
print(df_age_group)
# all customers by age group
customer_by_age_group = df.groupby('Age group')['Customer ID'].count().reset_index()
customer_by_age_group.columns = ('Age group', 'Number of customers')

# avg customer service call by age group
avg_service_call_by_age_group = df.groupby('Age group')['Customer Service Calls'].mean().reset_index()
# avg monthly bill by age group
avg_monthly_bill_by_age_group = df.groupby('Age group')['Monthly Charge'].mean().reset_index()

print(avg_service_call_by_age_group)
print(avg_monthly_bill_by_age_group)

df_age_group = df_age_group.merge(customer_by_age_group, how = 'left', left_on = 'Age group', right_on =
                "Age group").merge(avg_service_call_by_age_group, how = 'left', left_on = 'Age group', 
                right_on = "Age group").merge(avg_monthly_bill_by_age_group, how = 'left', left_on = 'Age group',
                right_on = "Age group")


df_age_group = df_age_group[['Age group', 'Churn customers', 'Number of customers', 'Monthly Charge', 'Customer Service Calls']]
print(df_age_group['Number of customers'].sum())
df_age_group['Churn customers'] = df_age_group['Churn customers'].fillna(0)
df_age_group['Churn rate in %'] = df_age_group['Churn customers'] / df_age_group['Number of customers'] * 100
print(df_age_group)

# plot dual axis plot with churn rate on one side and # of customers on the other
fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel('Age group')
ax1.xaxis.set(ticks=range(0, 16), ticklabels=df_age_group['Age group'])
ax1.tick_params(labelrotation=45)
ax1.set_ylabel('# of customers', color=color)
ax1.bar(df_age_group.index, df_age_group['Number of customers'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

# instantiate a second axes that shares the same x-axis
ax2 = ax1.twinx()  

color = 'tab:red'

ax2.set_ylabel('Percentage %', color=color)
ax2.plot(df_age_group.index, df_age_group['Churn rate in %'], color=color)
ax2.tick_params(axis='y', labelcolor=color)


# otherwise the right y-label is slightly clipped
fig.tight_layout()

plt.show()
   Age group  drop  Churn customers
0      15-19     0                7
1      20-24     0              125
2      25-29     0              164
3      30-34     0              134
4      35-39     0              134
5      40-44     0              157
6      45-49     0              172
7      50-54     0              129
8      55-59     0              158
9      60-64     0              133
10     65-69     0              158
11     70-74     0              132
12     75-79     0              104
13     80-84     0               76
14       85+     0               13
   Age group  Customer Service Calls
0       0-14                     NaN
1      15-19                0.600000
2      20-24                0.866785
3      25-29                0.829377
4      30-34                0.849638
5      35-39                0.841402
6      40-44                0.891761
7      45-49                0.857349
8      50-54                0.806569
9      55-59                0.855462
10     60-64                0.899441
11     65-69                1.118143
12     70-74                1.066445
13     75-79                1.292419
14     80-84                1.329609
15       85+                1.800000
   Age group  Monthly Charge
0       0-14             NaN
1      15-19       28.600000
2      20-24       28.699822
3      25-29       30.336795
4      30-34       29.199275
5      35-39       29.499165
6      40-44       29.508885
7      45-49       28.576369
8      50-54       29.655109
9      55-59       30.243697
10     60-64       30.756052
11     65-69       33.822785
12     70-74       40.518272
13     75-79       38.097473
14     80-84       39.653631
15       85+       37.520000
6687
   Age group  Churn customers  Number of customers  Monthly Charge  \
0      15-19                7                   50       28.600000   
1      20-24              125                  563       28.699822   
2      25-29              164                  674       30.336795   
3      30-34              134                  552       29.199275   
4      35-39              134                  599       29.499165   
5      40-44              157                  619       29.508885   
6      45-49              172                  694       28.576369   
7      50-54              129                  548       29.655109   
8      55-59              158                  595       30.243697   
9      60-64              133                  537       30.756052   
10     65-69              158                  474       33.822785   
11     70-74              132                  301       40.518272   
12     75-79              104                  277       38.097473   
13     80-84               76                  179       39.653631   
14       85+               13                   25       37.520000   

    Customer Service Calls  Churn rate in %  
0                 0.600000        14.000000  
1                 0.866785        22.202487  
2                 0.829377        24.332344  
3                 0.849638        24.275362  
4                 0.841402        22.370618  
5                 0.891761        25.363489  
6                 0.857349        24.783862  
7                 0.806569        23.540146  
8                 0.855462        26.554622  
9                 0.899441        24.767225  
10                1.118143        33.333333  
11                1.066445        43.853821  
12                1.292419        37.545126  
13                1.329609        42.458101  
14                1.800000        52.000000  
In [10]:
# plot dual axis plot with churn rate on one side and monthly charge on the other
fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel('Age group')
ax1.xaxis.set(ticks=range(0, 16), ticklabels=df_age_group['Age group'])
ax1.tick_params(labelrotation=45)
ax1.set_ylabel('Avg. monthly charge $', color=color)
ax1.bar(df_age_group.index, df_age_group['Monthly Charge'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

# instantiate a second axes that shares the same x-axis
ax2 = ax1.twinx()  

color = 'tab:red'

ax2.set_ylabel('Percentage %', color=color)
ax2.plot(df_age_group.index, df_age_group['Churn rate in %'], color=color)
ax2.tick_params(axis='y', labelcolor=color)


# otherwise the right y-label is slightly clipped
fig.tight_layout()

plt.show()
In [11]:
# group by senior vs. non-senior
churn_by_senior = df.groupby('Senior')['Churn'].sum()
customer_by_senior = df.groupby('Senior')['Customer ID'].count()
df_senior = pd.concat([churn_by_senior, customer_by_senior], axis = 1).reset_index()
df_senior['Churn rate %'] = df_senior['Churn'] / df_senior['Customer ID'] * 100
df_senior.columns = ['Senior', 'Churn', '# of customers', 'Churn rate %']
df_senior['% of total'] = df_senior['# of customers'] / df_senior['# of customers'].sum() * 100
df_senior = df_senior[['Senior', '# of customers', '% of total', 'Churn', 'Churn rate %']]
print(df_senior)
  Senior  # of customers  % of total  Churn  Churn rate %
0     No            5460   81.650965   1327     24.304029
1    Yes            1227   18.349035    469     38.223309
In [12]:
Group_customer = df.groupby("Number of Customers in Group")['Customer ID'].count()
Group_churn = df.groupby("Number of Customers in Group")['Churn'].sum()
Group_avg_call = df.groupby("Number of Customers in Group")['Customer Service Calls'].mean()
Group_avg_mo_charge = df.groupby("Number of Customers in Group")['Monthly Charge'].mean()

df_Group = pd.concat([Group_customer, Group_churn, Group_avg_call, Group_avg_mo_charge], axis = 1)
df_Group.columns = ["# of customers", "Churn", "Avg. service calls", "Avg. monthly charge"]
df_Group.index = ["No group", "2", "3", "4", "5", "6" ]
df_Group["Churn rate %"] = df_Group["Churn"] / df_Group["# of customers"] * 100
print(df_Group)

# note this is cleaner execution to combine all the info, instead of using using pivot and add groupby columns

fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel('# of customers in a group')
ax1.xaxis.set(ticks=range(0, 5), ticklabels=df_Group.index)
# ax1.tick_params(labelrotation=45)
ax1.set_ylabel('# of customers', color=color)
ax1.bar(df_Group.index, df_Group['# of customers'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

# instantiate a second axes that shares the same x-axis
ax2 = ax1.twinx()  

color = 'tab:red'

ax2.set_ylabel('Churn percentage', color=color)
ax2.plot(df_Group.index, df_Group['Churn rate %'], color=color)
ax2.tick_params(axis='y', labelcolor=color)


# otherwise the right y-label is slightly clipped
#fig.tight_layout()

plt.show()
          # of customers  Churn  Avg. service calls  Avg. monthly charge  \
No group            5166   1697            1.044328            33.493999   
2                    492     33            0.479675            22.504065   
3                    270     16            0.507407            22.714815   
4                    254     15            0.405512            22.614173   
5                    237     20            0.459916            23.139241   
6                    268     15            0.533582            22.526119   

          Churn rate %  
No group     32.849400  
2             6.707317  
3             5.925926  
4             5.905512  
5             8.438819  
6             5.597015  
In [13]:
# service call by # of customers in a group 

print(df_Group)

fig, ax1 = plt.subplots()

color = 'tab:blue'
ax1.set_xlabel('# of customers in a group')
ax1.xaxis.set(ticks=range(0, 5), ticklabels=df_Group.index)
# ax1.tick_params(labelrotation=45)
ax1.set_ylabel('Avg. service calls', color=color)
ax1.bar(df_Group.index, df_Group['Avg. service calls'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

# instantiate a second axes that shares the same x-axis
ax2 = ax1.twinx()  

color = 'tab:red'

ax2.set_ylabel('Churn percentage %', color=color)
ax2.plot(df_Group.index, df_Group['Churn rate %'], color=color)
ax2.tick_params(axis='y', labelcolor=color)


# otherwise the right y-label is slightly clipped
fig.tight_layout()

plt.show()
          # of customers  Churn  Avg. service calls  Avg. monthly charge  \
No group            5166   1697            1.044328            33.493999   
2                    492     33            0.479675            22.504065   
3                    270     16            0.507407            22.714815   
4                    254     15            0.405512            22.614173   
5                    237     20            0.459916            23.139241   
6                    268     15            0.533582            22.526119   

          Churn rate %  
No group     32.849400  
2             6.707317  
3             5.925926  
4             5.905512  
5             8.438819  
6             5.597015  
In [14]:
# review customer and churn by contract type
print('Customer contract type - \n', df['Contract Type'].value_counts())
customer_by_contract = df.groupby("Contract Type")['Customer ID'].count()
fig = plt.figure(figsize=(12,12))
ax1 = fig.add_subplot(121)
ax1.pie(customer_by_contract, labels = customer_by_contract.index, autopct ='%1.1f%%')
ax1.set(title = 'All customers contract type')

churn_by_contract = df.groupby("Contract Type")['Churn'].sum()
print('Churned customer contract type - \n', churn_by_contract)
ax2 = fig.add_subplot(122)
ax2.pie(churn_by_contract, labels = churn_by_contract.index, autopct='%1.1f%%')
ax2.set(title = 'Churned customers contract type')

plt.show()
Customer contract type - 
 Month-to-Month    3411
Two Year          1797
One Year          1479
Name: Contract Type, dtype: int64
Churned customer contract type - 
 Contract Type
Month-to-Month    1579
One Year           167
Two Year            50
Name: Churn, dtype: int64
In [15]:
#review churn category / reason composition
df_churn_group = pd.DataFrame(df.groupby(['Churn Category','Churn Reason'])['Churn'].sum())

df_churn_group['% of total'] = df_churn_group['Churn'] / df_churn_group['Churn'].sum() * 100
print(df_churn_group)
                                                           Churn  % of total
Churn Category  Churn Reason                                                
Attitude        Attitude of service provider                  84    4.677060
                Attitude of support person                   203   11.302895
Competitor      Competitor had better devices                297   16.536748
                Competitor made better offer                 303   16.870824
                Competitor offered higher download speeds     95    5.289532
                Competitor offered more data                 110    6.124722
Dissatisfaction Lack of self-service on Website               26    1.447661
                Limited range of services                     35    1.948775
                Network reliability                           69    3.841871
                Poor expertise of online support              12    0.668151
                Poor expertise of phone support               11    0.612472
                Product dissatisfaction                       73    4.064588
                Service dissatisfaction                       60    3.340757
No info given   No info given                                 27    1.503341
Other           Deceased                                       6    0.334076
                Don't know                                   123    6.848552
                Moved                                         44    2.449889
                Poor expertise of online support              18    1.002227
Price           Extra data charges                            37    2.060134
                Lack of affordable download/upload speed      28    1.559020
                Long distance charges                         61    3.396437
                Price too high                                74    4.120267
In [16]:
#customer by state review
state_customer = df.groupby(['State'])['Customer ID'].count()
state_churn = df.groupby(['State'])['Churn'].sum()
df_by_state = pd.concat([state_customer, state_churn], axis = 1)
df_by_state.columns = (['# of customers', 'Churn'])
df_by_state['Churn rate %'] = df_by_state['Churn'] / df_by_state['# of customers'] * 100
df_by_state.sort_values('Churn rate %', inplace=True, ascending=False)
print(df_by_state.head())
df_by_state.sort_values('# of customers', inplace=True, ascending=False)
print(df_by_state.head())
       # of customers  Churn  Churn rate %
State                                     
CA                 68     43     63.235294
OH                158     55     34.810127
PA                 90     30     33.333333
MD                140     46     32.857143
NE                122     40     32.786885
       # of customers  Churn  Churn rate %
State                                     
WV                213     57     26.760563
MN                168     38     22.619048
NY                167     39     23.353293
AL                161     46     28.571429
OH                158     55     34.810127
In [17]:
# international plan and international call active review
intl_plan_customer = df.groupby(['Intl Plan', 'Intl Active'])['Customer ID'].count()
intl_plan_churn = df.groupby(['Intl Plan', 'Intl Active'])['Churn'].sum()
intl_plan_ext_charge = df.groupby(['Intl Plan', 'Intl Active'])['Extra International Charges'].mean()
intl_plan_monthly_bill = df.groupby(['Intl Plan', 'Intl Active'])['Monthly Charge'].mean()
df_plan = pd.concat([intl_plan_customer, intl_plan_churn, intl_plan_ext_charge, intl_plan_monthly_bill], axis = 1)
df_plan['Churn rate'] = df_plan['Churn'] / df_plan['Customer ID'] * 100
df_plan.columns = ['# of customers', 'Churn', 'Avg. extra Intl charge', 'Avg. monthly charge', 'Churn rate %']
print(df_plan)
                       # of customers  Churn  Avg. extra Intl charge  \
Intl Plan Intl Active                                                  
No        No                     3939    788                1.691978   
          Yes                    2097    846              104.100095   
Yes       No                      177    126                0.000000   
          Yes                     474     36                0.000000   

                       Avg. monthly charge  Churn rate %  
Intl Plan Intl Active                                     
No        No                     30.639502     20.005077  
          Yes                    32.182165     40.343348  
Yes       No                     33.118644     71.186441  
          Yes                    28.402954      7.594937  
In [18]:
df_plan.loc[:,'Churn rate %'].reset_index()
Out[18]:
Intl Plan Intl Active Churn rate %
0 No No 20.005077
1 No Yes 40.343348
2 Yes No 71.186441
3 Yes Yes 7.594937
In [19]:
# review data usage group in relation to data plan
data_plan_customer = df.groupby(['Data group','Unlimited Data Plan'])['Customer ID'].count()
data_plan_churn = df.groupby(['Data group','Unlimited Data Plan'])['Churn'].sum()
data_plan_data_usage = df.groupby(['Data group','Unlimited Data Plan'])['Avg Monthly GB Download'].mean()
data_plan_ext_charge = df.groupby(['Data group','Unlimited Data Plan'])['Extra Data Charges'].mean()
data_plan_avg_charge = df.groupby(['Data group','Unlimited Data Plan'])['Monthly Charge'].mean()
df_data = pd.concat([data_plan_customer, data_plan_churn, data_plan_data_usage, data_plan_ext_charge,
                     data_plan_avg_charge], axis = 1)
df_data.columns = ['# of customers', 'Churn', 'Avg. monthly GB download', 'Avg. extra data charges', 'Avg. monthly charges']
df_data['Churn rate %'] = df_data['Churn'] / df_data['# of customers'] * 100
print(df_data)
                                # of customers  Churn  \
Data group Unlimited Data Plan                          
0-4        No                             1714    211   
           Yes                            1524    529   
5-9        No                              236     75   
           Yes                            1552    521   
10+        No                              243     67   
           Yes                            1418    393   

                                Avg. monthly GB download  \
Data group Unlimited Data Plan                             
0-4        No                                   0.391482   
           Yes                                  2.542651   
5-9        No                                   6.567797   
           Yes                                  6.605026   
10+        No                                  17.024691   
           Yes                                 17.132581   

                                Avg. extra data charges  Avg. monthly charges  \
Data group Unlimited Data Plan                                                  
0-4        No                                  4.341890             13.857060   
           Yes                                 0.000000             37.725722   
5-9        No                                 31.966102             37.741525   
           Yes                                 0.000000             37.207474   
10+        No                                 31.189300             36.160494   
           Yes                                 0.000000             35.835684   

                                Churn rate %  
Data group Unlimited Data Plan                
0-4        No                      12.310385  
           Yes                     34.711286  
5-9        No                      31.779661  
           Yes                     33.569588  
10+        No                      27.572016  
           Yes                     27.715092  
In [20]:
df_temp = df_data.reset_index()
print(df_temp)
fig = plt.figure(figsize=(6,6))
colors = ('#1f77b4', 'orange')

ax1 = fig.add_subplot(131)
ax1.set_xlabel('Unlimited plan \n 0-4 GB')
ax1.set_ylabel('Churn %')
ax1.bar(df_temp[df_temp['Data group'] == '0-4']['Unlimited Data Plan'], df_temp[df_temp['Data group'] == '0-4']
        ['Churn rate %'], color=colors)
ax1.tick_params(axis='y')
ax1.set_ylim(0, 35)

ax2 = fig.add_subplot(132)
ax2.set_xlabel('Unlimited plan \n 5-9 GB')
ax2.bar(df_temp[df_temp['Data group'] == '5-9']['Unlimited Data Plan'], df_temp[df_temp['Data group'] == '5-9']
        ['Churn rate %'], color=colors)
ax2.set_ylim(0, 35)

ax3 = fig.add_subplot(133)
ax3.set_xlabel('Unlimited plan \n 10+ GB')
ax3.bar(df_temp[df_temp['Data group'] == '10+']['Unlimited Data Plan'], df_temp[df_temp['Data group'] == '10+']
        ['Churn rate %'], color=colors)
ax3.set_ylim(0, 35)

plt.suptitle('Avg. monthly data usage')
plt.show()
  Data group Unlimited Data Plan  # of customers  Churn  \
0        0-4                  No            1714    211   
1        0-4                 Yes            1524    529   
2        5-9                  No             236     75   
3        5-9                 Yes            1552    521   
4        10+                  No             243     67   
5        10+                 Yes            1418    393   

   Avg. monthly GB download  Avg. extra data charges  Avg. monthly charges  \
0                  0.391482                 4.341890             13.857060   
1                  2.542651                 0.000000             37.725722   
2                  6.567797                31.966102             37.741525   
3                  6.605026                 0.000000             37.207474   
4                 17.024691                31.189300             36.160494   
5                 17.132581                 0.000000             35.835684   

   Churn rate %  
0     12.310385  
1     34.711286  
2     31.779661  
3     33.569588  
4     27.572016  
5     27.715092  
In [21]:
# review detail unlimited data plan churn reason
data_plan_churn = df.groupby(['Unlimited Data Plan','Churn Reason'])['Churn'].sum().reset_index()
data_plan_churn['Churn rate %'] = data_plan_churn['Churn'] / total_customers * 100
print(data_plan_churn)
   Unlimited Data Plan                               Churn Reason  Churn  \
0                   No               Attitude of service provider     18   
1                   No                 Attitude of support person     53   
2                   No              Competitor had better devices     40   
3                   No               Competitor made better offer     65   
4                   No  Competitor offered higher download speeds     15   
5                   No               Competitor offered more data     21   
6                   No                                 Don't know     13   
7                   No                         Extra data charges      6   
8                   No   Lack of affordable download/upload speed      2   
9                   No            Lack of self-service on Website      4   
10                  No                  Limited range of services      7   
11                  No                      Long distance charges     34   
12                  No                                      Moved      9   
13                  No                        Network reliability     12   
14                  No                              No info given      4   
15                  No           Poor expertise of online support      4   
16                  No            Poor expertise of phone support      2   
17                  No                             Price too high     18   
18                  No                    Product dissatisfaction     13   
19                  No                    Service dissatisfaction     13   
20                 Yes               Attitude of service provider     66   
21                 Yes                 Attitude of support person    150   
22                 Yes              Competitor had better devices    257   
23                 Yes               Competitor made better offer    238   
24                 Yes  Competitor offered higher download speeds     80   
25                 Yes               Competitor offered more data     89   
26                 Yes                                   Deceased      6   
27                 Yes                                 Don't know    110   
28                 Yes                         Extra data charges     31   
29                 Yes   Lack of affordable download/upload speed     26   
30                 Yes            Lack of self-service on Website     22   
31                 Yes                  Limited range of services     28   
32                 Yes                      Long distance charges     27   
33                 Yes                                      Moved     35   
34                 Yes                        Network reliability     57   
35                 Yes                              No info given     23   
36                 Yes           Poor expertise of online support     26   
37                 Yes            Poor expertise of phone support      9   
38                 Yes                             Price too high     56   
39                 Yes                    Product dissatisfaction     60   
40                 Yes                    Service dissatisfaction     47   

    Churn rate %  
0       0.269179  
1       0.792583  
2       0.598176  
3       0.972035  
4       0.224316  
5       0.314042  
6       0.194407  
7       0.089726  
8       0.029909  
9       0.059818  
10      0.104681  
11      0.508449  
12      0.134590  
13      0.179453  
14      0.059818  
15      0.059818  
16      0.029909  
17      0.269179  
18      0.194407  
19      0.194407  
20      0.986990  
21      2.243158  
22      3.843278  
23      3.559145  
24      1.196351  
25      1.330941  
26      0.089726  
27      1.644983  
28      0.463586  
29      0.388814  
30      0.328997  
31      0.418723  
32      0.403769  
33      0.523404  
34      0.852400  
35      0.343951  
36      0.388814  
37      0.134590  
38      0.837446  
39      0.897263  
40      0.702856  
In [22]:
#review relationship between contract type, payment method versus account duration
contract_length = df.groupby(['Contract Type','Payment Method'])['Account Length (in months)'].mean()
contract_churn = df.groupby(['Contract Type','Payment Method'])['Churn'].sum()
contract_customer = df.groupby(['Contract Type','Payment Method'])['Customer ID'].count()
contract_monthly_charge = df.groupby(['Contract Type','Payment Method'])['Monthly Charge'].mean()
contract_df = pd.concat([contract_customer, contract_churn, contract_length, contract_monthly_charge], 
                        axis = 1).reset_index()
contract_df.columns = ['Contract Type', 'Payment Method', "# of Customers", 'Churn', 'Avg. Acct Length', 
                       'Avg. Monthly Charge']
contract_df['Churn rate %'] = contract_df['Churn'] / contract_df['# of Customers'] * 100
print(contract_df)
fig,ax1 = plt.subplots()

ax1.scatter(contract_df['Avg. Acct Length'], contract_df['Churn rate %'], 
            s = contract_df['Churn'], color =['red', 'red', 'red', 'blue', 'blue', 'blue', 'violet', 'violet', 'violet'])

ax1.set_ylabel('Churn rate %')
ax1.text(10, 20,'Month-to-Month', color = 'red')
ax1.text(35, 20,'One Year', color = 'blue')
ax1.text(50, 5,'Two Year', color = 'blue')
ax1.set_title('Churn rate by account length by payment type')

ax2 = ax1.twinx()
ax2.scatter(contract_df['Avg. Acct Length'], contract_df['Avg. Monthly Charge'], color = 'orange')
ax2.set_ylabel('Avg. monthly charge $', color = 'orange')

plt.show()
    Contract Type Payment Method  # of Customers  Churn  Avg. Acct Length  \
0  Month-to-Month    Credit Card            1069    309         15.928906   
1  Month-to-Month   Direct Debit            2117   1141         18.745867   
2  Month-to-Month    Paper Check             225    129          8.155556   
3        One Year    Credit Card             645     51         38.424806   
4        One Year   Direct Debit             755    104         44.303311   
5        One Year    Paper Check              79     12         30.620253   
6        Two Year    Credit Card             900     18         52.872222   
7        Two Year   Direct Debit             830     32         56.191566   
8        Two Year    Paper Check              67      0         42.074627   

   Avg. Monthly Charge  Churn rate %  
0            25.854069     28.905519  
1            35.690600     53.897024  
2            23.386667     57.333333  
3            27.555039      7.906977  
4            34.554967     13.774834  
5            22.240506     15.189873  
6            26.456667      2.000000  
7            34.101205      3.855422  
8            19.537313      0.000000  

No comments:

Post a Comment

Google Data Analytics capstone project

Google Data Analytics capstone project

I decided to use blog as the venue to publish my Google Data Analytics learning because I wasn't sure how I can upload the findings, the...