In [1]:
# this section of codes read 3 sheets in an excel file from the same directory
# the two sheets are combined together and same item is added together
# still need to figure out why dtype in read_excel doesn't work properly without any error message
# - dtype didn't read in properly because there are empty rows in the cells (Null / NaN)
# note that when there isn't any Null value, the cells / data types are consistent across multiple sheets
# 11/9/2022
import os
print(os.getcwd())
dirname = 'c:\\MLcourse\\HJ project'    
os.chdir(dirname) 
print(os.getcwd())
import pandas as pd
import numpy as np
file_name = "Multi_sheet.xlsx"
df = pd.read_excel(file_name, sheet_name=[0,1,2], header=1, usecols="B:E")
# if sheet_name=None, all sheets will be read in as part of the dictionary
# all sheets are stored in the dictionary and can be assessed using key / df[key]    
df_add = pd.DataFrame()
print('# of items in df ->', len(df))
for key in df:
    print(key, "\n", df[key])
    df_add = df_add.append(df[key], ignore_index=True)
# drop Null rows and convert the data types
df_add.dropna(how='all',inplace=True)
df_add = df_add.astype({'Item #':'int64', 'Item description':'str', 'UPC code':'str', 'Quantity':'int64'})
# group items together and add up all the same item # quantity
df_group = df_add.groupby(['Item #','Item description','UPC code']).sum()
print(df_group)
C:\MLCourse\HJ project
C:\MLCourse\HJ project
# of items in df -> 3
0 
       Item # Item description      UPC code  Quantity
0   862082.0     basic car #1  5.813628e+11       4.0
1   582646.0     basic car #2  2.996459e+11       2.0
2   740363.0     basic car #3  4.547111e+11       3.0
3   165209.0     basic car #4  7.560307e+11       1.0
4   745818.0     basic car #5  3.520311e+11       3.0
5   618993.0     basic car #6  2.550729e+11       5.0
6        NaN              NaN           NaN       NaN
7   703883.0     delux car #1  5.642937e+11       3.0
8   173254.0     delux car #2  9.561079e+10       2.0
9   646018.0     delux car #3  8.003565e+11       6.0
10  529847.0     delux car #4  6.777414e+11       3.0
11   56151.0     delux car #5  2.268907e+11       2.0
12  309874.0     delux car #6  5.877168e+11       1.0
1 
       Item # Item description      UPC code  Quantity
0   862082.0     basic car #1  5.813628e+11       3.0
1   582646.0     basic car #2  2.996459e+11       5.0
2   740363.0     basic car #3  4.547111e+11       3.0
3   165209.0     basic car #4  7.560307e+11       3.0
4   745818.0     basic car #5  3.520311e+11       6.0
5   618993.0     basic car #6  2.550729e+11       4.0
6        NaN              NaN           NaN       NaN
7   703883.0     delux car #1  5.642937e+11       1.0
8   173254.0     delux car #2  9.561079e+10       3.0
9   646018.0     delux car #3  8.003565e+11       5.0
10  529847.0     delux car #4  6.777414e+11       6.0
11   56151.0     delux car #5  2.268907e+11       7.0
12  309874.0     delux car #6  5.877168e+11       8.0
13       NaN              NaN           NaN       NaN
14  555988.0   special car #1  8.947164e+11       3.0
2 
     Item # Item description      UPC code  Quantity
0    56151     delux car #5  226890748623         9
1   165209     basic car #4  756030704398         4
2   173254     delux car #2   95610786143         5
3   309874     delux car #6  587716839953         9
4   529847     delux car #4  677741438376         9
5   555988   special car #1  894716423380         3
6   582646     basic car #2  299645942550         7
7   618993     basic car #6  255072922341         9
8   646018     delux car #3  800356544694        11
9   703883     delux car #1  564293728345         4
10  740363     basic car #3  454711139547         6
11  745818     basic car #5  352031103297         9
12  862082     basic car #1  581362817884         7
                                        Quantity
Item # Item description UPC code                
56151  delux car #5     226890748623.0        18
165209 basic car #4     756030704398.0         8
173254 delux car #2     95610786143.0         10
309874 delux car #6     587716839953.0        18
529847 delux car #4     677741438376.0        18
555988 special car #1   894716423380.0         6
582646 basic car #2     299645942550.0        14
618993 basic car #6     255072922341.0        18
646018 delux car #3     800356544694.0        22
703883 delux car #1     564293728345.0         8
740363 basic car #3     454711139547.0        12
745818 basic car #5     352031103297.0        18
862082 basic car #1     581362817884.0        14
C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\421086600.py:26: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_add = df_add.append(df[key], ignore_index=True) C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\421086600.py:26: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_add = df_add.append(df[key], ignore_index=True) C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\421086600.py:26: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_add = df_add.append(df[key], ignore_index=True)
In [2]:
# this sections reads all the excel files in the directory and combine them
# get current working directory, then change the current working directory to c:\MLcourse\HJ project\experiement
# import all .xlsx files into df_new and group_by Item #
import pandas as pd
import numpy as np
import os
print(os.getcwd())
    
dirname = 'c:\\MLcourse\\HJ project\\experiement'
output_file = "combined.xlsx"
os.chdir(dirname) 
print(os.getcwd())
ext = ('.xlsx')
df_new = pd.DataFrame()
for files in os.listdir(dirname):
    if files.endswith(ext):
        print("file name - ", files)  # printing file name of desired extension
        df_new = df_new.append(pd.read_excel(files, header=1, usecols="B:E"), ignore_index=True)
    else:
        continue
print(df_new.dtypes)
df_new.dropna(how='all',inplace=True)
df_new = df_new.astype({'Item #':'int64', 'Item description':'str', 'UPC code':'str', 'Quantity':'int64'})
print("converted df_new", '\n', df_new)
print(df_new.dtypes)
df_new_group = df_new.groupby(['Item #','Item description','UPC code']).sum()
print(df_new_group)
C:\MLCourse\HJ project
c:\MLcourse\HJ project\experiement
file name -  single-1.xlsx
file name -  single-2.xlsx
file name -  single-3.xlsx
Item #              float64
Item description     object
UPC code            float64
Quantity            float64
dtype: object
converted df_new 
     Item # Item description        UPC code  Quantity
0   862082     basic car #1  581362817884.0         4
1   582646     basic car #2  299645942550.0         2
2   740363     basic car #3  454711139547.0         3
3   165209     basic car #4  756030704398.0         1
4   745818     basic car #5  352031103297.0         3
5   618993     basic car #6  255072922341.0         5
7   703883     delux car #1  564293728345.0         3
8   173254     delux car #2   95610786143.0         2
9   646018     delux car #3  800356544694.0         6
10  529847     delux car #4  677741438376.0         3
11   56151     delux car #5  226890748623.0         2
12  309874     delux car #6  587716839953.0         1
13  862082     basic car #1  581362817884.0         3
14  582646     basic car #2  299645942550.0         5
15  740363     basic car #3  454711139547.0         3
16  165209     basic car #4  756030704398.0         3
17  745818     basic car #5  352031103297.0         6
18  618993     basic car #6  255072922341.0         4
20  703883     delux car #1  564293728345.0         1
21  173254     delux car #2   95610786143.0         3
22  646018     delux car #3  800356544694.0         5
23  529847     delux car #4  677741438376.0         6
24   56151     delux car #5  226890748623.0         7
25  309874     delux car #6  587716839953.0         8
27  555988   special car #1  894716423380.0         3
28   56151     delux car #5  226890748623.0         9
29  165209     basic car #4  756030704398.0         4
30  173254     delux car #2   95610786143.0         5
31  309874     delux car #6  587716839953.0         9
32  529847     delux car #4  677741438376.0         9
33  555988   special car #1  894716423380.0         3
34  582646     basic car #2  299645942550.0         7
35  618993     basic car #6  255072922341.0         9
36  646018     delux car #3  800356544694.0        11
37  703883     delux car #1  564293728345.0         4
38  740363     basic car #3  454711139547.0         6
39  745818     basic car #5  352031103297.0         9
40  862082     basic car #1  581362817884.0         7
Item #               int64
Item description    object
UPC code            object
Quantity             int64
dtype: object
                                        Quantity
Item # Item description UPC code                
56151  delux car #5     226890748623.0        18
165209 basic car #4     756030704398.0         8
173254 delux car #2     95610786143.0         10
309874 delux car #6     587716839953.0        18
529847 delux car #4     677741438376.0        18
555988 special car #1   894716423380.0         6
582646 basic car #2     299645942550.0        14
618993 basic car #6     255072922341.0        18
646018 delux car #3     800356544694.0        22
703883 delux car #1     564293728345.0         8
740363 basic car #3     454711139547.0        12
745818 basic car #5     352031103297.0        18
862082 basic car #1     581362817884.0        14
C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\2300699762.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_new = df_new.append(pd.read_excel(files, header=1, usecols="B:E"), ignore_index=True) C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\2300699762.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_new = df_new.append(pd.read_excel(files, header=1, usecols="B:E"), ignore_index=True) C:\Users\guesswho\AppData\Local\Temp\ipykernel_21936\2300699762.py:21: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df_new = df_new.append(pd.read_excel(files, header=1, usecols="B:E"), ignore_index=True)
In [3]:
df_new
Out[3]:
| Item # | Item description | UPC code | Quantity | |
|---|---|---|---|---|
| 0 | 862082 | basic car #1 | 581362817884.0 | 4 | 
| 1 | 582646 | basic car #2 | 299645942550.0 | 2 | 
| 2 | 740363 | basic car #3 | 454711139547.0 | 3 | 
| 3 | 165209 | basic car #4 | 756030704398.0 | 1 | 
| 4 | 745818 | basic car #5 | 352031103297.0 | 3 | 
| 5 | 618993 | basic car #6 | 255072922341.0 | 5 | 
| 7 | 703883 | delux car #1 | 564293728345.0 | 3 | 
| 8 | 173254 | delux car #2 | 95610786143.0 | 2 | 
| 9 | 646018 | delux car #3 | 800356544694.0 | 6 | 
| 10 | 529847 | delux car #4 | 677741438376.0 | 3 | 
| 11 | 56151 | delux car #5 | 226890748623.0 | 2 | 
| 12 | 309874 | delux car #6 | 587716839953.0 | 1 | 
| 13 | 862082 | basic car #1 | 581362817884.0 | 3 | 
| 14 | 582646 | basic car #2 | 299645942550.0 | 5 | 
| 15 | 740363 | basic car #3 | 454711139547.0 | 3 | 
| 16 | 165209 | basic car #4 | 756030704398.0 | 3 | 
| 17 | 745818 | basic car #5 | 352031103297.0 | 6 | 
| 18 | 618993 | basic car #6 | 255072922341.0 | 4 | 
| 20 | 703883 | delux car #1 | 564293728345.0 | 1 | 
| 21 | 173254 | delux car #2 | 95610786143.0 | 3 | 
| 22 | 646018 | delux car #3 | 800356544694.0 | 5 | 
| 23 | 529847 | delux car #4 | 677741438376.0 | 6 | 
| 24 | 56151 | delux car #5 | 226890748623.0 | 7 | 
| 25 | 309874 | delux car #6 | 587716839953.0 | 8 | 
| 27 | 555988 | special car #1 | 894716423380.0 | 3 | 
| 28 | 56151 | delux car #5 | 226890748623.0 | 9 | 
| 29 | 165209 | basic car #4 | 756030704398.0 | 4 | 
| 30 | 173254 | delux car #2 | 95610786143.0 | 5 | 
| 31 | 309874 | delux car #6 | 587716839953.0 | 9 | 
| 32 | 529847 | delux car #4 | 677741438376.0 | 9 | 
| 33 | 555988 | special car #1 | 894716423380.0 | 3 | 
| 34 | 582646 | basic car #2 | 299645942550.0 | 7 | 
| 35 | 618993 | basic car #6 | 255072922341.0 | 9 | 
| 36 | 646018 | delux car #3 | 800356544694.0 | 11 | 
| 37 | 703883 | delux car #1 | 564293728345.0 | 4 | 
| 38 | 740363 | basic car #3 | 454711139547.0 | 6 | 
| 39 | 745818 | basic car #5 | 352031103297.0 | 9 | 
| 40 | 862082 | basic car #1 | 581362817884.0 | 7 | 
In [4]:
df_new.to_excel(output_file, index=False)
No comments:
Post a Comment