# -*- coding: utf-8 -*- import pandas as pd import numpy as np from openpyxl import load_workbook month_dict = { '01': 'Jan.', '02': 'Feb.', '03': 'Mar.', '04': 'April', '05': 'May', '06': 'June', '07': 'July', '08': 'Aug.', '09': 'Sep.', '10': 'Oct.', '11': 'Nov.', '12': 'Dec.' } class Custom_Excel(): def __init__(self, *args, **kwargs): self.path = kwargs.get('path', None) self.prd_li = kwargs.get('prd_li', []) self.non_prd_li = kwargs.get('non_prd_li', []) self.name = kwargs.get('name', '') def get_data(self): df = pd.DataFrame() self.month = month_dict.get(self.path[0][-6:-4]) for csv in self.path: csv_data = pd.read_csv( csv, sep=',', encoding='utf-8', skiprows=[0, ], dtype={'LinkedAccountId': np.object}, ) df = pd.concat([df, csv_data], axis=0) df.reset_index(drop=True) df['UsageType'] = df['UsageType'].fillna('') df = df[df['RecordType'] == 'LinkedLineItem'] # total = df['CostBeforeTax'].sum() EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))] EBS_data = df.loc[df['UsageType'].str.contains('EBS')] snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')] ELB_data = df[df['ProductName'] == 'Elastic Load Balancing'] S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service'] DC_data = df[df['ProductName'] == 'AWS Direct Connect'] other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) & (~(df['UsageType'].str.contains('EBS'))) & (df['ProductName'] != 'Amazon Simple Storage Service') & (df['ProductName'] != 'Elastic Load Balancing') & (df['ProductName'] != 'Amazon Elastic Compute Cloud') & (df['ProductName'] != 'AWS Direct Connect') & (df['ProductName'].notna())] vat_data = df snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum() vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum() workbook = load_workbook(self.name) sheet = workbook[self.month] sheet.cell(2, 2).value = EC2_prd sheet.cell(2, 3).value = EC2_no_prd sheet.cell(2, 4).value = EC2_prd + EC2_no_prd sheet.cell(3, 2).value = EBS_prd sheet.cell(3, 3).value = EBS_no_prd sheet.cell(3, 4).value = EBS_prd + EBS_no_prd sheet.cell(4, 2).value = snapshot_prd sheet.cell(4, 3).value = snapshot_no_prd sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd sheet.cell(5, 2).value = ELB_prd sheet.cell(5, 3).value = ELB_no_prd sheet.cell(5, 4).value = ELB_prd + ELB_no_prd sheet.cell(6, 2).value = S3_prd sheet.cell(6, 3).value = S3_no_prd sheet.cell(6, 4).value = S3_prd + S3_no_prd sheet.cell(7, 2).value = DC_prd sheet.cell(7, 3).value = DC_no_prd sheet.cell(7, 4).value = DC_prd + DC_no_prd sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd) sheet.cell(9, 2).value = vat_prd sheet.cell(9, 3).value = vat_no_prd sheet.cell(9, 4).value = vat_prd + vat_no_prd sheet.cell(10, 2).value = other_prd + vat_prd sheet.cell(10, 3).value = other_no_prd + vat_no_prd sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd workbook.save(self.name) print('数据处理完成') if __name__ == '__main__': name = 'SONY PSC Monthly Bill Analysis-2021.xlsx' ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2021.xlsx' path = ['089158463905-aws-cost-allocation-ACTS-2022-03.csv', '089158463905-aws-cost-allocation-ACTS-Ningxia-2022-03.csv'] prd_li = ['594359020897'] non_prd_li = ['538753690131', '158810359537'] ecar_sona_prd_li = [''] ecar_sona = ['244618405305'] Custom_Excel(path=path, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data() Custom_Excel(path=path, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data()