|
|
# -*- coding: utf-8 -*-
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
from openpyxl import load_workbook
|
|
|
|
|
|
month_dict = {
|
|
|
'01': 'Jan.',
|
|
|
'02': 'Feb.',
|
|
|
'03': 'Mar.',
|
|
|
'04': 'April',
|
|
|
'05': 'May',
|
|
|
'06': 'June',
|
|
|
'07': 'July',
|
|
|
'08': 'Aug.',
|
|
|
'09': 'Sep.',
|
|
|
'10': 'Oct.',
|
|
|
'11': 'Nov.',
|
|
|
'12': 'Dec.'
|
|
|
}
|
|
|
|
|
|
class Custom_Excel():
|
|
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
self.path = kwargs.get('path', None)
|
|
|
self.prd_li = kwargs.get('prd_li', [])
|
|
|
self.non_prd_li = kwargs.get('non_prd_li', [])
|
|
|
self.name = kwargs.get('name', '')
|
|
|
|
|
|
def get_data(self):
|
|
|
df = pd.DataFrame()
|
|
|
self.month = month_dict.get(self.path[0][-6:-4])
|
|
|
for csv in self.path:
|
|
|
csv_data = pd.read_csv(
|
|
|
csv,
|
|
|
sep=',',
|
|
|
encoding='utf-8',
|
|
|
skiprows=[0, ],
|
|
|
dtype={'LinkedAccountId': np.object},
|
|
|
)
|
|
|
df = pd.concat([df, csv_data], axis=0)
|
|
|
df.reset_index(drop=True)
|
|
|
df['UsageType'] = df['UsageType'].fillna('')
|
|
|
df = df[df['RecordType'] == 'LinkedLineItem']
|
|
|
# total = df['CostBeforeTax'].sum()
|
|
|
EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))]
|
|
|
EBS_data = df.loc[df['UsageType'].str.contains('EBS')]
|
|
|
snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')]
|
|
|
ELB_data = df[df['ProductName'] == 'Elastic Load Balancing']
|
|
|
S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service']
|
|
|
DC_data = df[df['ProductName'] == 'AWS Direct Connect']
|
|
|
other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) &
|
|
|
(~(df['UsageType'].str.contains('EBS'))) &
|
|
|
(df['ProductName'] != 'Amazon Simple Storage Service') &
|
|
|
(df['ProductName'] != 'Elastic Load Balancing') &
|
|
|
(df['ProductName'] != 'Amazon Elastic Compute Cloud') &
|
|
|
(df['ProductName'] != 'AWS Direct Connect') &
|
|
|
(df['ProductName'].notna())]
|
|
|
vat_data = df
|
|
|
snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd
|
|
|
EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd
|
|
|
EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum()
|
|
|
vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum()
|
|
|
workbook = load_workbook(self.name)
|
|
|
sheet = workbook[self.month]
|
|
|
sheet.cell(2, 2).value = EC2_prd
|
|
|
sheet.cell(2, 3).value = EC2_no_prd
|
|
|
sheet.cell(2, 4).value = EC2_prd + EC2_no_prd
|
|
|
sheet.cell(3, 2).value = EBS_prd
|
|
|
sheet.cell(3, 3).value = EBS_no_prd
|
|
|
sheet.cell(3, 4).value = EBS_prd + EBS_no_prd
|
|
|
sheet.cell(4, 2).value = snapshot_prd
|
|
|
sheet.cell(4, 3).value = snapshot_no_prd
|
|
|
sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd
|
|
|
sheet.cell(5, 2).value = ELB_prd
|
|
|
sheet.cell(5, 3).value = ELB_no_prd
|
|
|
sheet.cell(5, 4).value = ELB_prd + ELB_no_prd
|
|
|
sheet.cell(6, 2).value = S3_prd
|
|
|
sheet.cell(6, 3).value = S3_no_prd
|
|
|
sheet.cell(6, 4).value = S3_prd + S3_no_prd
|
|
|
sheet.cell(7, 2).value = DC_prd
|
|
|
sheet.cell(7, 3).value = DC_no_prd
|
|
|
sheet.cell(7, 4).value = DC_prd + DC_no_prd
|
|
|
sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd
|
|
|
sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd
|
|
|
sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd)
|
|
|
sheet.cell(9, 2).value = vat_prd
|
|
|
sheet.cell(9, 3).value = vat_no_prd
|
|
|
sheet.cell(9, 4).value = vat_prd + vat_no_prd
|
|
|
sheet.cell(10, 2).value = other_prd + vat_prd
|
|
|
sheet.cell(10, 3).value = other_no_prd + vat_no_prd
|
|
|
sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd
|
|
|
workbook.save(self.name)
|
|
|
print('数据处理完成')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
|
name = 'SONY PSC Monthly Bill Analysis-2021.xlsx'
|
|
|
ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2021.xlsx'
|
|
|
path = ['089158463905-aws-cost-allocation-ACTS-2022-03.csv',
|
|
|
'089158463905-aws-cost-allocation-ACTS-Ningxia-2022-03.csv']
|
|
|
prd_li = ['594359020897']
|
|
|
non_prd_li = ['538753690131', '158810359537']
|
|
|
ecar_sona_prd_li = ['']
|
|
|
ecar_sona = ['244618405305']
|
|
|
Custom_Excel(path=path, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data()
|
|
|
Custom_Excel(path=path, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data()
|