You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
6.2 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from openpyxl import load_workbook
month_dict = {
'01': 'Jan.',
'02': 'Feb.',
'03': 'Mar.',
'04': 'April',
'05': 'May',
'06': 'June',
'07': 'July',
'08': 'Aug.',
'09': 'Sep.',
'10': 'Oct.',
'11': 'Nov.',
'12': 'Dec.'
}
class Custom_Excel():
def __init__(self, *args, **kwargs):
self.path = kwargs.get('path', None)
self.prd_li = kwargs.get('prd_li', [])
self.non_prd_li = kwargs.get('non_prd_li', [])
self.name = kwargs.get('name', '')
def get_data(self):
df = pd.DataFrame()
self.month = month_dict.get(self.path[0][-6:-4])
for csv in self.path:
csv_data = pd.read_csv(
csv,
sep=',',
encoding='utf-8',
skiprows=[0, ],
dtype={'LinkedAccountId': np.object},
)
df = pd.concat([df, csv_data], axis=0)
df.reset_index(drop=True)
df['UsageType'] = df['UsageType'].fillna('')
df = df[df['RecordType'] == 'LinkedLineItem']
# total = df['CostBeforeTax'].sum()
EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))]
EBS_data = df.loc[df['UsageType'].str.contains('EBS')]
snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')]
ELB_data = df[df['ProductName'] == 'Elastic Load Balancing']
S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service']
DC_data = df[df['ProductName'] == 'AWS Direct Connect']
other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) &
(~(df['UsageType'].str.contains('EBS'))) &
(df['ProductName'] != 'Amazon Simple Storage Service') &
(df['ProductName'] != 'Elastic Load Balancing') &
(df['ProductName'] != 'Amazon Elastic Compute Cloud') &
(df['ProductName'] != 'AWS Direct Connect') &
(df['ProductName'].notna())]
vat_data = df
snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd
EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd
EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum()
vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum()
workbook = load_workbook(self.name)
sheet = workbook[self.month]
sheet.cell(2, 2).value = EC2_prd
sheet.cell(2, 3).value = EC2_no_prd
sheet.cell(2, 4).value = EC2_prd + EC2_no_prd
sheet.cell(3, 2).value = EBS_prd
sheet.cell(3, 3).value = EBS_no_prd
sheet.cell(3, 4).value = EBS_prd + EBS_no_prd
sheet.cell(4, 2).value = snapshot_prd
sheet.cell(4, 3).value = snapshot_no_prd
sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd
sheet.cell(5, 2).value = ELB_prd
sheet.cell(5, 3).value = ELB_no_prd
sheet.cell(5, 4).value = ELB_prd + ELB_no_prd
sheet.cell(6, 2).value = S3_prd
sheet.cell(6, 3).value = S3_no_prd
sheet.cell(6, 4).value = S3_prd + S3_no_prd
sheet.cell(7, 2).value = DC_prd
sheet.cell(7, 3).value = DC_no_prd
sheet.cell(7, 4).value = DC_prd + DC_no_prd
sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd
sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd
sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd)
sheet.cell(9, 2).value = vat_prd
sheet.cell(9, 3).value = vat_no_prd
sheet.cell(9, 4).value = vat_prd + vat_no_prd
sheet.cell(10, 2).value = other_prd + vat_prd
sheet.cell(10, 3).value = other_no_prd + vat_no_prd
sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd
workbook.save(self.name)
print('数据处理完成')
if __name__ == '__main__':
name = 'SONY PSC Monthly Bill Analysis-2021.xlsx'
ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2021.xlsx'
path = ['089158463905-aws-cost-allocation-ACTS-2022-03.csv',
'089158463905-aws-cost-allocation-ACTS-Ningxia-2022-03.csv']
prd_li = ['594359020897']
non_prd_li = ['538753690131', '158810359537']
ecar_sona_prd_li = ['']
ecar_sona = ['244618405305']
Custom_Excel(path=path, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data()
Custom_Excel(path=path, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data()