You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
937 lines
34 KiB
937 lines
34 KiB
import numpy as np
|
|
import pandas as pd
|
|
from openpyxl import Workbook
|
|
from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors
|
|
import boto3
|
|
import time
|
|
import os
|
|
from collections import defaultdict
|
|
import openpyxl
|
|
import time
|
|
import datetime
|
|
import calendar
|
|
from calendar import monthrange
|
|
from docxtpl import DocxTemplate
|
|
from pandas import Index
|
|
import shutil
|
|
import sys
|
|
|
|
_dict = {
|
|
# 6910账号aksk
|
|
'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY',
|
|
'AWS_SECRET_ACCESS_KEY': 'AVfPZ/XryDsoWmYB293eNPOyiO82x9xR62KFKezS',
|
|
# 7422账号aksk
|
|
'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY",
|
|
'GLOBAL_AWS_SECRET_ACCESS_KEY': "o1iQFQFSW+W/GGzjIRcNu1RDO1s72Uv2pJK/zf3z"
|
|
}
|
|
aws_aksk = {
|
|
"785781180852": {
|
|
"account_type": "amazon-china",
|
|
"aws_access_key_id": "AKIA3N5BVJG2B7BN7D7F",
|
|
"aws_secret_access_key": "FgNR1LMEfW2Z3imL4PTZCGyslxjz4HV5m4Ut4hm5",
|
|
"bucket": "785781180852billing",
|
|
"credential_type": "Amazon Access Key",
|
|
"role_arn": "",
|
|
"external_id": ""
|
|
},
|
|
}
|
|
def auto_download_file_form_s3(file: tuple):
|
|
for i in file:
|
|
payer_id = i.split('-')[0]
|
|
credential = get_credentials(payer_id)
|
|
bucket = credential.pop('bucket')
|
|
s3_client = boto3.client('s3', **credential)
|
|
print('{}开始下载'.format(i))
|
|
s3_client.download_file(
|
|
bucket,i, i)
|
|
#shutil.copy(i, data_dir+i)
|
|
#shutil.move(i, os.path.join(data_dir, i))
|
|
|
|
print('{}下载完成'.format(i))
|
|
def get_credentials(payer_id):
|
|
credential = aws_aksk.get(payer_id, None)
|
|
if not credential:
|
|
return
|
|
if credential.get('account_type', None) == 'amazon-china':
|
|
region_name = 'cn-northwest-1'
|
|
aws_access_key_id = _dict['AWS_ACCESS_KEY_ID']
|
|
aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY']
|
|
else:
|
|
region_name = 'us-east-2'
|
|
aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID']
|
|
aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY']
|
|
if credential.get('credential_type', None) == 'Amazon Access Key':
|
|
return {
|
|
'aws_access_key_id': credential['aws_access_key_id'],
|
|
'aws_secret_access_key': credential['aws_secret_access_key'],
|
|
'region_name': region_name,
|
|
'bucket': credential['bucket']
|
|
}
|
|
else:
|
|
sts = boto3.client('sts',
|
|
region_name=region_name,
|
|
aws_access_key_id=aws_access_key_id,
|
|
aws_secret_access_key=aws_secret_access_key
|
|
)
|
|
sts_credential = sts.assume_role(
|
|
RoleArn=credential['role_arn'],
|
|
RoleSessionName='crawl-billing' + str(time.time()),
|
|
DurationSeconds=900,
|
|
ExternalId=credential['external_id']
|
|
)
|
|
return {
|
|
'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'],
|
|
'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'],
|
|
'region_name': region_name,
|
|
'aws_session_token': sts_credential['Credentials']['SessionToken'],
|
|
'bucket': credential['bucket']
|
|
}
|
|
|
|
bwm_map = {
|
|
# 'LingYue': {
|
|
# '012281355708',
|
|
# '356785436510',
|
|
# '658099132956',
|
|
# '658759333659',
|
|
# '266213574481',
|
|
# '292889508331',
|
|
# '301085154281',
|
|
# '128030099971',
|
|
# '130024587818',
|
|
# '130277835603',
|
|
# '927108770652',
|
|
# '462173214055',
|
|
# },
|
|
'NSC': {
|
|
"738984596256",
|
|
"516494992171",
|
|
"516559113274",
|
|
"516718973345",
|
|
"115197553139",
|
|
"115888370350",
|
|
"482015173674",
|
|
"741600887936",
|
|
"041339000575",
|
|
"220276650644",
|
|
"220254894522",
|
|
"741612116470",
|
|
"566166420744",
|
|
"571369485139",
|
|
"571453974672",
|
|
"571620293191",
|
|
"281283439347",
|
|
"281409454190",
|
|
"391370694119",
|
|
"391652259466",
|
|
"257098133642",
|
|
"258662998378",
|
|
"259918045777",
|
|
"174766421626",
|
|
"018419931154",
|
|
"019468012204",
|
|
"019616485031",
|
|
"019628801948",
|
|
"221737289359",
|
|
"222086759490",
|
|
"222704261174",
|
|
"266213574481",
|
|
"128030099971",
|
|
"130024587818",
|
|
"042047776321",
|
|
"107974775660",
|
|
"130772574839",
|
|
"772981767791",
|
|
"804270813472",
|
|
"750594069180",
|
|
"957524751526",
|
|
"605745121453",
|
|
"605427007075",
|
|
"462729772077",
|
|
"825059405741",
|
|
"825754710372",
|
|
"826223612679",
|
|
"826726830564",
|
|
"188665867276",
|
|
"190967990884",
|
|
"265795087340",
|
|
"266066444577",
|
|
"130349229654",
|
|
},
|
|
'BCS': {
|
|
"012281355708",
|
|
"356785436510",
|
|
"658099132956",
|
|
"658759333659",
|
|
"130277835603",
|
|
"462173214055",
|
|
"483213877262",
|
|
},
|
|
'BBA': {
|
|
"114925441656",
|
|
"220347068059",
|
|
"115245083140",
|
|
"115814403771",
|
|
"482112011301",
|
|
"482174782124",
|
|
"066200769587",
|
|
"066546212151",
|
|
"066999274102",
|
|
"067021776723",
|
|
"067030146309",
|
|
"763476499527",
|
|
"607562784642",
|
|
"607580047483",
|
|
"607962074546",
|
|
"608034771178",
|
|
"902176823059",
|
|
"902161166225",
|
|
"114643259757",
|
|
"472159510273",
|
|
"470697101235",
|
|
"471757415074",
|
|
"179496244136",
|
|
"763781648782",
|
|
"125559227903",
|
|
"133017054983",
|
|
"178547782801",
|
|
"181289317951",
|
|
"902120954857",
|
|
"902219438803",
|
|
"763854435329",
|
|
"763918937436",
|
|
"610756541592",
|
|
"763933527642",
|
|
"827982547867",
|
|
"119959612117",
|
|
"126654413831",
|
|
"127966691320",
|
|
"131447637234",
|
|
"132225845962",
|
|
"133703668756",
|
|
"179751599285",
|
|
"763669372248",
|
|
"180763457193",
|
|
"902123337973",
|
|
"889045227160",
|
|
"889312712683",
|
|
}
|
|
}
|
|
|
|
csv_columns2 = Index(
|
|
[
|
|
'InvoiceID',
|
|
'PayerAccountId',
|
|
'LinkedAccountId',
|
|
'RecordType',
|
|
'RecordID',
|
|
'BillingPeriodStartDate',
|
|
'BillingPeriodEndDate',
|
|
'InvoiceDate',
|
|
'PayerAccountName',
|
|
'LinkedAccountName',
|
|
'TaxationAddress',
|
|
'PayerPONumber',
|
|
'ProductCode',
|
|
'ProductName',
|
|
'SellerOfRecord',
|
|
'UsageType',
|
|
'Operation',
|
|
'AvailabilityZone',
|
|
'RateId',
|
|
'ItemDescription',
|
|
'UsageStartDate',
|
|
'UsageEndDate',
|
|
'UsageQuantity',
|
|
'BlendedRate',
|
|
'CurrencyCode',
|
|
'CostBeforeTax',
|
|
'Credits',
|
|
'TaxAmount',
|
|
'TaxType',
|
|
'TotalCost',
|
|
'aws:autoscaling:groupName',
|
|
'aws:cloudformation:logical-id',
|
|
'aws:cloudformation:stack-id',
|
|
'aws:cloudformation:stack-name',
|
|
'aws:createdBy',
|
|
'aws:ec2launchtemplate:id',
|
|
'aws:ec2launchtemplate:version',
|
|
'aws:elasticmapreduce:instance-group-role',
|
|
'aws:elasticmapreduce:job-flow-id',
|
|
'user:APP-ID',
|
|
'user:AccountID',
|
|
'user:Author',
|
|
'user:COST TAG 1',
|
|
'user:Cost Center',
|
|
'user:DLP',
|
|
'user:Description',
|
|
'user:Environment',
|
|
'user:EnvironmentType',
|
|
'user:KubernetesCluster',
|
|
'user:ManagedByAmazonSageMakerResource',
|
|
'user:NAME',
|
|
'user:Name',
|
|
'user:Patch Group',
|
|
'user:Product ID',
|
|
'user:ProjectName',
|
|
'user:Responsible',
|
|
'user:Role',
|
|
'user:Scope',
|
|
'user:Stage',
|
|
'user:Terraform',
|
|
'user:Tool',
|
|
'user:createdBy',
|
|
'user:data-source',
|
|
'user:data-source-billing',
|
|
'user:desc',
|
|
'user:description',
|
|
'user:elasticbeanstalk:environment-id',
|
|
'user:elasticbeanstalk:environment-name',
|
|
'user:environment',
|
|
'user:hub',
|
|
'user:k8s.io/cluster-autoscaler/node-template/label/kops.k8s.io/instancegroup',
|
|
'user:k8s.io/etcd/events',
|
|
'user:k8s.io/etcd/main',
|
|
'user:k8s.io/role/master',
|
|
'user:k8s.io/role/node',
|
|
'user:kubernetes.io/cluster/infotain-e2e.k8s.local',
|
|
'user:kubernetes.io/cluster/infotain-prod.k8s.local',
|
|
'user:kubernetes.io/created-for/pv/name',
|
|
'user:kubernetes.io/created-for/pvc/name',
|
|
'user:kubernetes.io/created-for/pvc/namespace',
|
|
'user:kubernetes.io/service-name',
|
|
'user:name',
|
|
'user:namespace',
|
|
'user:owner',
|
|
'user:project',
|
|
'user:resourcePrefix',
|
|
'user:role',
|
|
'user:server-name',
|
|
'user:snsTopicArn',
|
|
'user:stage',
|
|
'user:sub-product',
|
|
'user:workload-type'])
|
|
|
|
data_type = {
|
|
'UsageStartDate': object,
|
|
'UsageEndDate': object,
|
|
'InvoiceID': object,
|
|
'ProductName': object,
|
|
'PayerAccountId': object,
|
|
'LinkedAccountId': object,
|
|
'UnBlendedCost': str,
|
|
'BlendedCost': str,
|
|
'ItemDescription': object,
|
|
'RecordType': object
|
|
}
|
|
|
|
def from_filename_get_datetime(filename: str):
|
|
dt = filename[-7:]
|
|
date = datetime.datetime.strptime(dt, "%Y-%m")
|
|
days = calendar.monthrange(date.year, date.month)
|
|
later1dt = date + datetime.timedelta(days[1] + 1)
|
|
later1 = later1dt.strftime("%Y-%m-%d")
|
|
n_days = calendar.monthrange(later1dt.year, later1dt.month)
|
|
later2dt = later1dt + datetime.timedelta(n_days[1])
|
|
later2 = later2dt.strftime("%Y-%m-%d")
|
|
return dt, later1, later2
|
|
|
|
def new_round(_float, _len=2) -> float:
|
|
"""
|
|
Parameters
|
|
----------
|
|
_float:
|
|
_len: int, 指定四舍五入需要保留的小数点后几位数为_len
|
|
|
|
Returns
|
|
-------
|
|
type ==> float, 返回四舍五入后的值
|
|
"""
|
|
try:
|
|
if isinstance(_float, float):
|
|
if 'e' in str(_float).lower():
|
|
return round(_float, _len)
|
|
elif str(_float)[::-1].find('.') <= _len:
|
|
return _float
|
|
elif str(_float)[-1] == '5':
|
|
return round(float(str(_float)[:-1] + '6'), _len)
|
|
else:
|
|
return round(_float, _len)
|
|
elif isinstance(_float, str):
|
|
_float = _float.replace(',', '')
|
|
return new_round(float(_float), _len)
|
|
else:
|
|
return round(_float, _len)
|
|
except Exception as e:
|
|
print(_float, _len, e)
|
|
|
|
alter_csv_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/alter_data/'
|
|
data_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/'
|
|
word_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/word/'
|
|
info_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/info/'
|
|
|
|
|
|
class CustomCsvByDay(object):
|
|
|
|
def __init__(self, path, *args, **kwargs):
|
|
super().__init__(*args, **kwargs)
|
|
self.csv_list = list(path)
|
|
self.credit = 0
|
|
self.account_dict = defaultdict(float)
|
|
self.tax_dict = defaultdict(float)
|
|
self.es_total_dict = dict()
|
|
self.beforetax = 0
|
|
self.credit_dict = dict()
|
|
self.es_total = 0
|
|
self.new_filename = None
|
|
self.init_csv_list()
|
|
|
|
def init_csv_list(self):
|
|
self.new_filename = os.path.join(
|
|
alter_csv_dir, '改_{}'.format(
|
|
self.csv_list[0]))
|
|
for i in range(len(self.csv_list)):
|
|
self.csv_list[i] = os.path.join(data_dir, self.csv_list[i])
|
|
|
|
def data_mergence(self):
|
|
df = pd.DataFrame()
|
|
for _csv in self.csv_list:
|
|
if _csv:
|
|
csv_data = pd.read_csv(
|
|
_csv,
|
|
sep=',',
|
|
encoding='utf-8',
|
|
header=None,
|
|
names=csv_columns2,
|
|
dtype=data_type)
|
|
df = pd.concat([df, csv_data], axis=0)
|
|
df.reset_index(drop=True)
|
|
return df
|
|
|
|
@staticmethod
|
|
def set_deleted_data():
|
|
del_dict = {
|
|
'RecordType': [
|
|
'AccountTotal',
|
|
'StatementTotal',
|
|
'Rounding',
|
|
'InvoiceTotal'],
|
|
'ItemDescription': [
|
|
'11-11 promotion for new-signup 3',
|
|
'11-11 promotion for new-signup',
|
|
'PD 7398',
|
|
'PD 7180',
|
|
],
|
|
'ProductName': 'Amazon Premium Support',
|
|
'PayerAccountId': 'PayerAccountId'
|
|
}
|
|
return del_dict
|
|
|
|
def create_new_csv(self):
|
|
df = self.data_mergence()
|
|
# # LinkedLineItem和CBRounding数据
|
|
data = df[((df['RecordType'] == 'LinkedLineItem')
|
|
& (df['ItemDescription'] != '11-11 promotion for new-signup 3')
|
|
& (df['ItemDescription'] != '11-11 promotion for new-signup')
|
|
& (df['ItemDescription'] != 'PD 7398')
|
|
& (df['ItemDescription'] != 'PD 7180')
|
|
& (df['ProductName'] != 'Amazon Premium Support'))
|
|
# | (df['RecordType'] == 'CBRounding')
|
|
]
|
|
# 获取AWS Premium Support数据
|
|
es_data = df[(df['RecordType'] == 'LinkedLineItem') &
|
|
(df['ProductName'] == 'Amazon Premium Support')]
|
|
# 获取credit数据
|
|
credit_data = df[(df['RecordType'] == 'LinkedLineItem')
|
|
& ((df['ItemDescription'] == '11-11 promotion for new-signup 3')
|
|
| (df['ItemDescription'] == '11-11 promotion for new-signup')
|
|
# | (df['ItemDescription'] == 'PD 7398')
|
|
| (df['ItemDescription'] == 'PD 7398'))
|
|
]
|
|
# 获取AccountTotal的CostBeforeTax
|
|
account_data = df[df['RecordType'] == 'AccountTotal']
|
|
# InvoiceTotal 数据
|
|
# invoice_data = df[df['RecordType'] == 'InvoiceTotal']
|
|
# StatementTotal 数据
|
|
statement_data = df[df['RecordType'] == 'StatementTotal']
|
|
self.get_credit(credit_data)
|
|
self.get_account(data)
|
|
self.get_estotal(es_data)
|
|
self.get_estotal_dict()
|
|
# 获取credit_dict
|
|
self.get_credit_dict()
|
|
insert_data = self.insert_data_to_csv(data)
|
|
es_total_data = self.insert_es_data_to_csv(data)
|
|
tax_data = self.insert_tax_data_to_csv(data)
|
|
account_data = self.alter_account(account_data)
|
|
statement_data = self.alter_statement(statement_data)
|
|
self.save_lineitem_data()
|
|
self.save_alter_data_to_csv(insert_data)
|
|
self.save_alter_data_to_csv(es_total_data)
|
|
self.save_alter_data_to_csv(tax_data)
|
|
self.save_alter_data_to_csv(account_data)
|
|
self.save_alter_data_to_csv(statement_data)
|
|
print('ES总额为{}'.format(self.es_total))
|
|
print('Credit总额为{}'.format(self.credit))
|
|
print('账号消费总额为{}'.format(self.beforetax))
|
|
print('update finish')
|
|
|
|
def get_estotal_dict(self):
|
|
for k, v in self.account_dict.items():
|
|
self.es_total_dict[k] = (v / self.beforetax * self.es_total)
|
|
|
|
def get_estotal(self, data):
|
|
for i in range(data.shape[0]):
|
|
self.es_total += float(data.iloc[i]['CostBeforeTax'])
|
|
|
|
def alter_account(self, data):
|
|
data = data.drop_duplicates(
|
|
subset=['LinkedAccountId'],
|
|
keep='first',
|
|
inplace=False)
|
|
data = data.reset_index(drop=True)
|
|
for i in range(data.shape[0]):
|
|
credit = self.credit_dict.get(data.iloc[i]['LinkedAccountId'], 0)
|
|
costbeforetax = self.account_dict.get(
|
|
data.iloc[i]['LinkedAccountId'], 0)
|
|
es_total = self.es_total_dict.get(
|
|
data.iloc[i]['LinkedAccountId'], 0)
|
|
data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format(
|
|
costbeforetax + es_total)
|
|
data.loc[i, 'Credits'] = '{:.6f}'.format(new_round(credit, 2))
|
|
data.loc[i, 'TaxAmount'] = '{:.6f}'.format(
|
|
(costbeforetax + credit + es_total) * 0.06)
|
|
data.loc[i, 'TotalCost'] = '{:.6f}'.format(
|
|
costbeforetax + credit + es_total + (costbeforetax + credit + es_total) * 0.06)
|
|
return data
|
|
|
|
def alter_statement(self, data):
|
|
data = data.drop_duplicates(
|
|
subset=['PayerAccountId'],
|
|
keep='first',
|
|
inplace=False)
|
|
data = data.reset_index(drop=True)
|
|
for i in range(data.shape[0]):
|
|
data.loc[i, 'ItemDescription'] = data.iloc[i]['ItemDescription']
|
|
data.loc[i, 'CurrencyCode'] = 'CNY'
|
|
data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format(
|
|
self.beforetax + self.es_total)
|
|
data.loc[i, 'Credits'] = '{:.2f}'.format(self.credit)
|
|
data.loc[i, 'TaxAmount'] = '{:.6f}'.format(
|
|
(self.beforetax + self.es_total + self.credit) * 0.06)
|
|
data.loc[i, 'TotalCost'] = '{:.6f}'.format(
|
|
(self.beforetax + self.es_total + self.credit) * 1.06)
|
|
return data
|
|
|
|
@staticmethod
|
|
def get_fill_data(data):
|
|
data = data.loc[data['LinkedAccountId'].notna()]
|
|
linked_set = set(data['LinkedAccountId'])
|
|
insert_data = defaultdict(list)
|
|
for i in linked_set:
|
|
linked_data = data[(data['RecordType'] == 'LinkedLineItem') & (
|
|
data['LinkedAccountId'] == i)].iloc[0]
|
|
insert_data['InvoiceID'].append(linked_data['InvoiceID'])
|
|
insert_data['PayerAccountId'].append(linked_data['PayerAccountId'])
|
|
insert_data['LinkedAccountId'].append(i)
|
|
insert_data['RecordType'].append('LinkedLineItem')
|
|
insert_data['BillingPeriodStartDate'].append(
|
|
linked_data['BillingPeriodStartDate'])
|
|
insert_data['BillingPeriodEndDate'].append(
|
|
linked_data['BillingPeriodEndDate'])
|
|
insert_data['InvoiceDate'].append(linked_data['InvoiceDate'])
|
|
insert_data['UsageStartDate'].append(linked_data['UsageStartDate'])
|
|
insert_data['UsageEndDate'].append(linked_data['UsageEndDate'])
|
|
insert_data['CurrencyCode'].append(linked_data['CurrencyCode'])
|
|
return insert_data
|
|
|
|
def insert_data_to_csv(self, data):
|
|
fill_data = self.get_fill_data(data)
|
|
for f in fill_data['LinkedAccountId']:
|
|
credit = self.credit_dict.get(f, 0)
|
|
fill_data['CostBeforeTax'].append(0)
|
|
fill_data['Credits'].append(new_round(credit))
|
|
fill_data['TaxAmount'].append(new_round(credit * 0.06))
|
|
if new_round(credit * 0.06) == 0:
|
|
fill_data['TaxType'].append('None')
|
|
else:
|
|
fill_data['TaxType'].append('VAT')
|
|
fill_data['TotalCost'].append(new_round(credit + credit * 0.06))
|
|
data = pd.DataFrame(fill_data, columns=list(data.columns))
|
|
return data
|
|
|
|
def insert_es_data_to_csv(self, data):
|
|
fill_data = self.get_fill_data(data)
|
|
for f in fill_data['LinkedAccountId']:
|
|
es_total = self.es_total_dict.get(f, 0)
|
|
fill_data['CostBeforeTax'].append(new_round(es_total))
|
|
fill_data['Credits'].append(0)
|
|
fill_data['TaxAmount'].append(new_round(es_total * 0.06))
|
|
if new_round(es_total * 0.06) == 0:
|
|
fill_data['TaxType'].append('None')
|
|
else:
|
|
fill_data['TaxType'].append('VAT')
|
|
fill_data['TotalCost'].append(
|
|
new_round(es_total + es_total * 0.06))
|
|
data = pd.DataFrame(fill_data, columns=list(data.columns))
|
|
return data
|
|
|
|
def insert_tax_data_to_csv(self, data):
|
|
fill_data = self.get_fill_data(data)
|
|
for f in fill_data['LinkedAccountId']:
|
|
tax_total = self.tax_dict.get(f, 0)
|
|
es_tax = new_round(self.es_total_dict.get(f, 0) * 0.06)
|
|
credit_tax = new_round(self.credit_dict.get(f, 0) * 0.06)
|
|
account_tax = new_round((self.account_dict.get(f, 0) +
|
|
self.credit_dict.get(f, 0) + self.es_total_dict.get(f, 0)) * 0.06, 6)
|
|
fill_data['CostBeforeTax'].append(0)
|
|
fill_data['Credits'].append(0)
|
|
fill_data['TaxAmount'].append(
|
|
'{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax))
|
|
if new_round(account_tax - tax_total - es_tax - credit_tax * 0.06) == 0:
|
|
fill_data['TaxType'].append('None')
|
|
else:
|
|
fill_data['TaxType'].append('VAT')
|
|
fill_data['TotalCost'].append(
|
|
'{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax))
|
|
data = pd.DataFrame(fill_data, columns=list(data.columns))
|
|
return data
|
|
|
|
def get_credit(self, data):
|
|
for i in range(data.shape[0]):
|
|
self.credit += float(data.iloc[i]['Credits'])
|
|
|
|
def get_account(self, data):
|
|
for i in range(data.shape[0]):
|
|
self.account_dict[data.iloc[i]['LinkedAccountId']
|
|
] += float(data.iloc[i]['CostBeforeTax'])
|
|
self.tax_dict[data.iloc[i]['LinkedAccountId']
|
|
] += float(data.iloc[i]['TaxAmount'])
|
|
self.beforetax += float(data.iloc[i]['CostBeforeTax'])
|
|
|
|
def get_credit_dict(self):
|
|
for k, v in self.account_dict.items():
|
|
self.credit_dict[k] = v / self.beforetax * self.credit
|
|
|
|
@staticmethod
|
|
def save_csv(path, data):
|
|
data.to_csv(path, mode='a', index=False, encoding='UTF-8', header=0)
|
|
|
|
@staticmethod
|
|
def save_csv_first(path, data):
|
|
data.to_csv(path, mode='w', encoding='UTF-8', index=False)
|
|
|
|
def save_lineitem_data(self):
|
|
del_dict = self.set_deleted_data()
|
|
with open(self.new_filename, 'w', encoding='UTF-8') as w:
|
|
w.write('"' + '","'.join(list(csv_columns2)) + '"' + '\n')
|
|
num = 0
|
|
for path in self.csv_list:
|
|
with open(path, 'r', encoding='UTF-8') as r:
|
|
while True:
|
|
line = r.readline()
|
|
if not line:
|
|
break
|
|
data = line.split('","')
|
|
if len(data) < 2:
|
|
continue
|
|
line_dict = dict(zip(list(csv_columns2), data))
|
|
# 通过 InvoiceID判断时会有问题
|
|
if line_dict['RecordType'] in del_dict['RecordType']:
|
|
continue
|
|
elif line_dict['ItemDescription'] in del_dict['ItemDescription']:
|
|
continue
|
|
elif line_dict['ProductName'] == del_dict['ProductName']:
|
|
continue
|
|
elif line_dict['PayerAccountId'] == del_dict['PayerAccountId']:
|
|
continue
|
|
w.write('","'.join(data))
|
|
num += 1
|
|
# print(num)
|
|
print('LineItem数据加载完成')
|
|
|
|
def save_alter_data_to_csv(self, alter_data):
|
|
_data = alter_data.fillna('').to_dict(orient='list')
|
|
with open(self.new_filename, 'a', encoding='utf-8') as a:
|
|
dbr_dict = {v: int(k) for k, v in enumerate(list(csv_columns2))}
|
|
for i in range(len(list(_data.values())[0])):
|
|
empty_data = ["" for _ in range(len(list(csv_columns2)))]
|
|
for k, v in _data.items():
|
|
empty_data[dbr_dict[k]] = v[i]
|
|
a.write(','.join('"{}"'.format(x) for x in empty_data) + '\n')
|
|
|
|
class GenerateWord(object):
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
self.args = args
|
|
self.kwargs = kwargs
|
|
|
|
def my_sum(self, data_dict, loc, length=2):
|
|
# 模板内使用
|
|
count = 0.0
|
|
for item in data_dict.values():
|
|
x = self.reverse_fill(item[loc])
|
|
count += x
|
|
a = new_round(count, length)
|
|
return a
|
|
|
|
@staticmethod
|
|
def reverse_fill(value_str: float, length=6):
|
|
return new_round(value_str, length)
|
|
|
|
@staticmethod
|
|
def get_region_display(data: dict):
|
|
for i in data.keys():
|
|
if i == 'cn-north-1':
|
|
data['北京区资源费'] = data.pop(i)
|
|
elif i == 'cn-northwest-1':
|
|
data['宁夏区资源费'] = data.pop(i)
|
|
return data
|
|
|
|
@staticmethod
|
|
def get_date_array(_date: str):
|
|
_array = monthrange(int(_date[:4]), int(_date[5:7]))
|
|
return [
|
|
"%s-%s-01" % (_date[:4], _date[5:7]),
|
|
"%s-%s-%2d" % (_date[:4], _date[5:7], _array[1]),
|
|
]
|
|
|
|
@staticmethod
|
|
def fill(value, length=2):
|
|
temp = "{:,.%sf}" % length
|
|
if isinstance(value, str):
|
|
value = value.replace(',', '')
|
|
return temp.format(float(value))
|
|
elif isinstance(value, int) or isinstance(value, float):
|
|
return temp.format(value)
|
|
else:
|
|
return temp.format(0)
|
|
|
|
@staticmethod
|
|
def display_time(day_str, str_fmt="%Y-%m-%d %H:%M:%S"):
|
|
# 设置中文编码格式
|
|
# import locale
|
|
# locale.setlocale(locale.LC_CTYPE, 'chinese')
|
|
zh_fmt = '%Y年%m月%d日'
|
|
# en_fmt = '%B %d, %Y'
|
|
return time.strftime(zh_fmt, time.strptime(str(day_str), str_fmt))
|
|
|
|
def get_currency(self):
|
|
if self.kwargs.get("currency") == 'USD':
|
|
return "$"
|
|
return "¥"
|
|
|
|
def get_temple_number(self, number):
|
|
data = [
|
|
self.fill(number * 1.06),
|
|
self.fill(number),
|
|
self.fill(0),
|
|
self.fill(number * 0.06),
|
|
]
|
|
return data
|
|
|
|
@staticmethod
|
|
def check_path_creat(path):
|
|
if not os.path.exists(path):
|
|
os.mkdir(path)
|
|
|
|
class GenerateWordFromCSV(GenerateWord):
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super().__init__(*args, **kwargs)
|
|
self.csv_list = []
|
|
self.total = 0
|
|
self.credit_total = 0
|
|
self.es_total = 0
|
|
self.linked_dict = dict()
|
|
self.filepath = list(self.kwargs['csv_list'])[0][:-4]
|
|
self.init_csv_list()
|
|
|
|
def init_csv_list(self):
|
|
for i in self.kwargs['csv_list']:
|
|
self.csv_list.append(os.path.join(data_dir, i))
|
|
|
|
def data_mergence(self):
|
|
df = pd.DataFrame()
|
|
for csv in self.csv_list:
|
|
csv_data = pd.read_csv(
|
|
csv,
|
|
sep=',',
|
|
encoding='utf-8',
|
|
header=None,
|
|
names=csv_columns2,
|
|
dtype=data_type)
|
|
df = pd.concat([df, csv_data], axis=0)
|
|
df.reset_index(drop=True)
|
|
return df
|
|
|
|
def get_data_form_csv(self):
|
|
df = self.data_mergence()
|
|
df['CostBeforeTax'] = df['CostBeforeTax'].apply(
|
|
pd.to_numeric, errors='coerce').fillna(0.0)
|
|
df['Credits'] = df['Credits'].apply(
|
|
pd.to_numeric, errors='coerce').fillna(0.0)
|
|
df['TaxAmount'] = df['TaxAmount'].apply(
|
|
pd.to_numeric, errors='coerce').fillna(0.0)
|
|
df['TotalCost'] = df['TotalCost'].apply(
|
|
pd.to_numeric, errors='coerce').fillna(0.0)
|
|
lineitem_df = df[((df['RecordType'] == 'LinkedLineItem')
|
|
& (df['ItemDescription'] != '11-11 promotion for new-signup 3')
|
|
& (df['ItemDescription'] != '11-11 promotion for new-signup')
|
|
& (df['ItemDescription'] != 'PD 7180')
|
|
& (df['ItemDescription'] != 'PD 7398')
|
|
& (df['ProductName'] != 'Amazon Premium Support'))]
|
|
es_df = df[(df['RecordType'] == 'LinkedLineItem') & (
|
|
df['ProductName'] == 'Amazon Premium Support')]
|
|
credit_data = df[(df['RecordType'] == 'LinkedLineItem')
|
|
& ((df['ItemDescription'] == '11-11 promotion for new-signup 3')
|
|
| (df['ItemDescription'] == '11-11 promotion for new-signup')
|
|
| (df['ItemDescription'] == 'PD 7180')
|
|
| (df['ItemDescription'] == 'PD 7398'))
|
|
]
|
|
self.get_es_total(es_df)
|
|
self.get_credit_total(credit_data)
|
|
self.get_linkedaccount_dict(lineitem_df)
|
|
print('消费总额为{}'.format(self.total))
|
|
print('Credit总额为{}'.format(self.credit_total))
|
|
print('ES总额为{}'.format(self.es_total))
|
|
self.save_mapping_date_to_csv()
|
|
for i in self.linked_dict:
|
|
self.linked_dict[i]['AWS Support (Enterprise)'] = \
|
|
self.linked_dict[i]['TotalCost'] / self.total * self.es_total
|
|
self.linked_dict[i]['AWS Credits'] = self.linked_dict[i]['TotalCost'] / \
|
|
self.total * self.credit_total
|
|
self.create_word(i)
|
|
|
|
def get_credit_total(self, data):
|
|
for i in range(data.shape[0]):
|
|
self.credit_total += float(data.iloc[i]['Credits'])
|
|
|
|
def get_es_total(self, data):
|
|
for i in range(data.shape[0]):
|
|
self.es_total += float(data.iloc[i]['CostBeforeTax'])
|
|
|
|
def get_linkedaccount_dict(self, data):
|
|
linked_set = set(data['LinkedAccountId'])
|
|
for i in range(data.shape[0]):
|
|
self.total += float(data.iloc[i]['CostBeforeTax'])
|
|
for i in linked_set:
|
|
linked_data = data[data['LinkedAccountId'] == i]
|
|
product_set = set(data['ProductName'])
|
|
linked_dict = {
|
|
'InvoiceID': linked_data.iloc[0]['InvoiceID'],
|
|
'TotalCost': linked_data['CostBeforeTax'].sum(),
|
|
}
|
|
for j in product_set:
|
|
linked_dict[j] = linked_data[linked_data['ProductName']
|
|
== j]['CostBeforeTax'].sum()
|
|
if i in self.linked_dict.keys():
|
|
for k, v in linked_dict.items():
|
|
if not k == 'InvoiceID':
|
|
product_num = self.linked_dict[i].get(k, 0)
|
|
self.linked_dict[i][k] = float(product_num) + float(v)
|
|
else:
|
|
self.linked_dict[i] = linked_dict
|
|
|
|
@staticmethod
|
|
def get_template():
|
|
return os.path.join(info_dir, 'china-template-2.docx')
|
|
|
|
def create_word(self, linked):
|
|
filedate, noticedate, due_data = from_filename_get_datetime(self.filepath)
|
|
result = self.linked_dict.get(linked, None)
|
|
if not result:
|
|
print('{}数据不存在'.format(linked))
|
|
invoiceid = result.pop('InvoiceID')
|
|
totalcost = result.pop('TotalCost')
|
|
product_code_dict = defaultdict()
|
|
for k, v in result.items():
|
|
product_code_dict[k] = super().get_temple_number(v)
|
|
totalcost_li = self.get_temple_number(totalcost)
|
|
date = self.get_date_array(filedate)
|
|
for i in range(len(date)):
|
|
date[i] = self.display_time(date[i], str_fmt="%Y-%m-%d")
|
|
xx = self.get_template()
|
|
template = DocxTemplate(xx)
|
|
content = {
|
|
'storage': self.get_storage(linked),
|
|
'linkedid': linked,
|
|
'noticenumber': invoiceid,
|
|
'noticedate': noticedate,
|
|
'currency': self.get_currency(),
|
|
'region_display': '北京区资源费',
|
|
'totalcost': totalcost_li,
|
|
'product_code_dict': product_code_dict,
|
|
'date': date,
|
|
'workday': due_data,
|
|
}
|
|
file_name = '{}{}_AWS账单{}.docx'.format(
|
|
self.get_storage(linked), linked, filedate)
|
|
doc_file_path = os.path.join(word_dir, self.filepath)
|
|
self.check_path_creat(doc_file_path)
|
|
doc_file_name = os.path.join(doc_file_path, file_name)
|
|
template.render(context=content)
|
|
template.save(doc_file_name)
|
|
print('{}{}_AWS账单{}.docx 生成完成'.format(self.get_storage(linked), linked, filedate))
|
|
|
|
@staticmethod
|
|
def get_storage(linkedid):
|
|
for k, v in bwm_map.items():
|
|
if linkedid in v:
|
|
return k
|
|
return ''
|
|
|
|
def get_temple_number(self, num):
|
|
data = [
|
|
self.fill(
|
|
((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 1.06),
|
|
self.fill(
|
|
(num / self.total * self.es_total) + num),
|
|
self.fill(
|
|
num / self.total * self.credit_total),
|
|
self.fill(
|
|
((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 0.06),
|
|
]
|
|
return data
|
|
|
|
def save_mapping_date_to_csv(self):
|
|
filename = os.path.join(
|
|
alter_csv_dir, '{}_mapping.csv'.format(self.filepath))
|
|
with open(filename, 'w', encoding='utf-8') as w:
|
|
w.write(','.join('"{}"'.format(x) for x in ['mapping', 'LinkedAccountId', 'UnBlendedCost']) + '\n')
|
|
for key, value in self.linked_dict.items():
|
|
mapping = ''
|
|
for k, v in bwm_map.items():
|
|
if key in v:
|
|
mapping = k
|
|
UnBlendedCost = '{:.6f}'.format((value['TotalCost'] * 1.06) +
|
|
(value['TotalCost'] / self.total * self.es_total) +
|
|
(value['TotalCost'] / self.total * self.es_total * 0.06) +
|
|
(value['TotalCost'] / self.total * self.credit_total) +
|
|
(value['TotalCost'] / self.total * self.credit_total * 0.06))
|
|
w.write(','.join('"{}"'.format(x) for x in [mapping, key, UnBlendedCost]) + '\n')
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# 获取输入执行时输入参数
|
|
month = '2024-06'
|
|
if len(sys.argv) > 1:
|
|
month = sys.argv[1]
|
|
print('执行月份:' + month)
|
|
csv_list = ['785781180852-aws-cost-allocation-ACTS-'+month+'.csv',
|
|
'785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv',]
|
|
# 1.1 循环附件名
|
|
for csv in csv_list:
|
|
# print(os.path.isfile('./'+csv))
|
|
# 1.1.1附件不存在,则拉取
|
|
if not os.path.isfile('./' + csv):
|
|
print('附件不存在:', csv)
|
|
auto_download_file_form_s3([csv])
|
|
|
|
csv_typle = (
|
|
'785781180852-aws-cost-allocation-ACTS-'+month+'.csv',
|
|
'785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv',
|
|
)
|
|
CustomCsvByDay(csv_typle).create_new_csv()
|
|
GenerateWordFromCSV(csv_list=csv_typle).get_data_form_csv() |