You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

931 lines
36 KiB

# -*- coding: utf-8 -*-
import calendar
import csv
import datetime
import os
import time
from calendar import monthrange
from collections import defaultdict

import numpy as np
import openpyxl
import pandas as pd
from docxtpl import DocxTemplate
from pandas import Index
# -*- Script for the 2022-10 billing run -*-
# All working directories are siblings under one billing root.
_BILLING_ROOT = '/server/billing/daimler'
alter_dbr_dir = _BILLING_ROOT + '/alter_data'    # rewritten DBR output
data_dir = _BILLING_ROOT + '/data'               # raw DBR input files
merge_file_dir = _BILLING_ROOT + '/merge_data'   # merged per-payer DBR files
word_dir = _BILLING_ROOT + '/word'               # generated .docx notices
info_dir = _BILLING_ROOT + '/info'               # bank info and docx template
# Maps a group label to the set of account-id strings that belong to it.
# Lookups elsewhere in this file are exact string membership tests
# (``account_id in ids``), so every id must be stored without surrounding
# whitespace; the "BMBS-RD" ids previously carried a trailing space and could
# never match.
daimler_map = {
    "DGRC": {
        '032861711361',
        '032968694919',
        '033176839117',
        '412333074862',
        '412592672283',
        '413185959261',
        '571509464092',
        '571914751254',
        '572500455038',
        '604214497778',
        '605281233780',
        '605492383956',
        '606333019050',
        '607073490803',
        '607215769011',
        '607251498050',
        '745178511853',
        '745186985354',
        '745404617525',
        '843064179036',
        '843146470903',
        '843403612003',
        '938026107066',
        '959875115713',
        '846363994602',
        '847121021275',
        '847349672033',
        '847535715113',
        '787753559055',
        '157425357330',
        '157571289045',
        '222510028891',
        '222726652783',
        '394747274994',
        '641166177863',
        '641228128543',
        '150264918770',
        '150265876719',
        '154308107197',
        '154344974850',
        '154396341553',
        '154377832036',
    },
    "BMBS": {
        '005952357992',
        '006618809156',
        '008855685915',
        '099609831577',
        '149729312152',
        '158107609377',
        '158515190244',
        '158636931693',
        '160478048914',
        '161725851224',
        '179818971150',
        '180063110126',
        '180280548979',
        '259625653064',
        '263249970047',
        '263902424712',
        '264554558165',
        '381109879703',
        '381965952467',
        '382093053518',
        '420190688017',
        '420235015001',
        '420347211434',
        '468973200212',
        '470975648111',
        '471722715986',
        '606431034865',
        '606635571449',
        '614449149933',
        '614450235268',
        '662090766598',
        '662661601741',
        '680011596858',
        '693339510868',
        '693619437440',
        '693700422527',
        '694079957533',
        '694096809975',
        '694098823346',
        '720819555472',
        '720826711875',
        '720959775198',
        '721033167150',
        '721152002098',
        '721305584708',
        '731145591192',
        '738568990634',
        '738993122676',
        '824698755435',
        '824850862427',
        '825095481624',
        '830723988525',
        '830765046968',
        '833723158764',
        '845876480170',
        '846152349690',
        '846800936473',
        '864364323687',
        '864518602649',
        '864710874555',
        '902795390617',
        '939002646100',
        '965407060405',
        '965453987633',
        '965517804909',
        '060167615401',
        '076513887860',
        '420626856655',
        '421314535486',
        '329076050796',
        '329159661224',
        '376544640131',
        '376560970070',
        '585581760571',
        '586490397677',
        '836190164454',
        '836212206490',
        '444387297159',
        '444466323067',
        '551526584239',
        '551543229458',
        '603217519688',
        '623235613882',
        '633869915042',
        '003518598515',
        '003606583618',
        '634001137982',
        '721172726690',
        '723527083861',
    },
    "BMBS-Rollout": {
        '111035368608',
        '143293953769',
        '143419428863',
    },
    "BMBS-RD": {
        '814940158343',
        '815152760759',
        '815217109270',
        '815218645700',
        '815470951043',
        '815623564417',
        '815665691798',
        '815773864716',
        '815919514877',
        '816040880523',
    },
    "DGRC-Rollout": {
        '157701174715',
        '157708043020',
        '157784268223',
        '157844458947',
        '157876968059',
        '157972004829',
        '158027931531',
        '767680705323',
        '778776044487',
        '778911054371',
        '779093734419',
        '779265673788',
        '779378744728',
        '779435304058',
        '779482782114',
        '779517320026',
        '779563825200',
    },
    "MBCL": {
        '115032441317',
        '115159631000',
    },
}
# Ordered column names applied to the header-less DBR CSV files read and
# written by this script.
_DBR_COLUMNS = [
    'InvoiceID', 'PayerAccountId', 'LinkedAccountId', 'RecordType',
    'RecordId', 'ProductName', 'RateId', 'SubscriptionId',
    'PricingPlanId', 'UsageType', 'Operation', 'AvailabilityZone',
    'ReservedInstance', 'ItemDescription', 'UsageStartDate', 'UsageEndDate',
    'UsageQuantity', 'BlendedRate', 'BlendedCost', 'UnBlendedRate',
    'UnBlendedCost', 'ResourceId',
]
dbr_index = Index(_DBR_COLUMNS, dtype='object')
# Per-column dtypes passed to ``pd.read_csv``.
# The builtin ``object`` replaces the ``np.object`` alias, which NumPy
# deprecated in 1.20 and removed in 1.24; both name the same dtype.
# The two cost columns are read as ``str``.
data_type = {
    'UsageStartDate': object,
    'UsageEndDate': object,
    'InvoiceID': object,
    'ProductName': object,
    'PayerAccountId': object,
    'LinkedAccountId': object,
    'UnBlendedCost': str,
    'BlendedCost': str,
    'ItemDescription': object,
    'RecordType': object,
}
def check_path_creat(path):
    """
    Make sure the directory ``path`` exists, creating it when necessary.

    Missing parent directories are created as well, and an already existing
    directory is not an error, so the call is safe to repeat.

    :param path: directory path
    :return: None
    """
    os.makedirs(path, exist_ok=True)
def del_file_form_path(filename: str, path: str):
    """
    Delete the file ``filename`` from the directory ``path`` if it is there.

    The file is removed from ``path`` itself (the earlier version always
    removed from the merge directory, whatever ``path`` was). Nothing happens
    when the file or the directory does not exist.

    :param filename: bare file name to delete
    :param path: directory expected to contain the file
    :return: None
    """
    target = os.path.join(path, filename)
    if os.path.isfile(target):
        os.remove(target)
def new_round(_float, _len=2) -> float:
    """
    Round ``_float`` to ``_len`` decimal places, pushing a trailing 5 upwards.

    Parameters
    ----------
    _float: float, str or any other value accepted by ``round``. Strings may
        contain thousands separators (","), which are removed before parsing.
    _len: int, number of digits to keep after the decimal point.

    Returns
    -------
    The rounded value. A float that already has at most ``_len`` decimals is
    returned unchanged. If anything raises, the arguments and the error are
    printed and ``None`` is returned.
    """
    try:
        if isinstance(_float, str):
            return new_round(float(_float.replace(',', '')), _len)
        if not isinstance(_float, float):
            return round(_float, _len)

        text = str(_float)
        if 'e' in text.lower():
            # Scientific notation: the digit tricks below do not apply.
            return round(_float, _len)
        if text[::-1].find('.') <= _len:
            # Already short enough (or no decimal point at all).
            return _float
        if text[-1] == '5':
            # Replace the final 5 by 6 so binary rounding cannot round it down.
            return round(float(text[:-1] + '6'), _len)
        return round(_float, _len)
    except Exception as e:
        print(_float, _len, e)
def merge_file(filetuple: tuple):
    """
    Concatenate each group of DBR files into a single merged file.

    For every group in ``filetuple`` the merged file is named after the
    group's first entry and lives in ``merge_file_dir``. A stale merged file
    is deleted first, then every file of the group is read from ``data_dir``
    and appended line by line.

    :param filetuple: iterable of groups, each an iterable of file names
    :return: None
    """
    for group in filetuple:
        merged_name = list(group)[0]
        del_file_form_path(merged_name, merge_file_dir)
        for part in group:
            source_path = os.path.join(data_dir, part)
            merged_path = os.path.join(merge_file_dir, merged_name)
            with open(source_path, 'r', encoding='utf-8') as reader, \
                    open(merged_path, 'a', encoding='utf-8') as writer:
                for count, line in enumerate(reader, start=1):
                    writer.write(line)
                    # Progress output every 10000 lines.
                    if count % 10000 == 0:
                        print(count)
            print('{}合并完成'.format(part))
def from_filename_get_datetime(filename: str):
    """
    Derive the billing month and two follow-up dates from a file name.

    The last seven characters of ``filename`` must be a ``YYYY-MM`` month.

    :param filename: name ending in ``YYYY-MM``
    :return: tuple ``(month, later1, later2)`` where ``month`` is the
        ``YYYY-MM`` text, ``later1`` is the first day of that month plus
        (days in the month + 1) days, and ``later2`` is ``later1`` plus the
        number of days in ``later1``'s month; both formatted ``YYYY-MM-DD``.
    """
    month_text = filename[-7:]
    month_start = datetime.datetime.strptime(month_text, "%Y-%m")
    _, month_len = calendar.monthrange(month_start.year, month_start.month)
    first_later = month_start + datetime.timedelta(month_len + 1)
    _, later_month_len = calendar.monthrange(first_later.year, first_later.month)
    second_later = first_later + datetime.timedelta(later_month_len)
    return (
        month_text,
        first_later.strftime("%Y-%m-%d"),
        second_later.strftime("%Y-%m-%d"),
    )
class GenerateWord(object):
    """
    Shared helpers for rendering billing notices.

    Positional and keyword arguments are stored untouched on ``self.args``
    and ``self.kwargs``; ``get_currency`` reads ``kwargs['currency']``.
    """

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def my_sum(self, data_dict, loc, length=2):
        """
        Sum element ``loc`` of every value in ``data_dict``.

        Used from inside the template. Each element is first rounded to six
        decimals via ``reverse_fill``; the total is rounded to ``length``.
        """
        count = 0.0
        for item in data_dict.values():
            count += self.reverse_fill(item[loc])
        return new_round(count, length)

    @staticmethod
    def reverse_fill(value_str: float, length=6):
        """Turn a (possibly formatted) number back into a rounded value."""
        return new_round(value_str, length)

    @staticmethod
    def get_bank(**kwargs):
        """
        Return the path of the bank-info text file for ``kwargs['bank']``.

        Recognised keys are 'ch bj', 'ch hk', 'en bj' and 'en hk'; anything
        else (including a missing key) falls back to 'ch bj'.
        """
        info_map = {
            'ch bj': os.path.join(info_dir, 'cn_bank_bj.txt'),
            'ch hk': os.path.join(info_dir, 'cn_bank_hk.txt'),
            'en bj': os.path.join(info_dir, 'en_bank_bj.txt'),
            'en hk': os.path.join(info_dir, 'en_bank_hk.txt')
        }
        return info_map.get(kwargs.get('bank')) or info_map['ch bj']

    @staticmethod
    def get_region_display(data: dict):
        """
        Rename region keys of ``data`` to their display labels, in place.

        'cn-north-1' becomes '北京区资源费' and 'cn-northwest-1' becomes
        '宁夏区资源费'; other keys are left alone.

        :return: the same dict object
        """
        # Iterate over a snapshot: renaming keys while iterating the live
        # dict raises RuntimeError.
        for key in list(data):
            if key == 'cn-north-1':
                data['北京区资源费'] = data.pop(key)
            elif key == 'cn-northwest-1':
                data['宁夏区资源费'] = data.pop(key)
        return data

    @staticmethod
    def get_date_array(_date: str):
        """
        Return ``[first_day, last_day]`` of the month in ``_date``.

        :param _date: text starting with ``YYYY-MM``
        :return: two ``YYYY-MM-DD`` strings
        """
        year, month = _date[:4], _date[5:7]
        last_day = monthrange(int(year), int(month))[1]
        return [
            "%s-%s-01" % (year, month),
            "%s-%s-%2d" % (year, month, last_day),
        ]

    @staticmethod
    def fill(value, length=2):
        """
        Format ``value`` with thousands separators and ``length`` decimals.

        Strings may already contain "," separators. Values that are neither
        str, int nor float are rendered as zero.
        """
        temp = "{:,.%sf}" % length
        if isinstance(value, str):
            return temp.format(float(value.replace(',', '')))
        if isinstance(value, (int, float)):
            return temp.format(value)
        return temp.format(0)

    @staticmethod
    def display_time(day_str, str_fmt="%Y-%m-%d %H:%M:%S"):
        """
        Re-format a date string as ``YYYY年MM月DD``.

        The numeric parts are produced with ASCII-only ``strftime`` calls and
        joined afterwards, so the result does not depend on how the platform
        handles non-ASCII characters inside a ``strftime`` format.
        NOTE(review): the output has no trailing '日' — unchanged from the
        previous format string; confirm the template supplies it.

        :param day_str: date text (converted with ``str``)
        :param str_fmt: ``strptime`` format describing ``day_str``
        """
        parsed = time.strptime(str(day_str), str_fmt)
        return '{}年{}月{}'.format(
            time.strftime('%Y', parsed),
            time.strftime('%m', parsed),
            time.strftime('%d', parsed),
        )

    def get_currency(self):
        """Return "$" when ``kwargs['currency']`` is 'USD', else ""."""
        if self.kwargs.get("currency") == 'USD':
            return "$"
        return ""

    def get_temple_number(self, number):
        """
        Build the four formatted amounts shown for one line of the template.

        :return: ``[number * 1.06, number, 0, number * 0.06]``, each
            formatted by ``fill``
        """
        return [
            self.fill(number * 1.06),
            self.fill(number),
            self.fill(0),
            self.fill(number * 0.06),
        ]

    @staticmethod
    def check_path_creat(path):
        """Create directory ``path`` (and missing parents) if it is absent."""
        os.makedirs(path, exist_ok=True)
class DaimlerGenerateWord(GenerateWord):
    """
    Render the Word billing notice for one linked account.

    ``result`` maps product names to costs and must also hold the keys
    'InvoiceID' and 'TotalCost'; ``create_word`` removes those two keys from
    the dict it was given.
    """

    def __init__(self, linkedid, result, filename, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.linkedid = linkedid
        self.result = result
        self.filename = filename
        self.link_dict = kwargs.get('link_dict', None)

    def create_word(self):
        """
        Fill the docx template and save it below ``word_dir/<filename>/``.

        :return: one-element list holding the path of the saved document
        """
        filedate, noticedate, due_data = from_filename_get_datetime(self.filename)
        noticenumber = self.result.pop('InvoiceID')
        totalcost = self.result.pop('TotalCost')

        # Everything left in ``result`` is a per-product cost.
        product_code_dict = defaultdict()
        product_code_dict.update(
            (product, self.get_temple_number(cost))
            for product, cost in self.result.items())
        totalcost_li = self.get_temple_number(totalcost)

        # First and last day of the billing month in display form.
        date = [
            self.display_time(day, str_fmt="%Y-%m-%d")
            for day in self.get_date_array(filedate)
        ]

        bank_info_list = []
        with open(self.get_bank(), 'r', encoding='utf8') as bank_file:
            for raw_line in bank_file.readlines():
                if "{{company_email}}" in raw_line:
                    raw_line = raw_line.replace(
                        '{{company_email}}',
                        self.kwargs.get('company_email', ''))
                bank_info_list.append(raw_line.strip('\n'))

        template = DocxTemplate(self.get_template())
        storage = self.get_storage()

        # The template receives the same per-account summary twice.
        link_data = [
            {
                'linkedid': self.linkedid,
                'totalcost': totalcost_li,
                'product_code_dict': product_code_dict,
            }
            for _ in range(2)
        ]
        content = {
            'storage': storage,
            'linkedid': self.linkedid,
            'noticenumber': noticenumber,
            'noticedate': noticedate,
            'currency': self.get_currency(),
            'region_display': '北京区资源费',
            'totalcost': totalcost_li,
            'product_code_dict': product_code_dict,
            'date': date,
            'workday': due_data,
            'bank_info_list': bank_info_list,
            'link_data': link_data,
        }

        file_name = '{}{}_AWS账单{}.docx'.format(storage, self.linkedid, filedate)
        doc_file_path = os.path.join(word_dir, self.filename)
        self.check_path_creat(doc_file_path)
        doc_file_name = os.path.join(doc_file_path, file_name)

        template.render(context=content)
        template.save(doc_file_name)
        print('{}{}_AWS账单{}.docx 生成完成'.format(storage, self.linkedid, filedate))
        return [doc_file_name]

    @staticmethod
    def get_template():
        """Return the path of the docx template inside ``info_dir``."""
        return os.path.join(info_dir, 'china-template.docx')

    def get_storage(self):
        """Return the ``daimler_map`` label containing this account, or ''."""
        for label, account_ids in daimler_map.items():
            if self.linkedid in account_ids:
                return label
        return ''
class CustomCsvByHour(object):
    """
    Rewrite one merged DBR CSV and produce the derived billing artefacts.

    ``create_new_csv`` drives the whole run for the file name given to the
    constructor: the DBR is read in chunks, per-linked-account totals are
    accumulated, Enterprise Support (ES) and Enterprise Program Discount
    (EDP) amounts are spread over the linked accounts in proportion to their
    cost, Word notices are generated, and the filtered/extended DBR, a
    mapping CSV and the checklist workbook are written.
    """

    # Literal matched against ItemDescription for EDP rows.
    _EDP_DESC = 'Enterprise Program Discount'

    def __init__(self, pathlist, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.path = [pathlist]
        self.finally_dict = defaultdict(float)   # LinkedAccountId -> summed cost
        self.sourcename = None
        self.es_iid = None
        self.es_total = 0
        self.edp_iid = ''
        self.edp_total = 0
        self.exclude_id = list()                 # InvoiceIDs dropped from output
        self.unblendedcost = 0                   # summed cost of all kept rows
        self.payer = None
        self.accountdt = pd.DataFrame()
        self.statementdt = pd.DataFrame()
        self.name = None
        self.filename = None
        self.partname = None
        self.filepath = None
        self.word_dict = defaultdict()
        self.init_edp_dict = defaultdict()
        self.es_data = None
        # Set per file in get_dbr_data; initialised here so the other methods
        # can be used without it having run.
        self.isfile_5094 = False

    def set_deleted_data(self):
        """Return the criteria used to drop rows from the rewritten DBR."""
        return {
            'InvoiceID': self.exclude_id,
            'RecordType': ['AccountTotal', 'StatementTotal', 'Rounding'],
            'ItemDescription': 'AWS Solution Provider Program Discount',
            'PayerAccountId': 'PayerAccountId'
        }

    def init_dbr_fill_data(self, path):
        """Derive the output file names from the source file name ``path``."""
        self.sourcename = path
        self.filename = '改_{}'.format(path)
        self.partname = '修改部分_{}'.format(path)

    @staticmethod
    def _to_float(text):
        """Parse ``text`` as float; unparsable or missing text counts as 0.0."""
        try:
            return float(text)
        except (TypeError, ValueError):
            return 0.0

    def _billed_total(self, cost):
        """
        Gross up ``cost``: 6% on the cost itself plus the account's
        proportional ES and EDP shares, each share also carrying 6%.
        """
        return ((cost * 1.06) +
                (cost / self.unblendedcost * self.es_total) +
                (cost / self.unblendedcost * self.es_total * 0.06) +
                (cost / self.unblendedcost * self.edp_total) +
                (cost / self.unblendedcost * self.edp_total * 0.06))

    def get_dbr_data(self, path, name):
        """
        Process the merged DBR at ``path`` and write every derived output.

        :param path: full path of the merged DBR CSV
        :param name: its bare file name; the payer id is the text before the
            first '-' and the billing month is taken from the end of the name
        """
        self.filepath = path
        # add_20221113_5094: files of payer 563347375094 ignore negative
        # credit rows.
        self.isfile_5094 = '563347375094' in self.filepath
        if self.isfile_5094:
            print('{} is 5094'.format(self.filepath))
        self.name = name
        self.init_dbr_fill_data(name)
        check_path_creat(os.path.join(alter_dbr_dir, self.sourcename[:-4]))
        es_rule = 'Enterprise Support for month of'
        reader = pd.read_csv(
            path,
            iterator=True,
            sep=',',
            chunksize=10000,
            header=None,
            names=dbr_index,
            dtype=data_type,
            low_memory=False)
        num = 0
        for result in reader:
            word_dict = defaultdict(dict)
            _edp_dict = defaultdict(dict)
            num += 1
            print('read', num)
            # Non-numeric cost text (e.g. a repeated header row) counts as 0.
            result['UnBlendedCost'] = pd.to_numeric(
                result['UnBlendedCost'], errors='coerce').fillna(0.0)
            if self.isfile_5094:
                # Rows whose description mentions credit and whose cost is
                # negative take no part in the calculation.
                desc = result['ItemDescription'].astype(str)
                is_credit = desc.str.contains('credit') | desc.str.contains('Credit')
                result = result[~(is_credit & (result['UnBlendedCost'].astype(float) < 0))]
            # na=False: a missing description simply does not match, instead
            # of poisoning the boolean mask with NaN.
            data = result[~result['ItemDescription'].str.contains(
                'AWS Solution Provider Program Discount', na=False)]
            data = data[~data['ItemDescription'].str.contains('POC 6337', na=False)]
            asppd_data = result[
                result['ItemDescription'].str.contains(
                    'AWS Solution Provider Program Discount', na=False) |
                result['ItemDescription'].str.contains(
                    'AWS SOLUTION PROVIDER PROGRAM DISCOUNT', na=False)]
            self.exclude_id.extend(list(set(asppd_data['InvoiceID'])))
            exclude_data = data[data['ProductName'] == 'Amazon Premium Support']
            self.exclude_id.extend(list(exclude_data['InvoiceID']))
            spp_data = data[data['ItemDescription'] ==
                            'Billing error - SPP overdiscount issued for November period']
            self.exclude_id.extend(list(spp_data['InvoiceID']))

            edp_rows = exclude_data[exclude_data['ItemDescription'] == self._EDP_DESC]
            if not edp_rows.empty:
                # NOTE(review): assigned, not accumulated — EDP rows spread
                # over several chunks would keep only the last chunk's sum.
                self.edp_total = edp_rows['UnBlendedCost'].sum()
                self.edp_iid = edp_rows.iloc[0]['InvoiceID']
            es_rows = exclude_data[
                exclude_data['ItemDescription'].str.startswith(es_rule, na=False)]
            if not es_rows.empty:
                self.es_total += es_rows['UnBlendedCost'].sum()
                if not self.es_iid:
                    # NOTE(review): invoice id and description are both taken
                    # from the first ES row seen — confirm this is intended.
                    self.es_iid = es_rows.iloc[0]['InvoiceID']
                    self.es_data = es_rows.iloc[0]['ItemDescription']

            # Drop the part that is not displayed.
            not_excluded = ~data['InvoiceID'].isin(self.exclude_id)
            line_items = (data['RecordType'] == 'LineItem') & (data['LinkedAccountId'] != '')
            invoice_totals = (data['RecordType'] == 'InvoiceTotal') & (data['LinkedAccountId'] == '')
            dt = data[(line_items & not_excluded) | (invoice_totals & not_excluded)]
            # Drop the part that takes no part in the calculation.
            np_dt = dt.loc[dt['ProductName'].notna()]
            linkedaccountid_set = set(np_dt['LinkedAccountId'])
            productname_set = set(np_dt['ProductName'])
            account_dt = data[data['RecordType'] == 'AccountTotal']
            statement_dt = data[data['RecordType'] == 'StatementTotal']
            self.unblendedcost += np_dt['UnBlendedCost'].sum()

            for i in linkedaccountid_set:
                _linked_data = np_dt[np_dt['LinkedAccountId'] == i]
                linked_total = _linked_data['UnBlendedCost'].sum()
                self.finally_dict[i] += linked_total
                is_edp = _linked_data['ItemDescription'] == self._EDP_DESC
                edp_data = _linked_data[is_edp]
                l_data = _linked_data[_linked_data['ItemDescription'] != self._EDP_DESC]
                _edp_dict[i] = edp_data['UnBlendedCost'].sum()
                for p in productname_set:
                    word_dict[i][p] = l_data[l_data['ProductName'] == p]['UnBlendedCost'].sum()
                word_dict[i]['TotalCost'] = linked_total
                word_dict[i]['InvoiceID'] = _linked_data.iloc[0]['InvoiceID']

            # Fold this chunk's figures into the running per-account totals;
            # the InvoiceID of the first chunk is kept.
            for k, v in word_dict.items():
                if k in self.word_dict.keys():
                    for c_k, c_v in v.items():
                        if c_k != 'InvoiceID':
                            self.word_dict[k][c_k] = self.word_dict[k].get(c_k, 0) + c_v
                else:
                    self.word_dict[k] = v
            for k, v in _edp_dict.items():
                self.init_edp_dict[k] = self.init_edp_dict.get(k, 0) + v
            self.accountdt = pd.concat([self.accountdt, account_dt], axis=0)
            self.statementdt = pd.concat([self.statementdt, statement_dt], axis=0)

        # With the grand total known, spread ES and EDP over the accounts in
        # proportion to each account's cost.
        for k, v in self.word_dict.items():
            total = v.get('TotalCost', 0)
            es_share = total / self.unblendedcost * self.es_total
            edp_share = total / self.unblendedcost * self.edp_total
            v['Enterprise Program Discount'] = self.init_edp_dict.get(k, 0) + edp_share
            v['Enterprise Support'] = es_share
            v['TotalCost'] = total + es_share + edp_share
        for k, v in self.word_dict.items():
            DaimlerGenerateWord(k, v, name[:-4]).create_word()
        self.save_lineitem_data()
        self.init_part_file()
        self.save_alter_data()
        self.save_mapping_date_to_csv()
        self.save_mapping()

    def save_lineitem_data(self):
        """
        Copy the source DBR to the rewritten file, leaving out dropped rows.

        A header row is written first. Each source line is parsed with the
        ``csv`` module, so commas inside quoted fields do not shift columns;
        kept lines are written back unchanged. A line is dropped when its
        InvoiceID is excluded, its RecordType is a total/rounding type, its
        description starts with the SPP discount text or equals 'POC 6337',
        it is a repeated header row, or (5094 files only) it is a negative
        credit row.
        """
        del_dict = self.set_deleted_data()
        excluded_invoices = set(del_dict['InvoiceID'])
        excluded_types = set(del_dict['RecordType'])
        columns = list(dbr_index)
        filepath = os.path.join(
            alter_dbr_dir, self.sourcename[:-4], self.filename)
        with open(filepath, 'w', encoding='utf8') as w, \
                open(self.filepath, 'r', encoding='utf8') as r:
            w.write('"' + '","'.join(columns) + '"' + '\n')
            for num, line in enumerate(r, start=1):
                if num % 10000 == 0:
                    print(num)
                fields = next(csv.reader([line]), [])
                if not fields:
                    # Blank line: nothing to keep.
                    continue
                line_dict = dict(zip(columns, fields))
                description = line_dict.get('ItemDescription', '')
                if line_dict.get('InvoiceID') in excluded_invoices:
                    continue
                if line_dict.get('RecordType') in excluded_types:
                    continue
                if description.startswith(del_dict['ItemDescription']):
                    continue
                if description == 'POC 6337':
                    continue
                if line_dict.get('PayerAccountId') == del_dict['PayerAccountId']:
                    continue
                # add_20221113_5094
                if (self.isfile_5094 and
                        ('credit' in description or 'Credit' in description) and
                        self._to_float(line_dict.get('UnBlendedCost')) < 0):
                    continue
                w.write(line)
        print('LineItem数据加载完成')

    def save_alter_data(self):
        """Append the ES/EDP, rounding, account and statement rows."""
        print('ES总数为{}'.format(self.es_total))
        print('EDP总数为{}'.format(self.edp_total))
        print('消费总额为{}'.format(self.unblendedcost))
        print(self.finally_dict)
        print(self.exclude_id)
        self.save_data_to_dbr(self.get_fill_data(self.name))
        self.add_round_to_dbr()
        accountdt = self.alter_account_data(self.accountdt)
        self.save_data_to_dbr(accountdt.to_dict(orient='list'))
        statementdt = self.alter_statement_data(self.statementdt)
        self.save_data_to_dbr(statementdt.to_dict(orient='list'))
        print('{}文件修改完成'.format(self.sourcename))

    def alter_account_data(self, data):
        """
        Recompute the cost of every AccountTotal row.

        Rows are de-duplicated on LinkedAccountId (first kept). Each cost is
        replaced by the grossed-up total as six-decimal text; for the payer
        account the 6% on the ES share is left out.

        :return: new frame with missing values replaced by ''
        """
        data = data.drop_duplicates(
            subset=['LinkedAccountId'],
            keep='first',
            inplace=False)
        data = data.reset_index(drop=True)
        # The column receives text below; make it object first so pandas does
        # not have to upcast a float column on assignment.
        data['UnBlendedCost'] = data['UnBlendedCost'].astype(object)
        for i in range(data.shape[0]):
            linked = data.loc[i, 'LinkedAccountId']
            cost = self.finally_dict.get(linked, 0)
            if linked == self.payer:
                value = ((self.finally_dict.get(self.payer, 0) * 1.06) +
                         (cost / self.unblendedcost * self.es_total) +
                         (cost / self.unblendedcost * self.edp_total) +
                         (cost / self.unblendedcost * self.edp_total * 0.06))
            else:
                value = self._billed_total(cost)
            data.loc[i, 'UnBlendedCost'] = '{:.6f}'.format(value)
        return data.fillna('')

    def alter_statement_data(self, data):
        """
        Recompute the cost of every StatementTotal row.

        Rows are de-duplicated on PayerAccountId (first kept) and each cost
        becomes (cost total + ES + EDP) * 1.06.

        :return: new frame with missing values replaced by ''
        """
        data = data.drop_duplicates(
            subset=['PayerAccountId'],
            keep='first',
            inplace=False)
        data = data.reset_index(drop=True)
        for i in range(data.shape[0]):
            data.loc[i, 'UnBlendedCost'] = (
                self.unblendedcost + self.es_total + self.edp_total) * 1.06
        return data.fillna(value='')

    def get_fill_data(self, name):
        """
        Build the ES/EDP rows that get appended for every linked account.

        Each account yields four rows: its ES share, 6% on that share, its
        EDP share and 6% on that share. Also sets ``self.payer``.

        :param name: source file name (``<payer>-...-YYYY-MM.csv``)
        :return: mapping column name -> list of cell values
        """
        payer = name.split('-')[0]
        self.payer = payer
        date = name[-11:-4]
        insert_data = defaultdict(list)
        startdate, enddate = self.get_month_date(date)
        for k, v in self.finally_dict.items():
            es_share = v / self.unblendedcost * self.es_total
            edp_share = v / self.unblendedcost * self.edp_total
            insert_data['InvoiceID'].extend(
                [self.es_iid, self.es_iid, self.edp_iid, self.edp_iid])
            insert_data['PayerAccountId'].extend([payer] * 4)
            insert_data['LinkedAccountId'].extend([k] * 4)
            insert_data['RecordType'].extend(['LineItem'] * 4)
            insert_data['ProductName'].extend(
                ['Amazon Premium Support', '', 'Amazon Premium Support', ''])
            insert_data['UsageStartDate'].extend([startdate] * 4)
            insert_data['UsageEndDate'].extend([enddate] * 4)
            insert_data['ItemDescription'].extend([
                self.es_data,
                '税金 VAT 类型',
                'Enterprise Program Discount',
                '税金 VAT 类型'])
            insert_data['UnBlendedCost'].extend([
                '{:.6f}'.format(es_share),
                '{:.6f}'.format(es_share * 0.06),
                '{:.6f}'.format(edp_share),
                '{:.6f}'.format(edp_share * 0.06),
            ])
        return insert_data

    def get_round_fill_data(self, name):
        """
        Build the four fixed 'Rounding' rows as a DataFrame.

        The account ids, invoice ids and amounts are hard-coded. Also sets
        ``self.payer`` from ``name``.
        """
        payer = name.split('-')[0]
        self.payer = payer
        insert_data = defaultdict(list)
        linkedaccountid = 843403612003
        insert_data['PayerAccountId'].extend([payer] * 4)
        insert_data['LinkedAccountId'].extend([linkedaccountid] * 4)
        insert_data['RecordType'].extend(['Rounding'] * 4)
        insert_data['ItemDescription'].extend(
            ['Rounding of 563347375094',
             'Rounding of 563646727715',
             'Rounding of 565095721352',
             'Rounding of 011562250191'])
        insert_data['InvoiceID'].extend(
            ['1341453335',
             '1341519427',
             '1341507711',
             '1341422927'])
        insert_data['UnBlendedCost'].extend([
            '{:.6f}'.format(-8.27),
            '{:.6f}'.format(-1.86),
            '{:.6f}'.format(-0.07),
            '{:.6f}'.format(-0.02),
        ])
        data = pd.DataFrame(insert_data, columns=list(dbr_index))
        return data.fillna('')

    def save_new_dbr(self, path, data, first=False):
        """
        Write ``data`` to file ``path`` in this run's output directory.

        :param first: True overwrites and writes a header; False appends
            without a header
        """
        filepath = os.path.join(alter_dbr_dir, self.sourcename[:-4], path)
        if first:
            data.to_csv(filepath, mode='w', encoding='UTF-8', index=False)
        else:
            data.to_csv(
                filepath,
                mode='a',
                index=False,
                encoding='UTF-8',
                header=0)

    @staticmethod
    def get_month_date(date):
        """
        Return the first and last second of the month ``date`` (``YYYY-MM``).

        :return: two strings formatted ``YYYY-MM-DD HH:MM:SS``
        """
        start = datetime.datetime.strptime(date, "%Y-%m")
        if start.month == 12:
            next_month = datetime.datetime(start.year + 1, 1, 1)
        else:
            next_month = datetime.datetime(start.year, start.month + 1, 1)
        end = next_month - datetime.timedelta(seconds=1)
        fmt = "%Y-%m-%d %H:%M:%S"
        return start.strftime(fmt), end.strftime(fmt)

    def create_new_csv(self):
        """Process every configured file that exists in ``merge_file_dir``."""
        for path in self.path:
            if path in os.listdir(merge_file_dir):
                self.get_dbr_data(os.path.join(merge_file_dir, path), path)
            else:
                print('请合并文件')

    def save_data_to_dbr(self, insert_data):
        """
        Append rows to both the rewritten DBR and the 'changed part' file.

        :param insert_data: mapping column name -> list of cell values; all
            lists are expected to have the length of the first one. Columns
            not present are written as empty strings. An empty mapping
            writes nothing.
        """
        if not insert_data:
            return
        partname = os.path.join(
            alter_dbr_dir, self.sourcename[:-4], self.partname)
        filename = os.path.join(
            alter_dbr_dir, self.sourcename[:-4], self.filename)
        columns = list(dbr_index)
        position = {column: idx for idx, column in enumerate(columns)}
        row_count = len(next(iter(insert_data.values())))
        with open(filename, 'a', encoding='utf-8') as fw, \
                open(partname, 'a', encoding='utf-8') as pw:
            for i in range(row_count):
                row = [""] * len(columns)
                for k, v in insert_data.items():
                    row[position[k]] = v[i]
                text = ','.join('"{}"'.format(x) for x in row) + '\n'
                fw.write(text)
                pw.write(text)

    def init_part_file(self):
        """Create/truncate the 'changed part' file and write its header."""
        partname = os.path.join(
            alter_dbr_dir, self.sourcename[:-4], self.partname)
        with open(partname, 'w', encoding='utf-8') as f:
            f.write(','.join('"{}"'.format(x) for x in list(dbr_index)) + '\n')

    def add_round_to_dbr(self):
        """Append the fixed rounding rows, only for payer 563646727715."""
        payer = self.name.split('-')[0]
        if payer == '563646727715':
            round_fill_data = self.get_round_fill_data(self.name)
            self.save_data_to_dbr(round_fill_data.to_dict(orient='list'))

    def save_mapping_date_to_csv(self):
        """
        Write ``<payer>_mapping.csv``: one row per linked account with its
        ``daimler_map`` label and grossed-up cost.
        """
        filename = os.path.join(
            alter_dbr_dir, self.sourcename[:-4], '{}_mapping.csv'.format(self.payer))
        with open(filename, 'w', encoding='utf-8') as w:
            w.write(','.join('"{}"'.format(x) for x in
                             ['mapping', 'LinkedAccountId', 'UnBlendedCost']) + '\n')
            for key, value in self.finally_dict.items():
                mapping = ''
                # No early exit: if an id were listed under several labels,
                # the last label wins.
                for k, v in daimler_map.items():
                    if key in v:
                        mapping = k
                UnBlendedCost = '{:.6f}'.format(self._billed_total(value))
                w.write(','.join('"{}"'.format(x) for x in
                                 [mapping, key, UnBlendedCost]) + '\n')

    def save_mapping(self):
        """
        Fill the grossed-up cost into the checklist workbook.

        For every row whose fifth cell is all digits, the account is looked
        up in ``finally_dict``; a non-zero total is written to the sixth
        cell. The workbook is saved back to the same path.
        """
        path = '/server/billing/daimler/bill_checklist/Daimler22-10月账单核对表.xlsx'
        wb = openpyxl.load_workbook(path)
        table = wb['Daimler账单核对表']
        for row in table.iter_rows():
            if str(row[4].value).isdigit():
                _total = self.finally_dict.get(str(row[4].value), None)
                if _total:
                    row[5].value = self._billed_total(_total)
        wb.save(path)
        print('mapping数据处理完成')
def daimler_del_tags_to_dbr(
        filepath='/server/billing/daimler/merge_data/563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-09.csv',
        alterpath='/server/billing/daimler/merge_data/563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-09-1.csv'):
    """
    Copy a DBR file, keeping only the columns named in ``dbr_index``.

    The first line with at least ``len(dbr_index)`` fields is treated as the
    header: the positions of its fields that appear in ``dbr_index`` are the
    columns kept for that line and all following ones. Lines with fewer
    fields are reported and skipped. Fields are split on '","', so the file
    must quote every field.

    :param filepath: source file (before trimming)
    :param alterpath: destination file (after trimming)
    :return: None
    """
    with open(filepath, 'r', encoding='utf-8') as r, \
            open(alterpath, 'w', encoding='utf-8') as w:
        num = 0
        columns_index = None
        for line in r:
            # Drop the newline first, then the outer quotes; slicing a fixed
            # two characters off the end would eat data on a final line that
            # has no trailing newline.
            data = line.rstrip('\n')[1:-1].split('","')
            if len(data) < len(dbr_index):
                print('无效数据已忽略')
                continue
            if columns_index is None:
                columns_index = [pos for pos, field in enumerate(data)
                                 if field in dbr_index]
            target_data = [data[pos] for pos in columns_index if pos < len(data)]
            w.write('"' + '","'.join(target_data) + '"' + '\n')
            num += 1
            if num % 10000 == 0:
                print(num)
    print('{}数据裁剪完成'.format('563646727715'))
if __name__ == '__main__':
    # Each entry groups the DBR files of one payer; the first name of a group
    # is also the name of its merged file. Commented-out groups are skipped.
    dbr_typle = (
        # ('563347375094-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
        #  '563347375094-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',),
        # ('563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
        #  '563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',),
        # ('565095721352-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
        #  '565095721352-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',),
        # ('565359735310-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
        #  '565359735310-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',),
        # ('010265887827-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
        #  '010265887827-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',),
        (
            '011562250191-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-10.csv',
            '011562250191-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-10.csv',
        ),
    )
    # 1. Merge the files first.
    # merge_file(dbr_typle),
    for group in dbr_typle:
        processor = CustomCsvByHour(group[0])
        processor.create_new_csv()
    # daimler_del_tags_to_dbr()