# Using pandas: take the DataFrame extracted from a pcap file and split it into forward and backward flows.
import pandas as pd
def split_forward_backward_flow(dataframe: pd.DataFrame):
    """Yield the forward and backward packets of every directed IP pair.

    For each distinct source address, and for each destination address that
    source sends to, the rows of ``dataframe`` are split into:

    * forward:  rows with ``addr_ip == source`` and ``dst_ip == destination``
    * backward: rows with ``addr_ip == destination`` and ``dst_ip == source``

    Every directed pair present in the data is visited, so a conversation
    with traffic in both directions is yielded twice, with the forward and
    backward roles swapped the second time.

    Args:
        dataframe: Packet table with an ``addr_ip`` column (source IP) and a
            ``dst_ip`` column (destination IP). It is not modified.

    Yields:
        ``(forward_df, backward_df)`` tuples. Both are new DataFrames that
        carry an extra ``state`` column set to ``'forward'`` or
        ``'backward'``. ``backward_df`` is empty when there is no reply
        traffic. Callers that want a single frame per flow can
        ``pd.concat`` the two and, for example, write the result to CSV.
    """
    src_col = dataframe['addr_ip']
    dst_col = dataframe['dst_ip']

    # value_counts() is kept (rather than unique()) so pairs are still
    # produced in descending-frequency order.
    for addr_ip in src_col.value_counts().index:
        sent_by_addr = src_col == addr_ip
        sent_to_addr = dst_col == addr_ip

        for dst_ip in dst_col[sent_by_addr].value_counts().index:
            # Select rows with full-length boolean masks. Re-selecting by
            # index label would pull in unrelated rows whenever the index
            # contains duplicate labels.
            forward_mask = sent_by_addr & (dst_col == dst_ip)
            backward_mask = (src_col == dst_ip) & sent_to_addr

            # assign() returns a new frame, so the caller's dataframe is
            # never written to and no chained-assignment warning can fire.
            forward_df = dataframe[forward_mask].assign(state='forward')
            backward_df = dataframe[backward_mask].assign(state='backward')

            yield forward_df, backward_df