透明加密
-
透明:对使用者来说是无感知的
文件在硬盘上是密文,在内存中是明文。一旦离开使用环境,由于应用程序无法得到自动解密的服务而无法打开,从而起到保护文件内容的效果
当使用者在打开或编辑指定文件时,系统将自动对未加密的文件进行加密,对已加密的文件自动解密
加密等级
加密等级 | 优缺点 |
---|---|
客户端加密,服务端加密 | 优点:加密粒度小,加密数据量可控。缺点:缓存级加密,性能较差,索引无法使用 |
集簇级加密,对整个集簇进行加密,初始化时确定集簇是否加密 | 优点:架构简单,使用成本低,操作系统缓存级加密(数据缓存刷入、读取磁盘时加解密)。缺点:加密细粒度大,所有集簇内对象都会加密 |
表空间级加密,数据库级加密,表级加密 | 优点:加密细粒度降低。缺点:需要加密的对象较多时,使用成本较高 |
加密算法
流式加密:密钥长度与明文数据长度一致.
公钥加密:降低了密钥管理成本,加密性能差
分组加密:目前主流的加密算法,性能最优,应用最广。
国际公认的分组加密算法是AES
AES拥有5种加密模式:
-
ECB mode: Electronic Code Book mode,电子密码本模式
- 将明文分组加密之后的结果将直接成为密文分组
- 明文中的重复排列会反映在密文中,通过删除、替换密文分组可以对明文进行操作
-
CFB mode: Cipher FeedBack mode,密文反馈模式
- 前一个密文分组会被送回到密码算法的输入端
- 加密不支持并行计算
-
OFB mode: Output FeedBack mode,输出反馈模式
- 密码算法的输出会反馈到密码算法的输入中
- 不支持并行计算
-
CBC mode: Cipher Block Chaining mode,密码分组链接模式
- 首先将明文分组与前一个密文分组进行XOR运算,然后再进行加密
- 加密不支持并行计算
-
CTR mode: Counter mode,计数器模式
- 通过将计数器加密得到的比特序列,与明文分组进行XOR
- 支持并行计算
PG启动
postgresmain 启动时会验证是否开启encryption,读取postgresql.conf中的encryption_key_command 配置并运行。得到encryption_key后分配空间,验证密码后启动database
setup_encryption
/*
 * setup_encryption
 *
 * One-time encryption setup (excerpt; "......" marks code omitted from
 * these notes).  The visible part creates the four cipher contexts used by
 * encrypt_block()/decrypt_block() and allocates the buffer in which WAL
 * pages are encrypted before being written.
 */
void
setup_encryption(void)
{
#ifdef USE_ENCRYPTION
/*
* Setup OpenSSL.
*
* None of these functions should return a value or raise error.
*/
......
/*
* Create four contexts: {encrypt, decrypt} x {regular data, buffile}.
* Signature of the helper:
* init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile);
*/
init_encryption_context(&ctx_encrypt, true, false);
init_encryption_context(&ctx_decrypt, false, false);
init_encryption_context(&ctx_encrypt_buffile, true, true);
init_encryption_context(&ctx_decrypt_buffile, false, true);
.......
/* WAL encryption buffer, allocated in TopMemoryContext. */
encrypt_buf_xlog = (char *) MemoryContextAlloc(TopMemoryContext,
ENCRYPT_BUF_XLOG_SIZE);
// The buffer holds XLOG_ENCRYPT_BUF_PAGES pages (8 pages according to the
// original note):
// #define ENCRYPT_BUF_XLOG_SIZE (XLOG_ENCRYPT_BUF_PAGES * XLOG_BLCKSZ)
.......
}
init_encryption_context
/*
 * init_encryption_context
 *
 * Allocate an OpenSSL cipher context, store it in *ctx_p and bind it to the
 * cipher appropriate for the data it will process.
 *
 * ctx_p   - receives the newly allocated context
 * encrypt - true to initialize for encryption, false for decryption
 * buffile - true selects AES-128-CBC, false selects AES-128-CTR
 *
 * Key and IV are not supplied here (they are passed as NULL); only the
 * cipher and the direction are fixed.  Padding is disabled.  Any OpenSSL
 * failure is reported through evp_error().
 */
static void
init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile)
{
    EVP_CIPHER_CTX *new_ctx;
    const EVP_CIPHER *algo;
    int         init_ok;

    if (buffile)
        algo = EVP_aes_128_cbc();
    else
        algo = EVP_aes_128_ctr();

    new_ctx = EVP_CIPHER_CTX_new();
    *ctx_p = new_ctx;
    if (new_ctx == NULL)
        evp_error();

    /* Fix cipher and direction now; key and IV follow at use time. */
    init_ok = encrypt
        ? EVP_EncryptInit_ex(new_ctx, algo, NULL, NULL, NULL)
        : EVP_DecryptInit_ex(new_ctx, algo, NULL, NULL, NULL);
    if (init_ok != 1)
        evp_error();

    /*
     * CTR mode is effectively a stream cipher, so its block size is 1; the
     * CBC context used for buffile works on 16-byte blocks.
     */
    Assert(EVP_CIPHER_CTX_block_size(new_ctx) == (buffile ? 16 : 1));

    EVP_CIPHER_CTX_set_padding(new_ctx, 0);

    /* The tweak serves as the IV, so the sizes must agree. */
    Assert(EVP_CIPHER_CTX_iv_length(new_ctx) == TWEAK_SIZE);
    Assert(EVP_CIPHER_CTX_key_length(new_ctx) == ENCRYPTION_KEY_LENGTH);
}
wal的读取与写入
XLogWrite
/*
 * XLogWrite
 *
 * Write and/or flush WAL buffer pages (excerpt; "........" marks code
 * omitted from these notes).
 *
 * WriteRqst - requested positions; pages are written until
 *             LogwrtResult.Write reaches WriteRqst.Write, and an fsync is
 *             issued if LogwrtResult.Flush is behind WriteRqst.Flush.
 * flexible  - if true, leave the write loop as soon as one batch of pages
 *             has been written.
 *
 * When data_encrypted is set, pages are encrypted into encrypt_buf_xlog in
 * batches of at most XLOG_ENCRYPT_BUF_PAGES before being handed to
 * XLogWritePages(); otherwise they are written straight from the WAL
 * buffers.
 */
static void
XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
{
bool ispartialpage;
bool last_iteration;
bool finishing_seg;
bool use_existent;
int curridx;
int npages;
int startidx;
uint32 startoffset;
........
// Part 1: write XLOG pages
LogwrtResult = XLogCtl->LogwrtResult; // refresh the local copy from the shared XLogCtl state
// npages counts the pages queued for writing; startidx is the buffer index
// of the first queued page; startoffset is the segment offset at which the
// queued pages start.
npages = 0;
startidx = 0;
startoffset = 0;
// XLogRecPtrToBufIdx maps LogwrtResult.Write to a buffer page index.
curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
// Loop while the written position is behind the requested write position.
while (LogwrtResult.Write < WriteRqst.Write)
{
// End position recorded for the buffer page at curridx.
XLogRecPtr EndPtr = XLogCtl->xlblocks[curridx];
// The already-written position must lie before this page's end position.
if (LogwrtResult.Write >= EndPtr)
elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
(uint32) (LogwrtResult.Write >> 32),
(uint32) LogwrtResult.Write,
(uint32) (EndPtr >> 32), (uint32) EndPtr);
/* Advance the local LogwrtResult.Write to the end of the current buffer page. */
LogwrtResult.Write = EndPtr;
/* True when the request ends inside this page, i.e. only part of it was asked for. */
ispartialpage = WriteRqst.Write < LogwrtResult.Write;
// If this page is not in the currently open segment, close that file and
// open the segment the page belongs to.
if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
wal_segment_size))
{
Assert(npages == 0);
if (openLogFile >= 0)
XLogFileClose();
XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
wal_segment_size);
/* create/use new log file */
use_existent = true;
openLogFile = XLogFileInit(openLogSegNo, &use_existent, true);
ReserveExternalFD();
}
/* Make sure we have the current logfile open */
if (openLogFile < 0)
{
XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
wal_segment_size);
openLogFile = XLogFileOpen(openLogSegNo);
ReserveExternalFD();
}
/* Add current page to the set of pending pages-to-dump */
if (npages == 0)
{
/* first of group */
startidx = curridx;
// LogwrtResult.Write is now this page's end, so subtracting XLOG_BLCKSZ
// yields the LSN at which this page starts.
startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
wal_segment_size);
}
npages++;
// True once this page reaches the requested write position.
last_iteration = WriteRqst.Write <= LogwrtResult.Write;
// True when the queued pages are full and extend to the end of the segment.
finishing_seg = !ispartialpage &&
(startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
if (last_iteration ||
curridx == XLogCtl->XLogCacheBlck ||
finishing_seg)
{
// Hand the queued pages to XLogWritePages().
char *from;
from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
if (data_encrypted)
{
int i,
nencrypted;
char *to;
uint32 encr_offset;
nencrypted = 0;
to = encrypt_buf_xlog;
encr_offset = startoffset;
for (i = 1; i <= npages; i++)
{
char tweak[TWEAK_SIZE];
Size nbytes;
// Build the tweak from timeline, segment number and page offset.
XLogEncryptionTweak(tweak, ThisTimeLineID, openLogSegNo, encr_offset);
// Only the requested part of a trailing partial page is encrypted.
if (i == npages && ispartialpage)
nbytes = WriteRqst.Write % XLOG_BLCKSZ;
else
nbytes = XLOG_BLCKSZ;
// Encrypt one page from the WAL buffer into the encryption buffer.
encrypt_block(from,
to,
nbytes,
tweak,
InvalidXLogRecPtr,
InvalidBlockNumber,
EDK_REL_WAL);
nencrypted++;
from += XLOG_BLCKSZ;
to += XLOG_BLCKSZ;
encr_offset += XLOG_BLCKSZ;
// Write the batch once XLOG_ENCRYPT_BUF_PAGES pages are encrypted or the
// last page has been processed; startoffset advances by the value
// XLogWritePages() returns.
if (nencrypted >= XLOG_ENCRYPT_BUF_PAGES || i >= npages)
{
startoffset += XLogWritePages(encrypt_buf_xlog,
nencrypted,
startoffset);
/* Prepare for the next round of page encryptions. */
nencrypted = 0;
to = encrypt_buf_xlog;
encr_offset = startoffset;
}
}
}
else
startoffset += XLogWritePages(from, npages, startoffset);
npages = 0;
/* On reaching the segment end: fsync it, notify the archiver if archiving is active, and possibly request a checkpoint. */
if (finishing_seg)
{
issue_xlog_fsync(openLogFile, openLogSegNo);
WalSndWakeupRequest();
LogwrtResult.Flush = LogwrtResult.Write;
if (XLogArchivingActive())
XLogArchiveNotifySeg(openLogSegNo);
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
{
(void) GetRedoRecPtr();
if (XLogCheckpointNeeded(openLogSegNo))
RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
}
}
}
// A partial page is the last page of the request: record WriteRqst.Write
// as the written position and leave the loop.
if (ispartialpage)
{
/* Only asked to write a partial page */
LogwrtResult.Write = WriteRqst.Write;
break;
}
curridx = NextBufIdx(curridx);
/* If flexible, break out of loop as soon as we wrote something */
if (flexible && npages == 0)
break;
}
Assert(npages == 0);
// Part 2: flush (fsync) the written XLOG if a flush was requested and
// something is still unflushed.
if (LogwrtResult.Flush < WriteRqst.Flush &&
LogwrtResult.Flush < LogwrtResult.Write)
{
if (sync_method != SYNC_METHOD_OPEN &&
sync_method != SYNC_METHOD_OPEN_DSYNC)
{
if (openLogFile >= 0 &&
!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
wal_segment_size))
XLogFileClose();
if (openLogFile < 0)
{
XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
wal_segment_size);
openLogFile = XLogFileOpen(openLogSegNo);
ReserveExternalFD();
}
issue_xlog_fsync(openLogFile, openLogSegNo);
}
/* signal that we need to wakeup walsenders later */
WalSndWakeupRequest();
LogwrtResult.Flush = LogwrtResult.Write;
}
/*
Part 3: publish the result to shared state under info_lck, and raise
XLogCtl->LogwrtRqst so it is never behind what has been written/flushed.
*/
{
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->LogwrtResult = LogwrtResult;
if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
SpinLockRelease(&XLogCtl->info_lck);
}
}
startupxlog()
XLogReaderAllocate()
XLogPageRead()
XLogPageRead
/* Excerpt from XLogPageRead(): sanity checks, then decryption of the page in readBuf. */
Assert(targetSegNo == readSegNo);
Assert(targetPageOff == readOff);
Assert(reqLen <= readLen);
if (data_encrypted)
{
char tweak[TWEAK_SIZE];
/* Build the tweak from timeline, segment number and page offset. */
XLogEncryptionTweak(tweak, curFileTLI, readSegNo, readOff);
/*
 * Decrypt in place: readBuf is both input and output.
 * NOTE(review): the write path in XLogWrite() passes EDK_REL_WAL while
 * this call passes EDK_PERMANENT.  In the visible encrypt_block() and
 * decrypt_block() the kind only distinguishes EDK_BUFFILE (and EDK_TEMP
 * when tweak is NULL), so both behave the same here -- confirm intent.
 */
decrypt_block(readBuf,
readBuf,
XLOG_BLCKSZ,
tweak,
InvalidBlockNumber,
EDK_PERMANENT);
}
加密与解密
XLogEncryptionTweak的作用是生成iv(计数器)
/*
 * XLogEncryptionTweak
 *
 * Fill the TWEAK_SIZE-byte buffer "tweak" with the IV material for one WAL
 * page: the buffer is zeroed, then the raw bytes of the timeline ID, the
 * segment number and the offset are stored back to back from the start of
 * the buffer.
 *
 * tweak    - output buffer of at least TWEAK_SIZE bytes
 * timeline - timeline ID of the WAL file
 * segment  - WAL segment number
 * offset   - offset of the page within the segment
 */
void
XLogEncryptionTweak(char *tweak, TimeLineID timeline, XLogSegNo segment,
                    uint32 offset)
{
    char       *cursor = tweak;

    /* Bytes not covered by the three fields stay zero. */
    memset(tweak, 0, TWEAK_SIZE);

    memcpy(cursor, &timeline, sizeof(timeline));
    cursor += sizeof(timeline);

    memcpy(cursor, &segment, sizeof(segment));
    cursor += sizeof(segment);

    memcpy(cursor, &offset, sizeof(offset));
}
具体的解密和加密过程由encrypt_block/decrypt_block实现
/*
 * encrypt_block
 *
 * Encrypt "size" bytes from input into output (excerpt; "......" marks code
 * omitted from these notes).
 *
 * tweak     - IV to use; if NULL, the data is treated as a page starting
 *             with PageHeaderData and the tweak is built from lsn and block
 * lsn       - LSN used for the tweak (and stored into output) when tweak
 *             is NULL
 * block     - block number used for the tweak when tweak is NULL
 * data_kind - EDK_BUFFILE selects the buffile context; EDK_TEMP sets an
 *             extra bit in the generated tweak
 *
 * Note from the original author: MarkBufferDirtyHint() may update input.
 */
void
encrypt_block(const char *input, char *output, Size size, char *tweak,
XLogRecPtr lsn, BlockNumber block,
EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
EVP_CIPHER_CTX *ctx;
int out_size;
char tweak_loc[TWEAK_SIZE];
Assert(data_encrypted);
// With tweak == NULL the data is assumed to start with PageHeaderData.
if (tweak == NULL)
{ ......
/* Tweak layout: page LSN followed by the block number. */
PageXLogRecPtrSet(rec_ptr, lsn);
memcpy(c, &rec_ptr, sizeof(PageXLogRecPtr));
c += sizeof(PageXLogRecPtr);
memcpy(c, &block, sizeof(BlockNumber));
// For EDK_TEMP data, set bit 7 of the byte after the block number; per
// the original note this guards against duplicate tweaks caused by
// generated fake LSNs.
if (data_kind == EDK_TEMP)
{
c += sizeof(BlockNumber);
*c |= 0x1 << 7;
}
tweak = tweak_loc;
/* When not encrypting in place, store the LSN in the output page. */
if (input != output)
PageSetLSN(output, lsn);
/* Do not encrypt the LSN and checksum. */
unencr_size = offsetof(PageHeaderData, pd_flags);
input += unencr_size;
output += unencr_size;
size -= unencr_size;
}
/*
* Empty (all-zero) input is not encrypted; output is zero-filled instead.
*/
else if (IsAllZero(input, size))
{
if (input != output)
memset(output, 0, size);
return;
}
/* EDK_BUFFILE data uses the buffile context, everything else the regular one. */
ctx = data_kind != EDK_BUFFILE ? ctx_encrypt : ctx_encrypt_buffile;
// Supply key and IV (tweak); the cipher argument is NULL here.
if (EVP_EncryptInit_ex(ctx, NULL, NULL, encryption_key,
(unsigned char *) tweak) != 1)
evp_error();
// Encrypt input into output.
if (EVP_EncryptUpdate(ctx, (unsigned char *) output,
&out_size, (unsigned char *) input, size) != 1)
evp_error();
/* All of the input must have been processed by the single update call. */
if (out_size != size)
{
#ifndef FRONTEND
ereport(ERROR, (errmsg("Some data left unencrypted")));
#else
/* Front-end shouldn't actually get here, but be careful. */
fprintf(stderr, "Some data left unencrypted\n");
exit(EXIT_FAILURE);
#endif /* FRONTEND */
}
#else /* !USE_ENCRYPTION */
/* data_encrypted should not be set */
Assert(false);
#endif /* USE_ENCRYPTION */
}
decrypt_block 的实现原理和encrypt_block相同,但是不需要lsn
/*
 * decrypt_block
 *
 * Decrypt "size" bytes from input into output.  Counterpart of
 * encrypt_block(); no LSN argument is needed because, when the tweak has to
 * be derived, the LSN is read from the (unencrypted) start of the page.
 *
 * input     - encrypted data
 * output    - receives the decrypted data; may be the same buffer as input
 * size      - number of bytes to process
 * tweak     - IV to use; if NULL, the data is treated as a page starting
 *             with PageHeaderData and the tweak is built from the page LSN
 *             and "block"
 * block     - block number used for the tweak when tweak is NULL (must
 *             then be valid)
 * data_kind - EDK_BUFFILE selects the buffile context; EDK_TEMP sets an
 *             extra bit in the generated tweak
 *
 * Failures are reported through evp_error(), or ereport(ERROR) / exit() if
 * not all data was processed.
 */
void
decrypt_block(const char *input, char *output, Size size, char *tweak,
              BlockNumber block, EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
    EVP_CIPHER_CTX *ctx;
    int         out_size;
    char        tweak_loc[TWEAK_SIZE];

    Assert(data_encrypted);

    if (tweak == NULL)
    {
        size_t      lsn_size,
                    unencr_size;
        char       *c = tweak_loc;

        Assert(block != InvalidBlockNumber);

        /*
         * The LSN can be read directly from input.  A page whose LSN is
         * invalid is passed through unchanged.
         */
        if (XLogRecPtrIsInvalid(PageGetLSN(input)))
        {
            if (input != output)
                memcpy(output, input, size);
            return;
        }

        /* Tweak layout: page LSN followed by the block number. */
        lsn_size = sizeof(PageXLogRecPtr);
        memset(c, 0, TWEAK_SIZE);
        memcpy(c, input, lsn_size);
        c += lsn_size;
        memcpy(c, &block, sizeof(BlockNumber));

        /* Same marker bit as encrypt_block() sets for EDK_TEMP data. */
        if (data_kind == EDK_TEMP)
        {
            c += sizeof(BlockNumber);
            *c |= 0x1 << 7;
        }
        tweak = tweak_loc;

        /* When not decrypting in place, carry the LSN over to output. */
        if (input != output)
            memcpy(output, input, lsn_size);

        /* The LSN and checksum are not encrypted, so skip them. */
        unencr_size = offsetof(PageHeaderData, pd_flags);
        input += unencr_size;
        output += unencr_size;
        size -= unencr_size;
    }
    else if (IsAllZero(input, size))
    {
        /* All-zero data was never encrypted; produce zeroes. */
        if (input != output)
            memset(output, 0, size);
        return;
    }

    /*
     * Use the contexts that setup_encryption() created for decryption,
     * rather than re-initializing the encryption contexts in the opposite
     * direction.
     */
    ctx = data_kind != EDK_BUFFILE ? ctx_decrypt : ctx_decrypt_buffile;

    /* The remaining initialization: supply key and IV (tweak). */
    if (EVP_DecryptInit_ex(ctx, NULL, NULL, encryption_key,
                           (unsigned char *) tweak) != 1)
        evp_error();

    /* Do the actual decryption. */
    if (EVP_DecryptUpdate(ctx, (unsigned char *) output,
                          &out_size, (unsigned char *) input, size) != 1)
        evp_error();

    /* All of the input must have been processed by the single update call. */
    if (out_size != size)
    {
#ifndef FRONTEND
        ereport(ERROR, (errmsg("Some data left undecrypted")));
#else
        /* Front-end shouldn't actually get here, but be careful. */
        fprintf(stderr, "Some data left undecrypted\n");
        exit(EXIT_FAILURE);
#endif                          /* FRONTEND */
    }
#else                           /* !USE_ENCRYPTION */
    /* data_encrypted should not be set */
    Assert(false);
#endif                          /* USE_ENCRYPTION */
}