searchusermenu
  • 发布文章
  • 消息中心
点赞
收藏
评论
分享
原创

透明加密介绍

2024-10-10 02:06:57
6
0

透明加密

  • 透明:对使用者来说是无感知的

    文件在硬盘上是密文,在内存中是明文。一旦离开使用环境,应用程序由于无法得到自动解密的服务而无法打开文件,从而起到保护文件内容的效果

当使用者在打开或编辑指定文件时,系统将自动对未加密的文件进行加密,对已加密的文件自动解密

加密等级

加密等级 优缺点
客户端加密,服务端加密 优点:加密粒度小,加密数据量可控。缺点:缓存级加密,性能较差,索引无法使用
集簇级加密,对整个集簇进行加密,初始化时确定集簇是否加密 优点:架构简单,使用成本低,操作系统缓存级加密(数据缓存刷入、读取磁盘时加解密)。缺点:加密细粒度大,所有集簇内对象都会加密
表空间级加密,数据库级加密,表级加密 优点:加密细粒度降低。缺点:需要加密的对象较多时,使用成本较高

加密算法

流式加密:密钥流长度与明文数据长度一致。
公钥加密:降低了密钥管理成本,加密性能差
分组加密:目前主流的加密算法,性能最优,应用最广。

国际公认的分组加密算法是AES

AES拥有5种加密模式:

  • ECB mode: Electronic Code Book mode,电子密码本模式

    • 将明文分组加密之后的结果将直接成为密文分组
    • 明文中的重复排列会反映在密文中,通过删除、替换密文分组可以对明文进行操作
  • CFB mode: Cipher FeedBack mode,密文反馈模式

    • 前一个密文分组会被送回到密码算法的输入端
    • 加密不支持并行计算
  • OFB mode: Output FeedBack mode,输出反馈模式

    • 密码算法的输出会反馈到密码算法的输入中
    • 不支持并行计算
  • CBC mode: Cipher Block Chaining mode,密码分组链接模式

    • 首先将明文分组与前一个密文分组进行XOR运算,然后再进行加密
    • 加密不支持并行计算
  • CTR mode: Counter mode,计数器模式

    • 通过将计数器加密得到的比特序列,与明文分组进行XOR
    • 支持并行计算

PG启动

企业微信截图_17276596349472.png

postgresmain 启动时会验证是否开启encryption,读取postgresql.conf中的encryption_key_command 配置并运行。得到encryption_key后分配空间,验证密码后启动database

setup_encryption

/*
 * setup_encryption
 *
 * One-time encryption setup (excerpt; the "......" markers stand for code
 * omitted by the article). The visible part creates the four cipher
 * contexts used for encryption/decryption and allocates the buffer used to
 * encrypt XLOG pages.
 */
void
setup_encryption(void)
{
#ifdef USE_ENCRYPTION
    /*
	 * Setup OpenSSL.
	 *
	 * None of these functions should return a value or raise error.
	 */
	 ...... 
         
     /*
      * Four contexts are set up: encrypt/decrypt, each in a regular and a
      * "buffile" variant. Prototype of the helper:
      * init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile);
      */
	init_encryption_context(&ctx_encrypt, true, false);
	init_encryption_context(&ctx_decrypt, false, false);
	init_encryption_context(&ctx_encrypt_buffile, true, true);
	init_encryption_context(&ctx_decrypt_buffile, false, true);

    .......
	/* Buffer into which XLOG pages are encrypted before being written. */
	encrypt_buf_xlog = (char *) MemoryContextAlloc(TopMemoryContext,
												   ENCRYPT_BUF_XLOG_SIZE);
    // Buffer size is XLOG_ENCRYPT_BUF_PAGES pages (8 according to the original note): #define ENCRYPT_BUF_XLOG_SIZE	(XLOG_ENCRYPT_BUF_PAGES * XLOG_BLCKSZ)
    .......
}

init_encryption_context

/*
 * init_encryption_context
 *
 * Allocate an OpenSSL cipher context, store it in *ctx_p and perform the
 * key-independent part of its initialization.
 *
 * ctx_p   - receives the newly allocated context
 * encrypt - true to initialize for encryption, false for decryption
 * buffile - true selects AES-128-CBC, false selects AES-128-CTR
 *
 * Key and IV are not set here (NULL is passed for both); padding is
 * disabled. Any OpenSSL failure is reported through evp_error().
 */
static void
init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile)
{
	const EVP_CIPHER *cipher;
	EVP_CIPHER_CTX *ctx;
	int			init_ok;

	/* Pick the cipher according to the kind of data being processed. */
	if (buffile)
		cipher = EVP_aes_128_cbc();
	else
		cipher = EVP_aes_128_ctr();

	/* Publish the new context to the caller before checking it. */
	ctx = EVP_CIPHER_CTX_new();
	*ctx_p = ctx;
	if (ctx == NULL)
		evp_error();

	/* Bind the cipher only; key and IV are supplied later, per block. */
	init_ok = encrypt
		? EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL)
		: EVP_DecryptInit_ex(ctx, cipher, NULL, NULL, NULL);
	if (init_ok != 1)
		evp_error();

	/*
	 * CTR mode is effectively a stream cipher, hence block size 1; the CBC
	 * variant works on 16-byte blocks.
	 */
	Assert(EVP_CIPHER_CTX_block_size(ctx) == (buffile ? 16 : 1));

	/* Input sizes are managed by the callers, so no padding is wanted. */
	EVP_CIPHER_CTX_set_padding(ctx, 0);

	/* The tweak is used as the IV, so the sizes must agree. */
	Assert(EVP_CIPHER_CTX_iv_length(ctx) == TWEAK_SIZE);
	Assert(EVP_CIPHER_CTX_key_length(ctx) == ENCRYPTION_KEY_LENGTH);
}

wal的读取与写入

XLogWrite

企业微信截图_17276597021555.png

企业微信截图_17276597629065.png

/*
 * XLogWrite
 *
 * Write and/or flush WAL buffer pages up to the positions given in WriteRqst
 * (excerpt; "........" stands for declarations omitted by the article).
 *
 * WriteRqst.Write is the position to write up to, WriteRqst.Flush the
 * position to flush up to. If 'flexible' is true, the loop stops as soon as
 * a batch of pages has been written out.
 *
 * When data_encrypted is set, pages are encrypted into encrypt_buf_xlog, in
 * batches of at most XLOG_ENCRYPT_BUF_PAGES, and that buffer is what gets
 * passed to XLogWritePages; otherwise the buffer pages are passed directly.
 */
static void
XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
{
	bool		ispartialpage;
	bool		last_iteration;
	bool		finishing_seg;
	bool		use_existent;
	int			curridx;
	int			npages;
	int			startidx;
	uint32		startoffset;
    ........
    // Part 1: write the XLOG pages out
	LogwrtResult = XLogCtl->LogwrtResult; // refresh the local copy of the write/flush result from XLogCtl
	
	// Initialise the bookkeeping: npages counts the pages pending write, startidx is the buffer index of the first pending page, startoffset is the offset at which the pending pages start
	npages = 0;
	startidx = 0;
	startoffset = 0;

    // XLogRecPtrToBufIdx converts LogwrtResult.Write into a buffer page index
	curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
	// Loop while the already-written position is behind the requested one
	while (LogwrtResult.Write < WriteRqst.Write)
	{
		// End position recorded for the current buffer page
		XLogRecPtr	EndPtr = XLogCtl->xlblocks[curridx];
        // The already-written position must be below this page's end position, otherwise PANIC
		if (LogwrtResult.Write >= EndPtr)
			elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
				 (uint32) (LogwrtResult.Write >> 32),
				 (uint32) LogwrtResult.Write,
				 (uint32) (EndPtr >> 32), (uint32) EndPtr);

		/* Advance the local LogwrtResult.Write to the end of the current buffer page */
		LogwrtResult.Write = EndPtr;
		/* True when the request ends inside this page; the two values are equal when the whole page is requested */
		ispartialpage = WriteRqst.Write < LogwrtResult.Write;
        // Switch to another log segment file if the new write position is not in the currently open segment
		if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
							 wal_segment_size))
		{
			Assert(npages == 0);
			if (openLogFile >= 0)
				XLogFileClose();
			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
							wal_segment_size);

			/* create/use new log file */
			use_existent = true;
			openLogFile = XLogFileInit(openLogSegNo, &use_existent, true);
			ReserveExternalFD();
		}

		/* Make sure we have the current logfile open */
		if (openLogFile < 0)
		{
			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
							wal_segment_size);
			openLogFile = XLogFileOpen(openLogSegNo);
			ReserveExternalFD();
		}

		/* Add current page to the set of pending pages-to-dump */
		if (npages == 0)
		{
			/* first of group */
			startidx = curridx;
            // LogwrtResult.Write is the end of the page just added, so subtracting XLOG_BLCKSZ gives the start of that page
			startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
											wal_segment_size);
		}
		npages++;

		// Has the requested write position been reached, i.e. is this the last iteration?
		last_iteration = WriteRqst.Write <= LogwrtResult.Write;
		// Does the pending batch of full pages reach the end of the segment?
		finishing_seg = !ispartialpage &&
			(startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;

		if (last_iteration ||
			curridx == XLogCtl->XLogCacheBlck ||
			finishing_seg)
		{
            // Hand the pending pages over to XLogWritePages
			char	   *from;

			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
			if (data_encrypted)
			{
				int			i,
							nencrypted;
				char	   *to;
				uint32		encr_offset;

				
				nencrypted = 0;
				to = encrypt_buf_xlog;
				encr_offset = startoffset;
				for (i = 1; i <= npages; i++)
				{
					char		tweak[TWEAK_SIZE];
					Size		nbytes;
                    // Build the tweak from timeline, segment number and page offset
					XLogEncryptionTweak(tweak, ThisTimeLineID, openLogSegNo, encr_offset);

					/* Only the requested part of a trailing partial page is encrypted */
					if (i == npages && ispartialpage)
						nbytes = WriteRqst.Write % XLOG_BLCKSZ;
					else
						nbytes = XLOG_BLCKSZ;
					// Encrypt the page from the WAL buffer into the encryption buffer
					encrypt_block(from,
								  to,
								  nbytes,
								  tweak,
								  InvalidXLogRecPtr,
								  InvalidBlockNumber,
								  EDK_REL_WAL);
					nencrypted++;
					from += XLOG_BLCKSZ;
					to += XLOG_BLCKSZ;
					encr_offset += XLOG_BLCKSZ;

					// Once the encryption buffer is full (XLOG_ENCRYPT_BUF_PAGES pages) or the last page has been encrypted, call XLogWritePages and advance startoffset
					if (nencrypted >= XLOG_ENCRYPT_BUF_PAGES || i >= npages)
					{
						startoffset += XLogWritePages(encrypt_buf_xlog,
													  nencrypted,
													  startoffset);

						/* Prepare for the next round of page encryptions. */
						nencrypted = 0;
						to = encrypt_buf_xlog;
						encr_offset = startoffset;
					}
				}
			}
			else
				startoffset += XLogWritePages(from, npages, startoffset);

			npages = 0;

			/* A finished segment is fsynced right away and reported for archiving */
			if (finishing_seg)
			{
				issue_xlog_fsync(openLogFile, openLogSegNo);
				WalSndWakeupRequest();

				LogwrtResult.Flush = LogwrtResult.Write;	
				if (XLogArchivingActive())
					XLogArchiveNotifySeg(openLogSegNo);

				XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
				XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
				if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
				{
					(void) GetRedoRecPtr();
					if (XLogCheckpointNeeded(openLogSegNo))
						RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
				}
			}
		}
        // A partial page is always the last one: everything up to WriteRqst.Write has been handed over, so leave the loop
		if (ispartialpage)
		{
			/* Only asked to write a partial page */
			LogwrtResult.Write = WriteRqst.Write;
			break;
		}
		curridx = NextBufIdx(curridx);

		/* If flexible, break out of loop as soon as we wrote something */
		if (flexible && npages == 0)
			break;
	}

	Assert(npages == 0);
    // Part 2: flush the XLOG to disk if a flush was requested and something is unflushed
	if (LogwrtResult.Flush < WriteRqst.Flush &&
		LogwrtResult.Flush < LogwrtResult.Write)

	{
		if (sync_method != SYNC_METHOD_OPEN &&
			sync_method != SYNC_METHOD_OPEN_DSYNC)
		{
			if (openLogFile >= 0 &&
				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
								 wal_segment_size))
				XLogFileClose();
			if (openLogFile < 0)
			{
				XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
								wal_segment_size);
				openLogFile = XLogFileOpen(openLogSegNo);
				ReserveExternalFD();
			}

			issue_xlog_fsync(openLogFile, openLogSegNo);
		}

		/* signal that we need to wakeup walsenders later */
		WalSndWakeupRequest();

		LogwrtResult.Flush = LogwrtResult.Write;
	}

	/*
	 * Part 3: publish the result in the shared state under info_lck, and
	 * make sure the shared LogwrtRqst is not behind what has been done.
	 */
	{
		SpinLockAcquire(&XLogCtl->info_lck);
		XLogCtl->LogwrtResult = LogwrtResult;
		if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
			XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
		if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
			XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
		SpinLockRelease(&XLogCtl->info_lck);
	}
}

startupxlog()
XLogReaderAllocate()
XLogPageRead()

XLogPageRead

/* Excerpt from XLogPageRead: the page that was read must be the requested one. */
Assert(targetSegNo == readSegNo);
	Assert(targetPageOff == readOff);
	Assert(reqLen <= readLen);
    /* Decrypt the page in place (input and output are both readBuf). */
    if (data_encrypted)
    {
        char		tweak[TWEAK_SIZE];
        /* The tweak is rebuilt from timeline, segment number and page offset. */
        XLogEncryptionTweak(tweak, curFileTLI, readSegNo, readOff);
        /*
         * NOTE(review): the write path shown in XLogWrite passes EDK_REL_WAL,
         * here EDK_PERMANENT is passed - confirm both select the same cipher
         * context.
         */
        decrypt_block(readBuf,
                      readBuf,
                      XLOG_BLCKSZ,
                      tweak,
                      InvalidBlockNumber,
                      EDK_PERMANENT);
    }

加密与解密

XLogEncryptionTweak的作用是生成iv(计数器)

/*
 * XLogEncryptionTweak
 *
 * Fill 'tweak' (TWEAK_SIZE bytes) with the encryption tweak of an XLOG page.
 * The buffer is zeroed first, then the raw bytes of 'timeline', 'segment'
 * and 'offset' are stored back to back starting at the beginning.
 */
void
XLogEncryptionTweak(char *tweak, TimeLineID timeline, XLogSegNo segment,
                    uint32 offset)
{
    size_t      pos = 0;

    /* Any bytes not covered by the three fields stay zero. */
    memset(tweak, 0, TWEAK_SIZE);

    memcpy(tweak + pos, &timeline, sizeof(timeline));
    pos += sizeof(timeline);

    memcpy(tweak + pos, &segment, sizeof(XLogSegNo));
    pos += sizeof(XLogSegNo);

    memcpy(tweak + pos, &offset, sizeof(offset));
}

具体的解密和加密过程由encrypt_block/decrypt_block实现

/*
 * encrypt_block
 *
 * Encrypt 'size' bytes of 'input' into 'output' (excerpt; "......" stands
 * for code omitted by the article).
 *
 * If 'tweak' is NULL, a tweak is built locally from 'lsn' and 'block', and
 * the leading part of the page header (up to pd_flags) is left unencrypted.
 * If 'tweak' is supplied and the input is all zeroes, the output is zeroed
 * and nothing is encrypted. 'data_kind' selects the cipher context.
 */
// MarkBufferDirtyHint() may update input
void
encrypt_block(const char *input, char *output, Size size, char *tweak,
              XLogRecPtr lsn, BlockNumber block,
              EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
    EVP_CIPHER_CTX *ctx;
	int			out_size;
	char	tweak_loc[TWEAK_SIZE];

	Assert(data_encrypted);
    // With tweak == NULL the data is assumed to start with a PageHeaderData
	if (tweak == NULL)
	{   ......
		/* The tweak starts with the LSN, followed by the block number. */
		PageXLogRecPtrSet(rec_ptr, lsn);
		memcpy(c, &rec_ptr, sizeof(PageXLogRecPtr));
		c += sizeof(PageXLogRecPtr);
		memcpy(c, &block, sizeof(BlockNumber));
        // Set an extra bit for EDK_TEMP so that the fake LSNs generated for buffile do not produce duplicate tweaks
		if (data_kind == EDK_TEMP)
		{
			c += sizeof(BlockNumber);
			*c |= 0x1 << 7;
		}

		tweak = tweak_loc;
		/* The LSN stays in plain text, so store it in the output page too. */
		if (input != output)
			PageSetLSN(output, lsn);

		/* Do not encrypt the LSN and checksum. */
		unencr_size = offsetof(PageHeaderData, pd_flags);
		input += unencr_size;
		output += unencr_size;
		size -= unencr_size;
	}
	/*
	 * Do not encrypt an empty (all-zero) page.
	 */
	else if (IsAllZero(input, size))
	{
		if (input != output)
			memset(output, 0, size);
		return;
	}

	/* EDK_BUFFILE data has its own context; everything else shares one. */
	ctx = data_kind != EDK_BUFFILE ? ctx_encrypt : ctx_encrypt_buffile;

	// Finish the initialization: supply the key and the tweak (used as IV)
	if (EVP_EncryptInit_ex(ctx, NULL, NULL, encryption_key,
						   (unsigned char *) tweak) != 1)
		evp_error();

	// Encrypt the input into output
	if (EVP_EncryptUpdate(ctx, (unsigned char *) output,
						  &out_size, (unsigned char *) input, size) != 1)
		evp_error();

	/* All of the input must have been processed in this single call. */
	if (out_size != size)
	{
#ifndef FRONTEND
		ereport(ERROR, (errmsg("Some data left unencrypted")));
#else
		/* Front-end shouldn't actually get here, but be careful. */
		fprintf(stderr, "Some data left unencrypted\n");
		exit(EXIT_FAILURE);
#endif	/* FRONTEND */
	}
#else  /* !USE_ENCRYPTION */
    /* data_encrypted should not be set */
    Assert(false);
#endif							/* USE_ENCRYPTION */
}

decrypt_block 的实现原理和encrypt_block相同,但是不需要lsn

/*
 * decrypt_block
 *
 * Decrypt 'size' bytes of 'input' into 'output'; both may point to the same
 * buffer.
 *
 * input     - encrypted data
 * output    - receives the decrypted data
 * size      - number of bytes to process
 * tweak     - IV to use, or NULL to derive it from the page itself
 * block     - block number, used only when 'tweak' is NULL
 * data_kind - kind of data; selects the cipher context
 *
 * With tweak == NULL the input is treated as a page: the tweak is built from
 * the LSN stored at its start plus 'block', and the header bytes preceding
 * pd_flags are not decrypted. A page with an invalid LSN is copied through
 * unchanged. With a caller-supplied tweak, all-zero input yields all-zero
 * output without any decryption.
 *
 * Errors from OpenSSL are reported via evp_error(); a short result raises
 * ERROR (or exits in frontend code).
 */
void
decrypt_block(const char *input, char *output, Size size, char *tweak,
              BlockNumber block, EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
	EVP_CIPHER_CTX *ctx;
	int			out_size;
	char		tweak_loc[TWEAK_SIZE];

	Assert(data_encrypted);

	if (tweak == NULL)
	{
		size_t		lsn_size,
					unencr_size;
		char	   *c = tweak_loc;

		Assert(block != InvalidBlockNumber);

		/*
		 * The input is not modified here, so the LSN can be read from it
		 * directly. A page with an invalid LSN is passed through as is.
		 */
		if (XLogRecPtrIsInvalid(PageGetLSN(input)))
		{
			if (input != output)
				memcpy(output, input, size);
			return;
		}

		lsn_size = sizeof(PageXLogRecPtr);

		/* Tweak layout: LSN bytes, then block number, rest zero. */
		memset(c, 0, TWEAK_SIZE);
		memcpy(c, input, lsn_size);
		c += lsn_size;
		memcpy(c, &block, sizeof(BlockNumber));
		/* Same extra bit as the encryption side sets for EDK_TEMP. */
		if (data_kind == EDK_TEMP)
		{
			c += sizeof(BlockNumber);
			*c |= 0x1 << 7;
		}

		tweak = tweak_loc;

		/*
		 * The LSN is stored unencrypted, copy it over verbatim.
		 *
		 * NOTE(review): only lsn_size bytes are copied, while decryption
		 * starts at pd_flags; any header bytes in between are not written
		 * to a separate output buffer - confirm this is intended.
		 */
		if (input != output)
			memcpy(output, input, lsn_size);

		/* Do not decrypt the LSN and checksum. */
		unencr_size = offsetof(PageHeaderData, pd_flags);
		input += unencr_size;
		output += unencr_size;
		size -= unencr_size;
	}
	else if (IsAllZero(input, size))
	{
		/* Empty (all-zero) data is not encrypted, so there is nothing to do. */
		if (input != output)
			memset(output, 0, size);
		return;
	}

	/*
	 * Use the contexts that setup_encryption() initialized for decryption,
	 * not the encryption ones.
	 */
	ctx = data_kind != EDK_BUFFILE ? ctx_decrypt : ctx_decrypt_buffile;

	/* The remaining initialization: key and tweak (used as IV). */
	if (EVP_DecryptInit_ex(ctx, NULL, NULL, encryption_key,
						   (unsigned char *) tweak) != 1)
		evp_error();

	/* Do the actual decryption. */
	if (EVP_DecryptUpdate(ctx, (unsigned char *) output,
						  &out_size, (unsigned char *) input, size) != 1)
		evp_error();

	/* All of the input must have been processed in this single call. */
	if (out_size != size)
	{
#ifndef FRONTEND
		ereport(ERROR, (errmsg("Some data left undecrypted")));
#else
		/* Front-end shouldn't actually get here, but be careful. */
		fprintf(stderr, "Some data left undecrypted\n");
		exit(EXIT_FAILURE);
#endif	/* FRONTEND */
	}
#else  /* !USE_ENCRYPTION */
	/* data_encrypted should not be set */
	Assert(false);
#endif							/* USE_ENCRYPTION */
}
0条评论
0 / 1000
z****n
4文章数
1粉丝数
z****n
4 文章 | 1 粉丝
z****n
4文章数
1粉丝数
z****n
4 文章 | 1 粉丝
原创

透明加密介绍

2024-10-10 02:06:57
6
0

透明加密

  • 透明:对使用者来说是无感知的

    文件在硬盘上是密文,在内存中是明文。一旦离开使用环境,应用程序由于无法得到自动解密的服务而无法打开文件,从而起到保护文件内容的效果

当使用者在打开或编辑指定文件时,系统将自动对未加密的文件进行加密,对已加密的文件自动解密

加密等级

加密等级 优缺点
客户端加密,服务端加密 优点:加密粒度小,加密数据量可控。缺点:缓存级加密,性能较差,索引无法使用
集簇级加密,对整个集簇进行加密,初始化时确定集簇是否加密 优点:架构简单,使用成本低,操作系统缓存级加密(数据缓存刷入、读取磁盘时加解密)。缺点:加密细粒度大,所有集簇内对象都会加密
表空间级加密,数据库级加密,表级加密 优点:加密细粒度降低。缺点:需要加密的对象较多时,使用成本较高

加密算法

流式加密:密钥流长度与明文数据长度一致。
公钥加密:降低了密钥管理成本,加密性能差
分组加密:目前主流的加密算法,性能最优,应用最广。

国际公认的分组加密算法是AES

AES拥有5种加密模式:

  • ECB mode: Electronic Code Book mode,电子密码本模式

    • 将明文分组加密之后的结果将直接成为密文分组
    • 明文中的重复排列会反映在密文中,通过删除、替换密文分组可以对明文进行操作
  • CFB mode: Cipher FeedBack mode,密文反馈模式

    • 前一个密文分组会被送回到密码算法的输入端
    • 加密不支持并行计算
  • OFB mode: Output FeedBack mode,输出反馈模式

    • 密码算法的输出会反馈到密码算法的输入中
    • 不支持并行计算
  • CBC mode: Cipher Block Chaining mode,密码分组链接模式

    • 首先将明文分组与前一个密文分组进行XOR运算,然后再进行加密
    • 加密不支持并行计算
  • CTR mode: Counter mode,计数器模式

    • 通过将计数器加密得到的比特序列,与明文分组进行XOR
    • 支持并行计算

PG启动

企业微信截图_17276596349472.png

postgresmain 启动时会验证是否开启encryption,读取postgresql.conf中的encryption_key_command 配置并运行。得到encryption_key后分配空间,验证密码后启动database

setup_encryption

/*
 * setup_encryption
 *
 * One-time encryption setup (excerpt; the "......" markers stand for code
 * omitted by the article). The visible part creates the four cipher
 * contexts used for encryption/decryption and allocates the buffer used to
 * encrypt XLOG pages.
 */
void
setup_encryption(void)
{
#ifdef USE_ENCRYPTION
    /*
	 * Setup OpenSSL.
	 *
	 * None of these functions should return a value or raise error.
	 */
	 ...... 
         
     /*
      * Four contexts are set up: encrypt/decrypt, each in a regular and a
      * "buffile" variant. Prototype of the helper:
      * init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile);
      */
	init_encryption_context(&ctx_encrypt, true, false);
	init_encryption_context(&ctx_decrypt, false, false);
	init_encryption_context(&ctx_encrypt_buffile, true, true);
	init_encryption_context(&ctx_decrypt_buffile, false, true);

    .......
	/* Buffer into which XLOG pages are encrypted before being written. */
	encrypt_buf_xlog = (char *) MemoryContextAlloc(TopMemoryContext,
												   ENCRYPT_BUF_XLOG_SIZE);
    // Buffer size is XLOG_ENCRYPT_BUF_PAGES pages (8 according to the original note): #define ENCRYPT_BUF_XLOG_SIZE	(XLOG_ENCRYPT_BUF_PAGES * XLOG_BLCKSZ)
    .......
}

init_encryption_context

/*
 * init_encryption_context
 *
 * Allocate an OpenSSL cipher context, store it in *ctx_p and perform the
 * key-independent part of its initialization.
 *
 * ctx_p   - receives the newly allocated context
 * encrypt - true to initialize for encryption, false for decryption
 * buffile - true selects AES-128-CBC, false selects AES-128-CTR
 *
 * Key and IV are not set here (NULL is passed for both); padding is
 * disabled. Any OpenSSL failure is reported through evp_error().
 */
static void
init_encryption_context(EVP_CIPHER_CTX **ctx_p, bool encrypt, bool buffile)
{
	const EVP_CIPHER *cipher;
	EVP_CIPHER_CTX *ctx;
	int			init_ok;

	/* Pick the cipher according to the kind of data being processed. */
	if (buffile)
		cipher = EVP_aes_128_cbc();
	else
		cipher = EVP_aes_128_ctr();

	/* Publish the new context to the caller before checking it. */
	ctx = EVP_CIPHER_CTX_new();
	*ctx_p = ctx;
	if (ctx == NULL)
		evp_error();

	/* Bind the cipher only; key and IV are supplied later, per block. */
	init_ok = encrypt
		? EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL)
		: EVP_DecryptInit_ex(ctx, cipher, NULL, NULL, NULL);
	if (init_ok != 1)
		evp_error();

	/*
	 * CTR mode is effectively a stream cipher, hence block size 1; the CBC
	 * variant works on 16-byte blocks.
	 */
	Assert(EVP_CIPHER_CTX_block_size(ctx) == (buffile ? 16 : 1));

	/* Input sizes are managed by the callers, so no padding is wanted. */
	EVP_CIPHER_CTX_set_padding(ctx, 0);

	/* The tweak is used as the IV, so the sizes must agree. */
	Assert(EVP_CIPHER_CTX_iv_length(ctx) == TWEAK_SIZE);
	Assert(EVP_CIPHER_CTX_key_length(ctx) == ENCRYPTION_KEY_LENGTH);
}

wal的读取与写入

XLogWrite

企业微信截图_17276597021555.png

企业微信截图_17276597629065.png

/*
 * XLogWrite
 *
 * Write and/or flush WAL buffer pages up to the positions given in WriteRqst
 * (excerpt; "........" stands for declarations omitted by the article).
 *
 * WriteRqst.Write is the position to write up to, WriteRqst.Flush the
 * position to flush up to. If 'flexible' is true, the loop stops as soon as
 * a batch of pages has been written out.
 *
 * When data_encrypted is set, pages are encrypted into encrypt_buf_xlog, in
 * batches of at most XLOG_ENCRYPT_BUF_PAGES, and that buffer is what gets
 * passed to XLogWritePages; otherwise the buffer pages are passed directly.
 */
static void
XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
{
	bool		ispartialpage;
	bool		last_iteration;
	bool		finishing_seg;
	bool		use_existent;
	int			curridx;
	int			npages;
	int			startidx;
	uint32		startoffset;
    ........
    // Part 1: write the XLOG pages out
	LogwrtResult = XLogCtl->LogwrtResult; // refresh the local copy of the write/flush result from XLogCtl
	
	// Initialise the bookkeeping: npages counts the pages pending write, startidx is the buffer index of the first pending page, startoffset is the offset at which the pending pages start
	npages = 0;
	startidx = 0;
	startoffset = 0;

    // XLogRecPtrToBufIdx converts LogwrtResult.Write into a buffer page index
	curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
	// Loop while the already-written position is behind the requested one
	while (LogwrtResult.Write < WriteRqst.Write)
	{
		// End position recorded for the current buffer page
		XLogRecPtr	EndPtr = XLogCtl->xlblocks[curridx];
        // The already-written position must be below this page's end position, otherwise PANIC
		if (LogwrtResult.Write >= EndPtr)
			elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
				 (uint32) (LogwrtResult.Write >> 32),
				 (uint32) LogwrtResult.Write,
				 (uint32) (EndPtr >> 32), (uint32) EndPtr);

		/* Advance the local LogwrtResult.Write to the end of the current buffer page */
		LogwrtResult.Write = EndPtr;
		/* True when the request ends inside this page; the two values are equal when the whole page is requested */
		ispartialpage = WriteRqst.Write < LogwrtResult.Write;
        // Switch to another log segment file if the new write position is not in the currently open segment
		if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
							 wal_segment_size))
		{
			Assert(npages == 0);
			if (openLogFile >= 0)
				XLogFileClose();
			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
							wal_segment_size);

			/* create/use new log file */
			use_existent = true;
			openLogFile = XLogFileInit(openLogSegNo, &use_existent, true);
			ReserveExternalFD();
		}

		/* Make sure we have the current logfile open */
		if (openLogFile < 0)
		{
			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
							wal_segment_size);
			openLogFile = XLogFileOpen(openLogSegNo);
			ReserveExternalFD();
		}

		/* Add current page to the set of pending pages-to-dump */
		if (npages == 0)
		{
			/* first of group */
			startidx = curridx;
            // LogwrtResult.Write is the end of the page just added, so subtracting XLOG_BLCKSZ gives the start of that page
			startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
											wal_segment_size);
		}
		npages++;

		// Has the requested write position been reached, i.e. is this the last iteration?
		last_iteration = WriteRqst.Write <= LogwrtResult.Write;
		// Does the pending batch of full pages reach the end of the segment?
		finishing_seg = !ispartialpage &&
			(startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;

		if (last_iteration ||
			curridx == XLogCtl->XLogCacheBlck ||
			finishing_seg)
		{
            // Hand the pending pages over to XLogWritePages
			char	   *from;

			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
			if (data_encrypted)
			{
				int			i,
							nencrypted;
				char	   *to;
				uint32		encr_offset;

				
				nencrypted = 0;
				to = encrypt_buf_xlog;
				encr_offset = startoffset;
				for (i = 1; i <= npages; i++)
				{
					char		tweak[TWEAK_SIZE];
					Size		nbytes;
                    // Build the tweak from timeline, segment number and page offset
					XLogEncryptionTweak(tweak, ThisTimeLineID, openLogSegNo, encr_offset);

					/* Only the requested part of a trailing partial page is encrypted */
					if (i == npages && ispartialpage)
						nbytes = WriteRqst.Write % XLOG_BLCKSZ;
					else
						nbytes = XLOG_BLCKSZ;
					// Encrypt the page from the WAL buffer into the encryption buffer
					encrypt_block(from,
								  to,
								  nbytes,
								  tweak,
								  InvalidXLogRecPtr,
								  InvalidBlockNumber,
								  EDK_REL_WAL);
					nencrypted++;
					from += XLOG_BLCKSZ;
					to += XLOG_BLCKSZ;
					encr_offset += XLOG_BLCKSZ;

					// Once the encryption buffer is full (XLOG_ENCRYPT_BUF_PAGES pages) or the last page has been encrypted, call XLogWritePages and advance startoffset
					if (nencrypted >= XLOG_ENCRYPT_BUF_PAGES || i >= npages)
					{
						startoffset += XLogWritePages(encrypt_buf_xlog,
													  nencrypted,
													  startoffset);

						/* Prepare for the next round of page encryptions. */
						nencrypted = 0;
						to = encrypt_buf_xlog;
						encr_offset = startoffset;
					}
				}
			}
			else
				startoffset += XLogWritePages(from, npages, startoffset);

			npages = 0;

			/* A finished segment is fsynced right away and reported for archiving */
			if (finishing_seg)
			{
				issue_xlog_fsync(openLogFile, openLogSegNo);
				WalSndWakeupRequest();

				LogwrtResult.Flush = LogwrtResult.Write;	
				if (XLogArchivingActive())
					XLogArchiveNotifySeg(openLogSegNo);

				XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
				XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
				if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
				{
					(void) GetRedoRecPtr();
					if (XLogCheckpointNeeded(openLogSegNo))
						RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
				}
			}
		}
        // A partial page is always the last one: everything up to WriteRqst.Write has been handed over, so leave the loop
		if (ispartialpage)
		{
			/* Only asked to write a partial page */
			LogwrtResult.Write = WriteRqst.Write;
			break;
		}
		curridx = NextBufIdx(curridx);

		/* If flexible, break out of loop as soon as we wrote something */
		if (flexible && npages == 0)
			break;
	}

	Assert(npages == 0);
    // Part 2: flush the XLOG to disk if a flush was requested and something is unflushed
	if (LogwrtResult.Flush < WriteRqst.Flush &&
		LogwrtResult.Flush < LogwrtResult.Write)

	{
		if (sync_method != SYNC_METHOD_OPEN &&
			sync_method != SYNC_METHOD_OPEN_DSYNC)
		{
			if (openLogFile >= 0 &&
				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
								 wal_segment_size))
				XLogFileClose();
			if (openLogFile < 0)
			{
				XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
								wal_segment_size);
				openLogFile = XLogFileOpen(openLogSegNo);
				ReserveExternalFD();
			}

			issue_xlog_fsync(openLogFile, openLogSegNo);
		}

		/* signal that we need to wakeup walsenders later */
		WalSndWakeupRequest();

		LogwrtResult.Flush = LogwrtResult.Write;
	}

	/*
	 * Part 3: publish the result in the shared state under info_lck, and
	 * make sure the shared LogwrtRqst is not behind what has been done.
	 */
	{
		SpinLockAcquire(&XLogCtl->info_lck);
		XLogCtl->LogwrtResult = LogwrtResult;
		if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
			XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
		if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
			XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
		SpinLockRelease(&XLogCtl->info_lck);
	}
}

startupxlog()
XLogReaderAllocate()
XLogPageRead()

XLogPageRead

/* Excerpt from XLogPageRead: the page that was read must be the requested one. */
Assert(targetSegNo == readSegNo);
	Assert(targetPageOff == readOff);
	Assert(reqLen <= readLen);
    /* Decrypt the page in place (input and output are both readBuf). */
    if (data_encrypted)
    {
        char		tweak[TWEAK_SIZE];
        /* The tweak is rebuilt from timeline, segment number and page offset. */
        XLogEncryptionTweak(tweak, curFileTLI, readSegNo, readOff);
        /*
         * NOTE(review): the write path shown in XLogWrite passes EDK_REL_WAL,
         * here EDK_PERMANENT is passed - confirm both select the same cipher
         * context.
         */
        decrypt_block(readBuf,
                      readBuf,
                      XLOG_BLCKSZ,
                      tweak,
                      InvalidBlockNumber,
                      EDK_PERMANENT);
    }

加密与解密

XLogEncryptionTweak的作用是生成iv(计数器)

/*
 * XLogEncryptionTweak
 *
 * Fill 'tweak' (TWEAK_SIZE bytes) with the encryption tweak of an XLOG page.
 * The buffer is zeroed first, then the raw bytes of 'timeline', 'segment'
 * and 'offset' are stored back to back starting at the beginning.
 */
void
XLogEncryptionTweak(char *tweak, TimeLineID timeline, XLogSegNo segment,
                    uint32 offset)
{
    size_t      pos = 0;

    /* Any bytes not covered by the three fields stay zero. */
    memset(tweak, 0, TWEAK_SIZE);

    memcpy(tweak + pos, &timeline, sizeof(timeline));
    pos += sizeof(timeline);

    memcpy(tweak + pos, &segment, sizeof(XLogSegNo));
    pos += sizeof(XLogSegNo);

    memcpy(tweak + pos, &offset, sizeof(offset));
}

具体的解密和加密过程由encrypt_block/decrypt_block实现

/*
 * encrypt_block
 *
 * Encrypt 'size' bytes of 'input' into 'output' (excerpt; "......" stands
 * for code omitted by the article).
 *
 * If 'tweak' is NULL, a tweak is built locally from 'lsn' and 'block', and
 * the leading part of the page header (up to pd_flags) is left unencrypted.
 * If 'tweak' is supplied and the input is all zeroes, the output is zeroed
 * and nothing is encrypted. 'data_kind' selects the cipher context.
 */
// MarkBufferDirtyHint() may update input
void
encrypt_block(const char *input, char *output, Size size, char *tweak,
              XLogRecPtr lsn, BlockNumber block,
              EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
    EVP_CIPHER_CTX *ctx;
	int			out_size;
	char	tweak_loc[TWEAK_SIZE];

	Assert(data_encrypted);
    // With tweak == NULL the data is assumed to start with a PageHeaderData
	if (tweak == NULL)
	{   ......
		/* The tweak starts with the LSN, followed by the block number. */
		PageXLogRecPtrSet(rec_ptr, lsn);
		memcpy(c, &rec_ptr, sizeof(PageXLogRecPtr));
		c += sizeof(PageXLogRecPtr);
		memcpy(c, &block, sizeof(BlockNumber));
        // Set an extra bit for EDK_TEMP so that the fake LSNs generated for buffile do not produce duplicate tweaks
		if (data_kind == EDK_TEMP)
		{
			c += sizeof(BlockNumber);
			*c |= 0x1 << 7;
		}

		tweak = tweak_loc;
		/* The LSN stays in plain text, so store it in the output page too. */
		if (input != output)
			PageSetLSN(output, lsn);

		/* Do not encrypt the LSN and checksum. */
		unencr_size = offsetof(PageHeaderData, pd_flags);
		input += unencr_size;
		output += unencr_size;
		size -= unencr_size;
	}
	/*
	 * Do not encrypt an empty (all-zero) page.
	 */
	else if (IsAllZero(input, size))
	{
		if (input != output)
			memset(output, 0, size);
		return;
	}

	/* EDK_BUFFILE data has its own context; everything else shares one. */
	ctx = data_kind != EDK_BUFFILE ? ctx_encrypt : ctx_encrypt_buffile;

	// Finish the initialization: supply the key and the tweak (used as IV)
	if (EVP_EncryptInit_ex(ctx, NULL, NULL, encryption_key,
						   (unsigned char *) tweak) != 1)
		evp_error();

	// Encrypt the input into output
	if (EVP_EncryptUpdate(ctx, (unsigned char *) output,
						  &out_size, (unsigned char *) input, size) != 1)
		evp_error();

	/* All of the input must have been processed in this single call. */
	if (out_size != size)
	{
#ifndef FRONTEND
		ereport(ERROR, (errmsg("Some data left unencrypted")));
#else
		/* Front-end shouldn't actually get here, but be careful. */
		fprintf(stderr, "Some data left unencrypted\n");
		exit(EXIT_FAILURE);
#endif	/* FRONTEND */
	}
#else  /* !USE_ENCRYPTION */
    /* data_encrypted should not be set */
    Assert(false);
#endif							/* USE_ENCRYPTION */
}

decrypt_block 的实现原理和encrypt_block相同,但是不需要lsn

/*
 * decrypt_block
 *
 * Decrypt 'size' bytes of 'input' into 'output'; both may point to the same
 * buffer.
 *
 * input     - encrypted data
 * output    - receives the decrypted data
 * size      - number of bytes to process
 * tweak     - IV to use, or NULL to derive it from the page itself
 * block     - block number, used only when 'tweak' is NULL
 * data_kind - kind of data; selects the cipher context
 *
 * With tweak == NULL the input is treated as a page: the tweak is built from
 * the LSN stored at its start plus 'block', and the header bytes preceding
 * pd_flags are not decrypted. A page with an invalid LSN is copied through
 * unchanged. With a caller-supplied tweak, all-zero input yields all-zero
 * output without any decryption.
 *
 * Errors from OpenSSL are reported via evp_error(); a short result raises
 * ERROR (or exits in frontend code).
 */
void
decrypt_block(const char *input, char *output, Size size, char *tweak,
              BlockNumber block, EncryptedDataKind data_kind)
{
#ifdef USE_ENCRYPTION
	EVP_CIPHER_CTX *ctx;
	int			out_size;
	char		tweak_loc[TWEAK_SIZE];

	Assert(data_encrypted);

	if (tweak == NULL)
	{
		size_t		lsn_size,
					unencr_size;
		char	   *c = tweak_loc;

		Assert(block != InvalidBlockNumber);

		/*
		 * The input is not modified here, so the LSN can be read from it
		 * directly. A page with an invalid LSN is passed through as is.
		 */
		if (XLogRecPtrIsInvalid(PageGetLSN(input)))
		{
			if (input != output)
				memcpy(output, input, size);
			return;
		}

		lsn_size = sizeof(PageXLogRecPtr);

		/* Tweak layout: LSN bytes, then block number, rest zero. */
		memset(c, 0, TWEAK_SIZE);
		memcpy(c, input, lsn_size);
		c += lsn_size;
		memcpy(c, &block, sizeof(BlockNumber));
		/* Same extra bit as the encryption side sets for EDK_TEMP. */
		if (data_kind == EDK_TEMP)
		{
			c += sizeof(BlockNumber);
			*c |= 0x1 << 7;
		}

		tweak = tweak_loc;

		/*
		 * The LSN is stored unencrypted, copy it over verbatim.
		 *
		 * NOTE(review): only lsn_size bytes are copied, while decryption
		 * starts at pd_flags; any header bytes in between are not written
		 * to a separate output buffer - confirm this is intended.
		 */
		if (input != output)
			memcpy(output, input, lsn_size);

		/* Do not decrypt the LSN and checksum. */
		unencr_size = offsetof(PageHeaderData, pd_flags);
		input += unencr_size;
		output += unencr_size;
		size -= unencr_size;
	}
	else if (IsAllZero(input, size))
	{
		/* Empty (all-zero) data is not encrypted, so there is nothing to do. */
		if (input != output)
			memset(output, 0, size);
		return;
	}

	/*
	 * Use the contexts that setup_encryption() initialized for decryption,
	 * not the encryption ones.
	 */
	ctx = data_kind != EDK_BUFFILE ? ctx_decrypt : ctx_decrypt_buffile;

	/* The remaining initialization: key and tweak (used as IV). */
	if (EVP_DecryptInit_ex(ctx, NULL, NULL, encryption_key,
						   (unsigned char *) tweak) != 1)
		evp_error();

	/* Do the actual decryption. */
	if (EVP_DecryptUpdate(ctx, (unsigned char *) output,
						  &out_size, (unsigned char *) input, size) != 1)
		evp_error();

	/* All of the input must have been processed in this single call. */
	if (out_size != size)
	{
#ifndef FRONTEND
		ereport(ERROR, (errmsg("Some data left undecrypted")));
#else
		/* Front-end shouldn't actually get here, but be careful. */
		fprintf(stderr, "Some data left undecrypted\n");
		exit(EXIT_FAILURE);
#endif	/* FRONTEND */
	}
#else  /* !USE_ENCRYPTION */
	/* data_encrypted should not be set */
	Assert(false);
#endif							/* USE_ENCRYPTION */
}
文章来自个人专栏
postgresql 日志分析
3 文章 | 1 订阅
0条评论
0 / 1000
请输入你的评论
0
0