Python 套接字(Socket)读取 HTTP 网页数据
基于 Python 的套接字(Socket)读取网页数据,以读取百度首页的内容为例,程序代码如下:
import codecs
import socket
def getDataBySocket(url):
    """Fetch ``/`` from an HTTP server over a raw TCP socket and print the reply.

    Connects to port 80 of the given host, sends a minimal HTTP/1.1 GET
    request, and prints every received chunk (its size and its decoded
    text) until the server closes the connection.

    Args:
        url: Host name or IP address of the server. Despite the parameter
            name this is a bare host, not a full URL; it is used both as
            the connect address and as the ``Host`` header value.

    Returns:
        None. All output goes to stdout.

    Raises:
        OSError: If the connection cannot be established or the request
            cannot be sent. Errors raised while reading the response are
            reported on stdout and end the read loop instead of
            propagating.
    """
    size = 1024

    # HTTP header lines end with CRLF and the header section ends with an
    # empty line. "Connection: close" asks the server to close the socket
    # after the response; otherwise an HTTP/1.1 server keeps the connection
    # alive and recv() would block until the server's idle timeout.
    request_lines = [
        "GET / HTTP/1.1",
        "Host: " + url,
        "Connection: close",
        "",
        "",
    ]
    request = "\r\n".join(request_lines).encode("utf-8")

    # A chunk boundary can fall in the middle of a multi-byte UTF-8
    # sequence, and the body is not guaranteed to be UTF-8 at all. The
    # incremental decoder carries partial sequences over to the next chunk
    # and substitutes U+FFFD for undecodable bytes instead of raising.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    # The context manager closes the socket on every exit path.
    with socket.socket() as sock:
        sock.connect((url, 80))
        print("建立连接的远程服务器地址:", sock.getpeername())
        # sendall() retries until the whole request is written; send() may
        # transmit only part of the buffer.
        sock.sendall(request)

        while True:
            try:
                data = sock.recv(size)
            except OSError as exc:
                # Only network errors are handled here, so Ctrl-C and
                # interpreter exit still propagate.
                print("发生异常:", exc)
                break

            count = len(data)
            print("读取数据数量:", count)
            if not data:
                # An empty read means the peer closed the connection;
                # flush whatever the decoder is still holding.
                tail = decoder.decode(b"", final=True)
                if tail:
                    print(tail)
                print("读数据完毕")
                break
            print(decoder.decode(data))
if __name__ == '__main__':
    # Example target: the Baidu home page. The host must be non-empty,
    # because it is used as the connect address and as the Host header.
    url = "www.baidu.com"
    getDataBySocket(url)