Python Web Scraping
requests
## Installing requests

```bash
pip install requests
sudo pip install requests   # if installation fails with "Permission denied", retry with sudo
```

## Basic usage

```python
import requests

response = requests.get('https://www.douban.com/')   # Douban homepage

# Pass query-string parameters
response = requests.get('https://www.douban.com/search',
                        params={'q': 'python', 'cat': '1001'})

# Pass HTTP headers
response = requests.get('https://www.douban.com/',
                        headers={'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit',
                                 'Accept-Language': 'en-US'})

# Pass cookies
response = requests.get(url, cookies={'token': '12345', 'status': 'working'})

# Control redirects
response = requests.get(url, allow_redirects=False)
```

## Timeouts

```python
response = requests.get(url, timeout=(3.05, 27))   # connect timeout 3.05 s, read timeout 27 s
```

## Streaming requests

For large responses, receive the body chunk by chunk instead of loading it all at once:

```python
response = requests.get(url, stream=True)
for chunk in response.iter_content(chunk_size=8192):
    process(chunk)
```

## Sending data

```python
# JSON body (Content-Type: application/json)
response = requests.post(url, json={"title": "Hello", "body": "World"})

# Form data (Content-Type: application/x-www-form-urlencoded)
response = requests.post('https://www.douban.com/search',
                         data={'q': 'python', 'cat': '1001'})
```

## Setting the encoding manually (if the text comes back garbled)

```python
response.encoding = "gbk"   # e.g. for Chinese pages served as GBK
```

## Exception handling (HTTP errors)

```python
try:
    response.raise_for_status()   # raises HTTPError for 4xx/5xx status codes
except requests.HTTPError as e:
    print(f"Request failed: {e}")
```

## File upload

```python
# Single file
with open("report.pdf", "rb") as f:
    files = {"document": f}
    response = requests.post(url, files=files)

# Multiple files / mixed data
import json

files = {
    "image": ("cat.jpg", open("cat.jpg", "rb"), "image/jpeg"),
    "metadata": ("data.json", json.dumps({"tag": "animal"}), "application/json"),
}
response = requests.post(url, files=files)
```

## Session management

A `Session` persists configuration (headers, auth, cookies) and reuses the underlying connection pool:

```python
with requests.Session() as session:
    session.headers.update({"User-Agent": "MyApp/1.0"})
    session.auth = ("user", "pass")
    # Log in once; the session stores the returned cookies
    login_resp = session.post(login_url, data=credentials)
    # Subsequent requests send the cookies automatically
    profile_resp = session.get(profile_url)
```

## SSL/TLS

```python
response = requests.get(url, verify=False)                                     # disable verification (not recommended)
response = requests.get(url, verify="/path/to/ca-bundle.crt")                  # custom CA bundle
response = requests.get(url, cert=("/path/client.cert", "/path/client.key"))   # client certificate authentication
```

## Authentication

```python
# Basic Auth
from requests.auth import HTTPBasicAuth
response = requests.get(url, auth=HTTPBasicAuth("user", "pass"))

# Digest Auth
from requests.auth import HTTPDigestAuth
response = requests.get(url, auth=HTTPDigestAuth("user", "pass"))

# OAuth 1.0 (requires the requests-oauthlib package)
from requests_oauthlib import OAuth1
auth = OAuth1("client_key", "client_secret", "token", "token_secret")
response = requests.get(url, auth=auth)
```

## Proxies

HTTP(S) and SOCKS proxies are supported (SOCKS support comes from `pip install requests[socks]`):

```python
proxies = {
    "http": "http://10.10.1.10:3128",
    "https": "socks5://user:pass@host:port",
}
response = requests.get(url, proxies=proxies)
```

## Error handling (all request exceptions)

```python
import logging

try:
    resp = requests.get(url, timeout=5)
    resp.raise_for_status()
except requests.RequestException as e:
    logging.error(f"Request failed: {e}")
```
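Putting several of the pieces above together, here is a minimal sketch of a fetch loop that combines a shared `Session`, default headers, per-request timeouts, and error handling. The `START_URLS` list, the `fetch_all` helper, and the `MyApp/1.0` user agent are illustrative placeholders, not part of the original notes.

```python
import logging
import requests

logging.basicConfig(level=logging.INFO)

# Hypothetical targets; replace with the pages you actually want to fetch.
START_URLS = [
    "https://www.douban.com/",
    "https://www.douban.com/search?q=python",
]

def fetch_all(urls):
    """Fetch each URL with one shared Session and return the pages that succeeded."""
    pages = {}
    with requests.Session() as session:
        session.headers.update({"User-Agent": "MyApp/1.0"})
        for url in urls:
            try:
                resp = session.get(url, timeout=(3.05, 27))
                resp.raise_for_status()          # turn 4xx/5xx into HTTPError
            except requests.RequestException as e:
                logging.error("Request failed for %s: %s", url, e)
                continue
            pages[url] = resp.text
    return pages

if __name__ == "__main__":
    for url, html in fetch_all(START_URLS).items():
        print(url, len(html))
```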
## Response attributes

| Attribute / method | Description | Example |
|---|---|---|
| `response.status_code` | HTTP status code (e.g. 200, 404) | `if response.status_code == 200:` |
| `response.text` | body decoded to text (encoding detected automatically) | `print(response.text[:100])` |
| `response.content` | raw body as bytes | `with open("image.png", "wb") as f: f.write(response.content)` |
| `response.json()` | parses the JSON body into a dict | `data = response.json()` |
| `response.headers` | response headers (dict-like) | `content_type = response.headers["Content-Type"]` |
| `response.cookies` | cookies set by the server | `response.cookies.get("session_id")`, `response.cookies['ts']` |
| `response.history` | list of `Response` objects from the redirect history | `for resp in response.history: print(resp.url)` |
| `response.url` | final URL that was actually requested | |
| `response.encoding` | encoding used to decode `response.text` (can be set manually) | |
| `response.apparent_encoding` | encoding guessed from the response body | |
| `response.elapsed` | `timedelta` from sending the request until the response arrived; useful for measuring response time | `response.elapsed.microseconds` |
| `response.is_redirect` | `True` if this response is a redirect, otherwise `False` | |
| `response.is_permanent_redirect` | `True` if this response is a permanent redirect, otherwise `False` | |
| `response.next` | `PreparedRequest` for the next request in a redirect chain | |
| `response.ok` | `True` if `status_code` is less than 400, otherwise `False` | |
| `response.reason` | textual reason of the status, e.g. `"OK"` or `"Not Found"` | |
| `response.request` | the `PreparedRequest` object that produced this response | |
| `response.links` | parsed `Link` response headers | |
| `response.iter_lines()` | iterates over the response body line by line | |
| `response.iter_content()` | iterates over the response body in chunks | |
| `response.raise_for_status()` | raises an `HTTPError` if the request returned an error status | |
| `response.close()` | closes the connection to the server | |

## requests module methods

| Method | Description |
|---|---|
| `requests.delete(url, args)` | sends a DELETE request to the given url |
| `requests.get(url, params, args)` | sends a GET request to the given url |
| `requests.head(url, args)` | sends a HEAD request to the given url |
| `requests.patch(url, data, args)` | sends a PATCH request to the given url |
| `requests.post(url, data, json, args)` | sends a POST request to the given url |
| `requests.put(url, data, args)` | sends a PUT request to the given url |
| `requests.request(method, url, args)` | sends a request with the specified method to the given url |

Here `args` stands for the remaining optional keyword arguments (`headers`, `cookies`, `timeout`, `proxies`, and so on).
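As a quick illustration of the attributes above, the sketch below fetches a page that issues one redirect and prints a few response fields. The httpbin.org endpoint is used here only as a convenient public test service; it is an assumption of this example, not part of the original notes.

```python
import requests

# /redirect/1 on httpbin.org performs one redirect and then returns a JSON body.
response = requests.get("https://httpbin.org/redirect/1", timeout=10)

print(response.status_code)                    # e.g. 200
print(response.ok)                             # True for status codes < 400
print(response.url)                            # final URL after the redirect
print([r.url for r in response.history])       # the intermediate redirect response(s)
print(response.headers.get("Content-Type"))    # e.g. application/json
print(response.elapsed.total_seconds())        # time from request to response, in seconds
print(response.json())                         # parsed JSON body
```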