In [1]:
county_shp = "6. 专题分析数据/Gansu-County.shp"
lucc_tif = "6. 专题分析数据/Gansu-lucc-2015.tif"
masked_lucc = "6. 专题分析数据/masked_lucc.tif"
rain_tif = "data/简单稳定.tif"
eval_tif = "data/简单稳定蒸发.tif"
In [2]:
import geopandas as gpd
import rasterio
from rasterio.features import geometry_mask
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings("ignore")
In [3]:
# 提取植被覆盖类型栅格
vegetation_code = [21,22,23,24,31,32,33]
with rasterio.open(lucc_tif) as src:
lucc = src.read(1)
lucc = np.isin(lucc, vegetation_code).astype(int)
meta = src.meta.copy()
with rasterio.open(masked_lucc, 'w', **meta) as src:
src.write(lucc, 1)
In [4]:
# 统计每个县的lucc栅格数量和栅格总量
county = gpd.read_file(county_shp)
with rasterio.open(masked_lucc) as src:
lucc = src.read(1)
transform = src.transform
crs = src.crs
if county.crs != crs:
county = county.to_crs(crs)
lucc_counts = []
area_counts = []
for idx, row in county.iterrows():
geom = row['geometry']
mask = geometry_mask([geom], out_shape=lucc.shape, transform=transform, invert=True)
lucc_count = np.sum(lucc[mask])
area_count = np.sum(mask)
area_counts.append(area_count)
lucc_counts.append(lucc_count)
county['lucc_count'] = lucc_counts
county['area_count'] = area_counts
county['lucc_rate'] = county['lucc_count'] / county['area_count']
print(county[['LAST_NAME9', 'lucc_count', 'area_count', 'lucc_rate']].head())
LAST_NAME9 lucc_count area_count lucc_rate 0 兰州市市辖区 1023 1681 0.608566 1 永登县 105 199 0.527638 2 永登县 3622 5417 0.668636 3 皋兰县 1820 2546 0.714847 4 榆中县 2165 3253 0.665540
In [5]:
# 统计平均降水量与蒸发量
with rasterio.open(rain_tif) as src:
rain = src.read(1)
transform = src.transform
crs = src.crs
with rasterio.open(eval_tif) as src:
eval = src.read(1)
transform = src.transform
crs = src.crs
if county.crs != crs:
county = county.to_crs(crs)
rain_means = []
eval_means = []
for idx, row in county.iterrows():
geom = row['geometry']
mask = geometry_mask([geom], out_shape=rain.shape, transform=transform, invert=True)
rain_mean = np.mean(rain[mask])
eval_mean = np.mean(eval[mask])
rain_means.append(rain_mean)
eval_means.append(eval_mean)
county['rain_mean'] = rain_means
county['eval_mean'] = eval_means
print(county[['LAST_NAME9', 'rain_mean', 'eval_mean']].head())
LAST_NAME9 rain_mean eval_mean 0 兰州市市辖区 442.452942 1426.226074 1 永登县 306.491302 1621.062256 2 永登县 394.876923 1507.372681 3 皋兰县 367.027405 1496.092285 4 榆中县 399.487976 1442.178833
In [6]:
# 去除小地块
county_trimed = county[county['area_count'] > 2000]
print(f"Remaining counties after area filter: {len(county_trimed)}")
Remaining counties after area filter: 54
In [7]:
# 植被覆盖率与降水量回归分析
plt.scatter(county_trimed['rain_mean'], county_trimed['lucc_rate'])
plt.xlabel('Average Rainfall')
plt.ylabel('Vegetation Coverage Rate')
plt.title('Vegetation Coverage Rate vs Average Rainfall')
plt.show()
In [ ]:
import statsmodels.api as sm
X = county_trimed['rain_mean']
y = county_trimed['lucc_rate']
X_with_const = sm.add_constant(X)
model = sm.OLS(y, X_with_const).fit()
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lucc_rate R-squared: 0.676
Model: OLS Adj. R-squared: 0.669
Method: Least Squares F-statistic: 108.3
Date: Tue, 25 Nov 2025 Prob (F-statistic): 2.57e-14
Time: 08:19:30 Log-Likelihood: 28.284
No. Observations: 54 AIC: -52.57
Df Residuals: 52 BIC: -48.59
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.1372 0.045 3.081 0.003 0.048 0.227
rain_mean 0.0011 0.000 10.408 0.000 0.001 0.001
==============================================================================
Omnibus: 1.900 Durbin-Watson: 1.383
Prob(Omnibus): 0.387 Jarque-Bera (JB): 1.845
Skew: 0.418 Prob(JB): 0.397
Kurtosis: 2.653 Cond. No. 958.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [9]:
# 画出回归线
plt.scatter(county_trimed['rain_mean'], county_trimed['lucc_rate'], label='Data Points')
plt.plot(county_trimed['rain_mean'], model.predict(X_with_const), color='red', label='Regression Line')
plt.xlabel('Average Rainfall')
plt.ylabel('Vegetation Coverage Rate')
plt.title('Vegetation Coverage Rate vs Average Rainfall with Regression Line')
plt.legend()
plt.show()