在做資料同化過程中,經常會使用到常規觀測資料(地面,船舶,浮標,探空,飛機等觀測)、衛星資料(amsua、amsub等)、GPS掩星資料等作為觀測源,通常情況下他們為prepbufr格式。
在資料同化的分析過程中,我們可能需要直接讀取該資料的要素信息,所以時常會對該類型的資料進行分析和可視化。在大型機中,普遍使用MET這一利器作為讀取prepbufr的工具,但是MET的配置過於複雜,依賴庫過於繁瑣,如何在本地機中以簡單的代價讀取該資料是一個很有意思的問題。
這裡介紹一種基於python的prepbufr讀取方法:
系統:Linux系統,這裡推薦Ubuntu子系統環境依賴:gfortran、初步搭建好的python模塊名稱:ncepbufr源碼地址:https:1、安裝過程
解壓py-ncepbufr-master.tar.gz
進入解壓後的目錄,注意一定要在當前目錄下
執行python setup.py build,目的是產生src/libbufr.a等文件。默認使用gfortran,如果想使用其他版本的fortran,可以執行:
python setup.py build config_fc --fcompiler=<compiler name>
執行python setup.py install
最終結果:
2、安裝測試
進入app/py-ncepbufr/test/ 目錄,可以發現該目錄下有很多的python文件。可以用來測試常規觀測、衛星觀測、掩星觀測等能否正常讀取。如果不行可能是環境問題,推薦gfortran。
3、常規觀測和衛星觀測讀取示例
(1)常規觀測
代碼:prepbufr_read_conv.py <input prepbufr> >& info.txt
from __future__ import print_functionimport ncepbufrimport sys
hdstr='SID XOB YOB DHR TYP ELV SAID T29'obstr='POB QOB TOB ZOB UOB VOB PWO MXGS HOVI CAT PRSS TDO PMO'qcstr='PQM QQM TQM ZQM WQM NUL PWQ PMQ'oestr='POE QOE TOE NUL WOE NUL PWE'
if len(sys.argv) < 2: raise SystemExit('prepbufr_read_conv.py <input prepbufr> >& info.txt')
prepbufr_filename = sys.argv[1]bufr = ncepbufr.open(prepbufr_filename)bufr.print_table() bufr.dump_table('prepbufr.table') while bufr.advance() == 0: print(bufr.msg_counter, bufr.msg_type, bufr.msg_date, bufr.receipt_time) while bufr.load_subset() == 0: hdr = bufr.read_subset(hdstr).squeeze() station_id = hdr[0].tostring() obs = bufr.read_subset(obstr) nlevs = obs.shape[-1] oer = bufr.read_subset(oestr) qcf = bufr.read_subset(qcstr) print('station_id, lon, lat, time, station_type, levels =',\ station_id,hdr[1],hdr[2],hdr[3],int(hdr[4]),nlevs) for k in range(nlevs): if nlevs > 1: print('level',k+1) print('obs',obs[:,k]) print('oer',oer[:,k]) print('qcf',qcf[:,k]) if bufr.msg_counter == 2: breakbufr.close()運行結果:
(2)衛星觀測
代碼:prepbufr_read_radiance.py <input prepbufr> >& info.txt
from __future__ import print_functionimport ncepbufrimport syshdstr1 ='SAID SIID FOVN YEAR MNTH DAYS HOUR MINU SECO CLAT CLON CLATH CLONH HOLS'hdstr2 ='SAZA SOZA BEARAZ SOLAZI'
if len(sys.argv) < 2: raise SystemExit('prepbufr_read_radiance.py <input prepbufr> >& info.txt')
prepbufr_filename = sys.argv[1]bufr = ncepbufr.open(prepbufr_filename)bufr.print_table()while bufr.advance() == 0: print(bufr.msg_counter, bufr.msg_type, bufr.msg_date) while bufr.load_subset() == 0: hdr1 = bufr.read_subset(hdstr1).squeeze() hdr2 = bufr.read_subset(hdstr2).squeeze() yyyymmddhhss ='%04i%02i%02i%02i%02i%02i' % tuple(hdr1[3:9]) print('sat id,sensor id lat, lon, yyyymmddhhmmss =',int(hdr1[0]),\ int(hdr1[1]),hdr1[9],hdr1[10],yyyymmddhhss) obs = bufr.read_subset('TMBR',rep=True).squeeze() nchanl = len(obs) for k in range(nchanl): print('channel, tb =',k+1,obs[k]) if bufr.msg_counter == 4: breakbufr.close()結果:
具體怎樣將其更好讀取並輸出,讀者請自行改進~
4、常規prepbufr轉nc的代碼:
"""convert prepbufr file to netCDF"""from __future__ import print_functionimport ncepbufrimport numpy as npfrom netCDF4 import Datasetfrom ncepbufr import prepbufr_mnemonics_dict as mnemonics_dictimport sys
nobs_chunk = 200
time_min = -3.0; time_max = 3.0
if len(sys.argv) < 3: raise SystemExit('prepbufr2nc <input prepbufr> <output netcdf>')
prepbufr_filename = sys.argv[1]netcdf_filename = sys.argv[2]if prepbufr_filename == netcdf_filename: raise IOError('cannot overwrite input prepbufr file')
hdstr='SID XOB YOB DHR TYP ELV SAID T29'obstr='POB QOB TOB ZOB UOB VOB PWO CAT PRSS TDO PMO XDR YDR HRDR'qcstr='PQM QQM TQM ZQM WQM PWQ PMQ'oestr='POE QOE TOE ZOE WOE PWE'
skiptypes = ['SATWND']
bufr = ncepbufr.open(prepbufr_filename)nmessages = 0while bufr.advance() == 0: if bufr.msg_type in skiptypes: continue nmessages += 1bufr.rewind()
nc = Dataset(netcdf_filename,'w',format='NETCDF4')
hd = nc.createDimension('hdrinfo',len(hdstr.split()))nhdd = len(hd)ob = nc.createDimension('obinfo',len(obstr.split()))nobd = len(ob)oe = nc.createDimension('oeinfo',len(oestr.split()))noed = len(oe)qc = nc.createDimension('qcinfo',len(qcstr.split()))nqcd = len(qc)nobsd = nc.createDimension('nobs',None)nmsgs = nc.createDimension('nmsgs',nmessages)
hdrdata =\nc.createVariable('header',np.float64,('nobs','hdrinfo'),\fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,nhdd))hdrdata.desc = 'observation header data'for key in hdstr.split(): hdrdata.setncattr(key,mnemonics_dict[key])hdrdata.hdrinfo = hdstrobid = nc.createVariable('obid',str,('nobs',),zlib=True)obid.desc = 'observation id (station id/type code/lon/lat/time/elevation/pressure)'obdata =\nc.createVariable('obdata',np.float32,('nobs','obinfo'),\fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,nobd))oedata =\nc.createVariable('oberr',np.float32,('nobs','oeinfo'),\fill_value=bufr.missing_value,zlib=True,chunksizes=(nobs_chunk,noed))qc_fillval = 255qcdata =\nc.createVariable('qcdata',np.uint8,('nobs','qcinfo'),\fill_value=qc_fillval,zlib=True,chunksizes=(nobs_chunk,nqcd))msgnum = nc.createVariable('msgnum',np.uint32,('nobs'),\zlib=True,chunksizes=(nobs_chunk,))msgnum.desc = 'bufr message number'subsetnum = nc.createVariable('subsetnum',np.uint16,('nobs'),\zlib=True,chunksizes=(nobs_chunk,))subsetnum.desc='subset number within bufr message'msgtype = nc.createVariable('msgtype',str,('nmsgs'),zlib=True)msgtype.desc='bufr message type'msgdate = nc.createVariable('msgdate',np.uint32,('nmsgs'),zlib=True)msgdate.desc='bufr message date'for key in obstr.split(): obdata.setncattr(key,mnemonics_dict[key])obdata.obinfo = obstrobdata.desc = 'observation data'for key in oestr.split(): oedata.setncattr(key,mnemonics_dict[key])oedata.oeinfo = oestroedata.desc = 'observation error data'for key in qcstr.split(): qcdata.setncattr(key,mnemonics_dict[key])qcdata.qcinfo = qcstrqcdata.desc = 'observation QC data'
nob = 0obid_set = set()nmsg = 0while bufr.advance() == 0: if bufr.msg_type in skiptypes: continue
hdrarr = []; obsarr = []; qcarr = []; errarr = []; obidarr = [] msgnumarr = []; subsetnumarr = [] nobs_message = 0 nsubset = 0 nc['msgtype'][nmsg] = bufr.msg_type nc['msgdate'][nmsg] = bufr.msg_date nmsg += 1
while bufr.load_subset() == 0: nsubset += 1 hdr = bufr.read_subset(hdstr).squeeze() obs = bufr.read_subset(obstr) qc = bufr.read_subset(qcstr) err = bufr.read_subset(oestr) nlevs_use = 0 for nlev in range(obs.shape[-1]): lon = hdr[1]; lat = hdr[2]; time = hdr[3] press = obs[0,nlev]; z = hdr[5] if hdr[4] in [120,220,221]: lon = obs[11,nlev]; lat = obs[12,nlev] if obs[13,nlev] >= time_min and obs[13,nlev] < time_max: time = obs[13,nlev] elif hdr[4] >= 223 and hdr[4] <= 228: z = obs[3,nlev] try: obidstr = "%s %3i %6.2f %6.2f %9.5f %5i %6.1f" % \ (hdr[0].tostring(), int(hdr[4]), lon, lat, time, z, press) except: obidstr = "unknown" if obidstr not in obid_set: obid_set.add(obidstr) else: print('skipping duplicate ob %s' % obidstr) continue hdrarr.append(hdr.squeeze()) obidarr.append(obidstr) obsarr.append(obs[:,nlev]) errarr.append(err[:,nlev]) qcarr.append(qc[:,nlev]) msgnumarr.append(nmsg) subsetnumarr.append(nsubset) nob += 1; nlevs_use += 1 nobs_message += nlevs_use hdrarr = np.array(hdrarr); obsarr = np.array(obsarr) errarr = np.array(errarr); qcarr = np.array(qcarr) obidarr = np.array(obidarr); msgnumarr = np.array(msgnumarr) subsetnumarr = np.array(subsetnumarr) qcarr = np.where(qcarr == bufr.missing_value,qc_fillval,qcarr).astype(np.uint8) nob1 = nob-nobs_message print('writing message %s out of %s, type %s with %s obs' %\ (nmsg,nmessages,bufr.msg_type,nobs_message)) nc['header'][nob1:nob] = hdrarr nc['obdata'][nob1:nob] = obsarr nc['oberr'][nob1:nob] = errarr nc['qcdata'][nob1:nob] = qcarr nc['obid'][nob1:nob] = obidarr nc['msgnum'][nob1:nob] = msgnumarr nc['subsetnum'][nob1:nob] = subsetnumarr nc.sync()
bufr.close(); nc.close()結果:
5、結語
Python只是一種工具而已,重要的是看他能否解決實際的問題。如果你認為MET更好,這完全是正常的想法,由個人實際需求決定。關於本文讀入資料以後的工作,這完全可以很自由的解決,謝謝您閱讀到最後,感謝您支持!!