#!/bin/bash
##############################################################
# File Name: tjj.sh
# Version: V1.0
# Author: sanshi
# Organization: https://www.9133w.cn/sources/
# Created Time : 2022-05-27 15:16:59
# Description: Crawl the administrative-division code pages below
#              $base_url (province -> city -> region -> street ->
#              village) for one or all provinces and write the
#              result as nested JSON to $json_path.
# Environment: MAX_RETRIES  attempts per page before giving up
#                           (default 20).
# Requires:    bash, curl, GNU grep (-P), sed, awk, mktemp.
##############################################################

# NOTE(review): the hyphenated host/path segments look like deliberate
# obfuscation of the real address; confirm the URL before running.
base_url=http://www.stats.g-o-v.cn/tjsj/t-j-b-z/tjyqhdmhcxhfdm/2-0-2-1

# A quoted "~" is never tilde-expanded, so the output path is built from $HOME.
json_path="${HOME}/address.json"

max_retries=${MAX_RETRIES:-20}
[[ $max_retries =~ ^[0-9]+$ ]] || max_retries=20

# All intermediate files live in a private directory that is removed on
# every exit path (normal end, bad choice, interrupt).
work_dir=$(mktemp -d "${TMPDIR:-/tmp}/tjj.XXXXXX") || {
  echo 'can not create a temporary directory' >&2
  exit 1
}
trap 'rm -rf -- "$work_dir"' EXIT

prov_path="$work_dir/province.txt" # "<prov> <name>"
city_path="$work_dir/city.txt"     # "<dir> <code4> <value>", code line then name line
regi_path="$work_dir/region.txt"   # same layout, 6-digit (or 9-digit town) keys
stre_path="$work_dir/street.txt"   # same layout, 9-digit keys
coun_path="$work_dir/country.txt"  # "<code12> <type id> <name>"
temp_file="$work_dir/selected.txt" # provinces chosen by the user

#######################################
# Download one page below base_url and print its body.
# Arguments: $1 - path relative to base_url
#######################################
fetch_page() {
  curl -sL -m10 -A "IE11" "$base_url/$1" </dev/null
}

# NOTE(review): the three parsers below assume this markup -- anchors of the
# form <a href="DIR/CODE.html">TEXT</a> (either quote style) on listing pages,
# and <td>CODE</td><td>ID</td><td>NAME</td> rows on village pages. Confirm
# against a live page; they match elements directly instead of first
# splitting the page on a tag.

#######################################
# Province index: prints "<prov code> <name>" per province link.
#######################################
parse_provinces() {
  grep -oP "(?<=href=[\"'])\d+\.html[\"']>[^<]+" \
    | sed -E "s@\.html[\"']>@ @"
}

#######################################
# Listing page: prints "<dir> <code> <text>" per anchor. Each entry has two
# anchors (numeric code, then name), so every key yields two lines.
#######################################
parse_links() {
  grep -oP "(?<=href=[\"'])\d+/\d+\.html[\"']>[^<]+" \
    | sed -E "s@/@ @;s@\.html[\"']>@ @"
}

#######################################
# Village page: prints "<code> <type id> <name>" per table row.
#######################################
parse_villages() {
  grep -oP '<td>\d+</td><td>\d+</td><td>[^<]+' \
    | sed -E 's@^<td>@@;s@</td><td>@ @g'
}

#######################################
# Fetch a page, parse it and append the rows to a file. Retries while the
# parser yields nothing, but gives up after max_retries attempts so an
# unreachable or link-less page cannot hang the run.
# Arguments: $1 - parser function, $2 - page path, $3 - output file
# Returns:   0 if rows were appended, 1 otherwise (warning on stderr)
#######################################
fetch_into() {
  local parser=$1 page=$2 out=$3
  local attempt rows
  for ((attempt = 1; attempt <= max_retries; attempt++)); do
    rows=$(fetch_page "$page" | "$parser")
    if [[ -n $rows ]]; then
      printf '%s\n' "$rows" >>"$out"
      return 0
    fi
    ((attempt < max_retries)) && sleep 1
  done
  printf 'warning: no data from %s after %s attempts\n' \
    "$page" "$max_retries" >&2
  return 1
}

#######################################
# True when the code belongs to a city whose page lists towns directly
# (no county level). Used by both the crawl and the JSON phase so the two
# always agree.
# Arguments: $1 - any code starting with the 4-digit city prefix
#######################################
has_no_counties() {
  case ${1:0:4} in
    4419 | 4420 | 4604) return 0 ;;
    *) return 1 ;;
  esac
}

# Distinct second-column keys of file $1 that start with prefix $2,
# in file order.
child_keys() {
  awk -v p="$2" 'index($2, p) == 1 && !seen[$2]++ { print $2 }' "$1"
}

# Last field of the first line of file $1 whose key equals $2 (the code).
first_value() {
  awk -v k="$2" '($2 "") == (k "") { print $NF; exit }' "$1"
}

# Last field of the last line of file $1 whose key equals $2 (the name).
last_value() {
  awk -v k="$2" '($2 "") == (k "") { v = $NF } END { if (v != "") print v }' "$1"
}

# Escape backslashes and double quotes for use inside a JSON string.
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

#######################################
# Print the "country" array: all villages whose code starts with $1.
#######################################
emit_villages() {
  local prefix=$1 sep='' code id name
  printf '"country":[\n'
  while read -r code id name _; do
    [[ $code =~ ^[0-9]+$ && $id =~ ^[0-9]+$ ]] || continue
    printf '%s{"country_code":%s,\n"country_id":%s,\n"country_name":"%s"\n}' \
      "$sep" "$code" "$id" "$(json_escape "$name")"
    sep=$',\n'
  done < <(awk -v p="$prefix" 'index($1, p) == 1' "$coun_path")
  printf '\n]\n'
}

#######################################
# Print the "street" array for one region.
# Arguments: $1 - file holding the entries (street file, or region file for
#                 county-less cities)
#            $2 - key prefix of the parent
#            $3 - JSON key prefix: "street" or "town"
#######################################
emit_streets() {
  local src=$1 parent=$2 label=$3 sep='' key code name
  printf '"street":[\n'
  while read -r key; do
    code=$(first_value "$src" "$key")
    name=$(last_value "$src" "$key")
    [[ $code =~ ^[0-9]+$ ]] || continue
    printf '%s{"%s_code":%s,\n"%s_name":"%s",\n' \
      "$sep" "$label" "$code" "$label" "$(json_escape "$name")"
    # Villages are selected by this entry's own 9-digit key, so each
    # street/town only lists its own villages.
    emit_villages "$key"
    printf '}'
    sep=$',\n'
  done < <(child_keys "$src" "$parent")
  printf '\n]\n'
}

#######################################
# Print the "region" array for one city. County-less cities get a single
# placeholder region ("null") that holds their towns.
# Arguments: $1 - 4-digit city key
#######################################
emit_regions() {
  local city=$1 sep='' region code name
  printf '"region":[\n'
  while read -r region; do
    if has_no_counties "$region"; then
      printf '%s{"region_code":"null",\n"region_name":"null",\n' "$sep"
      emit_streets "$regi_path" "$region" town
    else
      code=$(first_value "$regi_path" "$region")
      name=$(last_value "$regi_path" "$region")
      [[ $code =~ ^[0-9]+$ ]] || continue
      printf '%s{"region_code":%s,\n"region_name":"%s",\n' \
        "$sep" "$code" "$(json_escape "$name")"
      emit_streets "$stre_path" "$region" street
    fi
    printf '}'
    sep=$',\n'
  done < <(awk -v p="$city" '
    index($2, p) == 1 { k = substr($2, 1, 6); if (!seen[k]++) print k }
  ' "$regi_path")
  printf '\n]\n'
}

#######################################
# Print the "city" array for one province.
# Arguments: $1 - 2-digit province code
#######################################
emit_cities() {
  local prov=$1 sep='' city code name
  printf '"city":[\n'
  while read -r city; do
    code=$(first_value "$city_path" "$city")
    name=$(last_value "$city_path" "$city")
    [[ $code =~ ^[0-9]+$ ]] || continue
    printf '%s{"city_code":%s,\n"city_name":"%s",\n' \
      "$sep" "$code" "$(json_escape "$name")"
    emit_regions "$city"
    printf '}'
    sep=$',\n'
  done < <(child_keys "$city_path" "$prov")
  printf '\n]\n'
}

#######json analysis#########
#######################################
# Print the whole JSON document for the provinces listed in temp_file.
# Elements are joined with a separator and every array is closed
# unconditionally, so the document is well-formed whichever provinces were
# selected and even when an entry is skipped.
# Globals: temp_file, city_path, regi_path, stre_path, coun_path (read)
# Outputs: JSON on stdout
#######################################
json_analysis() {
  local sep='' code name
  printf '[\n'
  while read -r code name _; do
    [[ $code =~ ^[0-9]+$ ]] || continue
    printf '%s{"province_code":%s,\n"province_name":"%s",\n' \
      "$sep" "$code" "$(json_escape "$name")"
    emit_cities "$code"
    printf '}'
    sep=$',\n'
  done <"$temp_file"
  printf '\n]\n'
}

#######################################
# Interactive entry point: choose provinces, crawl every level, write JSON.
#######################################
main() {
  local ch_id prov_code city area street

  fetch_page index.html | parse_provinces >"$prov_path"

  echo 'please input province id,if all to use : ALL|all|a :'
  cat "$prov_path"
  read -r -p 'ID: ' ch_id
  case $ch_id in
    all | ALL | a)
      cat "$prov_path" >"$temp_file"
      ;;
    [0-9]*)
      # Literal prefix match on the province code (no regex interpretation).
      awk -v id="$ch_id" 'index($1, id) == 1' "$prov_path" >"$temp_file"
      ;;
    *)
      echo 'bad choice,thanks!'
      exit 1
      ;;
  esac
  if [[ ! -s $temp_file ]]; then
    echo 'can not find the result,please check your choice!'
    exit 0
  fi

  # Cities of every selected province.
  : >"$city_path"
  while read -r prov_code _; do
    [[ -n $prov_code ]] || continue
    fetch_into parse_links "${prov_code}.html" "$city_path"
  done <"$temp_file"

  # Regions (counties) of every city.
  : >"$regi_path"
  while read -r city; do
    fetch_into parse_links "${city:0:2}/${city}.html" "$regi_path"
  done < <(awk '!seen[$2]++ { print $2 }' "$city_path")

  # Streets of every region; for county-less cities the region entries are
  # already towns, so their pages are village pages.
  : >"$stre_path"
  : >"$coun_path"
  while read -r area; do
    if has_no_counties "$area"; then
      fetch_into parse_villages \
        "${area:0:2}/${area:2:2}/${area}.html" "$coun_path"
    else
      fetch_into parse_links \
        "${area:0:2}/${area:2:2}/${area}.html" "$stre_path"
    fi
  done < <(awk '!seen[$2]++ { print $2 }' "$regi_path")

  # Villages of every street.
  while read -r street; do
    fetch_into parse_villages \
      "${street:0:2}/${street:2:2}/${street:4:2}/${street}.html" "$coun_path"
  done < <(awk '!seen[$2]++ { print $2 }' "$stre_path")

  if ! json_analysis >"$json_path"; then
    printf 'can not write %s\n' "$json_path" >&2
    exit 1
  fi
  exit 0
}

main "$@"