#!/bin/bash
##############################################################
# File Name: tjj.sh
# Version: V1.0
# Author: sanshi
# Organization: https://www.9133w.cn/sources/
# Created Time : 2022-05-27 15:16:59
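# Description: Scrape administrative-division codes (province, city, region,
#              street, village) from $base_url and write them as nested JSON
#              to $json_path.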
##############################################################
# Root URL of the division-code pages; every level's page is addressed
# relative to it.
base_url=http://www.stats.g-o-v.cn/tjsj/t-j-b-z/tjyqhdmhcxhfdm/2-0-2-1
# Intermediate tables, one per administrative level (whitespace-separated rows).
prov_path="/tmp/province.txt"
city_path="/tmp/city.txt"
regi_path="/tmp/region.txt"
stre_path="/tmp/street.txt"
coun_path="/tmp/country.txt"
# Holds the provinces selected by the user.
temp_file=$(mktemp -t temp.XXXX)
# A quoted "~" is never tilde-expanded, so spell the home directory out.
json_path="${HOME}/address.json"
# Download the index page and store one "<code> <name>" row per province.
# NOTE(review): the original sed pattern was lost to markup stripping in this
# copy of the file (a literal line break was left in its place); it is
# reconstructed here as the <br/> tag, which would render as exactly that.
# Confirm against the live page.
: > "$prov_path"
curl -sL "$base_url/index.html" \
  | sed -E 's@<br ?/?>@\n@g' \
  | grep -oP '(?<=href=")\d+\.html">\S+' \
  | sed 's@\.html">@ @g' > "$prov_path"
# Show the province table and let the user pick one code prefix or everything.
echo 'please input province id,if all to use : ALL|all|a :'
cat "$prov_path"
read -r -p 'ID: ' ch_id
case "$ch_id" in
  all | ALL | a)
    cat "$prov_path" > "$temp_file"
    ;;
  [0-9]*)
    # Quoted so that user input can never be split into extra grep arguments.
    grep -- "^${ch_id}" "$prov_path" > "$temp_file"
    ;;
  *)
    echo 'bad choice,thanks!'
    exit 1
    ;;
esac
# Nothing selected: report it and stop (exit status kept at 0 as before).
if [ ! -s "$temp_file" ]; then
  echo 'can not find the result,please check your choice!'
  exit 0
fi
# For every selected province, download its page and append the city links to
# $city_path as "<province> <city code> <12-digit code | name>" rows (each
# city yields two rows: its code link first, its name link second).
# NOTE(review): the split pattern was lost to markup stripping in this copy of
# the file; "</a>" is reconstructed from the row layout json_analysis reads
# (first matching row = code, last matching row = name). Confirm on a live page.
: > "$city_path"
while read -r num1 _; do
  # Keep requesting until at least one row for this province has been stored.
  # There is no retry cap, exactly as in the original.
  while :; do
    curl -sL -m10 -A "IE11" "$base_url/${num1}.html" \
      | sed 's@</a>@\n@g' \
      | grep -oP '(?<=href=")\d+/\d+\.html">\N+' \
      | sed 's@/@ @g;s@\.html">@ @g' >> "$city_path"
    if awk '{print $2}' "$city_path" | grep -q "^${num1}"; then
      break
    fi
  done
done < "$temp_file"
# For every city code collected in $city_path, download the city page and
# append its links to $regi_path as
# "<city part> <region code> <12-digit code | name>" rows.
# NOTE(review): the split pattern was lost to markup stripping in this copy of
# the file; "</a>" is a reconstruction. Confirm on a live page.
: > "$regi_path"
awk '{print $2}' "$city_path" | uniq | while read -r line; do
  num2=${line:0:2} # province part of the city code
  # Keep requesting until at least one row for this city has been stored.
  # There is no retry cap, exactly as in the original.
  while :; do
    curl -sL -m10 -A "IE11" "$base_url/$num2/${line}.html" \
      | sed 's@</a>@\n@g' \
      | grep -oP '(?<=href=")\d+/\d+\.html">\N+' \
      | sed 's@/@ @g;s@\.html">@ @g' >> "$regi_path"
    if awk '{print $2}' "$regi_path" | grep -q "^${line}"; then
      break
    fi
  done
done
# For every code in column 2 of $regi_path, download its page.
#  - Codes under 4419, 4420 and 4604 are handled as pages that already list
#    villages: their table rows go straight to $coun_path as
#    "<village code> <classification id> <name>".
#  - Every other code is a region whose page links to streets; those links go
#    to $stre_path as "<region part> <street code> <12-digit code | name>".
# The prefix is matched as a whole: the original tested "44 or 46" and
# "19, 20 or 04" independently, which also caught 4404.
# NOTE(review): both extraction pipelines were destroyed by markup stripping
# in this copy of the file. They are reconstructed from the row formats
# json_analysis consumes, and the village rows are appended (>>) because
# json_analysis looks up villages of every street. Confirm on a live page.
: > "$stre_path"
: > "$coun_path"
awk '{print $2}' "$regi_path" | uniq | while read -r street; do
  num3=${street:0:2}
  num4=${street:2:2}
  # Keep requesting until at least one row for this code has been stored.
  # There is no retry cap, exactly as in the original.
  while :; do
    case "${num3}${num4}" in
      4419 | 4420 | 4604)
        curl -sL -m10 -A "IE11" "$base_url/$num3/$num4/${street}.html" \
          | grep -oP '<td>\d+</td><td>\d+</td><td>[^<]*</td>' \
          | sed -E 's@</td><td>@ @g;s@</?td>@@g' >> "$coun_path"
        if awk '{print $1}' "$coun_path" | grep -q "^${street}"; then
          break
        fi
        ;;
      *)
        curl -sL -m10 -A "IE11" "$base_url/$num3/$num4/${street}.html" \
          | sed 's@</a>@\n@g' \
          | grep -oP '(?<=href=")\d+/\d+\.html">\N+' \
          | sed 's@/@ @g;s@\.html">@ @g' >> "$stre_path"
        if awk '{print $2}' "$stre_path" | grep -q "^${street}"; then
          break
        fi
        ;;
    esac
  done
done
# For every street code in $stre_path, download the street page and append its
# table rows to $coun_path as "<village code> <classification id> <name>".
# NOTE(review): the extraction pipeline was destroyed by markup stripping in
# this copy of the file. It is reconstructed from the three fields
# json_analysis reads per row, and rows are appended (>>) because json_analysis
# looks up villages of every street. Confirm on a live page.
awk '{print $2}' "$stre_path" | uniq | while read -r country; do
  num5=${country:0:2}
  num6=${country:2:2}
  num7=${country:4:2}
  # Keep requesting until at least one row for this street has been stored.
  # There is no retry cap, exactly as in the original.
  while :; do
    curl -sL -m10 -A "IE11" "$base_url/$num5/$num6/$num7/${country}.html" \
      | grep -oP '<td>\d+</td><td>\d+</td><td>[^<]*</td>' \
      | sed -E 's@</td><td>@ @g;s@</?td>@@g' >> "$coun_path"
    if awk '{print $1}' "$coun_path" | grep -q "^${country}"; then
      break
    fi
  done
done
#######json analysis#########
#######################################
# Print the scraped tables as one nested JSON array on stdout:
# province -> city -> region -> street -> country (village).
# Globals (read):
#   temp_file  selected provinces, "<code> <name>" per line
#   city_path  "<province> <city code> <12-digit code | name>"
#   regi_path  "<city part> <region code> <12-digit code | name>"
#   stre_path  "<region part> <street code> <12-digit code | name>"
#   coun_path  "<village code> <classification id> <name>"
# Arguments: none
# Notes:
#   - In the three-column tables each entry has two rows; the first matching
#     row supplies the code and the last one the name.
#   - The top-level array is closed after the last province listed in
#     $temp_file (the original only closed it for province code 65).
#   - Regions 441900, 442000 and 460400 have towns directly below the city;
#     they are emitted under a placeholder "null" region and each town lists
#     only its own villages (the original matched on the region prefix and
#     repeated every village of the city under every town).
#   - A level with no children leaves its array unclosed, as before.
#######################################
json_analysis() {
  local code_pro prov_name last_pro
  local city city_code city_name
  local region region_id region_code region_name region_tag
  local town town_id town_code town_name
  local street street_id street_code street_name street_tag
  local country_last country_tag
  local country_code country_id country_name c_code c_id c_name
  local -a city_arr region_arr town_arr street_arr

  last_pro=$(tail -n 1 "$temp_file" | awk '{print $1}')

  echo "["
  while read -r code_pro prov_name _; do
    printf '{"province_code":%s,\n"province_name":"%s",\n\n' "$code_pro" "$prov_name"
    printf '"city":[\n'
    mapfile -t city_arr < <(grep "^$code_pro " "$city_path" | awk '{print $2}' | uniq)
    for city in "${city_arr[@]}"; do
      city_code=$(grep "^$code_pro" "$city_path" | grep -- "$city" | head -1 | awk '{print $NF}')
      city_name=$(grep "^$code_pro" "$city_path" | grep -- "$city" | tail -1 | awk '{print $NF}')
      [ -z "$city_code" ] && continue
      printf '{"city_code":%s,\n"city_name":"%s",\n\n' "$city_code" "$city_name"
      printf '"region":[\n'
      mapfile -t region_arr < <(awk '{print $2}' "$regi_path" | cut -c1-6 | grep "^$city" | uniq)
      for region in "${region_arr[@]}"; do
        case "$region" in
          441900 | 442000 | 460400)
            printf '{"region_code":"null",\n"region_name":"null",\n\n'
            printf '"street":[\n'
            mapfile -t town_arr < <(awk '{print $2}' "$regi_path" | grep "^$city" | uniq | grep -- "$region")
            for town in "${town_arr[@]}"; do
              town_id=${city:2:2}
              town_code=$(grep "^$town_id" "$regi_path" | grep -- "$town" | head -1 | awk '{print $NF}')
              town_name=$(grep "^$town_id" "$regi_path" | grep -- "$town" | tail -1 | awk '{print $NF}')
              [ -z "$town_code" ] && continue
              printf '{"town_code":%s,\n"town_name":"%s",\n\n' "$town_code" "$town_name"
              printf '"country":[\n'
              country_last=$(grep "^$town" "$coun_path" | tail -1 | awk '{print $1}')
              grep "^$town" "$coun_path" | while read -r country_code country_id country_name _; do
                [ -z "$country_code" ] && continue
                printf '{"country_code":%s,\n"country_id":%s,\n\n' "$country_code" "$country_id"
                printf '"country_name":"%s"\n' "$country_name"
                if [ "$country_code" = "$country_last" ]; then
                  echo "}]"
                else
                  echo "},"
                fi
              done
              # The last town also closes the street array and the
              # placeholder region together with the region array.
              if [ "${town_code:0:9}" = "${town_arr[-1]}" ]; then
                echo "}]}]"
              else
                echo "},"
              fi
            done
            continue
            ;;
        esac
        region_id=${city:2:2}
        region_code=$(grep "^$region_id" "$regi_path" | grep -- "$region" | head -1 | awk '{print $NF}')
        region_name=$(grep "^$region_id" "$regi_path" | grep -- "$region" | tail -1 | awk '{print $NF}')
        [ -z "$region_code" ] && continue
        printf '{"region_code":%s,\n"region_name":"%s",\n\n' "$region_code" "$region_name"
        printf '"street":[\n'
        mapfile -t street_arr < <(awk '{print $2}' "$stre_path" | uniq | grep "^$region")
        for street in "${street_arr[@]}"; do
          street_id=${region:4:2}
          street_code=$(grep "^$street_id" "$stre_path" | grep -- "$street" | head -1 | awk '{print $NF}')
          street_name=$(grep "^$street_id" "$stre_path" | grep -- "$street" | tail -1 | awk '{print $NF}')
          [ -z "$street_code" ] && continue
          printf '{"street_code":%s,\n"street_name":"%s",\n\n' "$street_code" "$street_name"
          printf '"country":[\n'
          country_tag=$(grep "^$street" "$coun_path" | tail -1 | awk '{print $1}')
          grep "^$street" "$coun_path" | while read -r c_code c_id c_name _; do
            [ -z "$c_code" ] && continue
            printf '{"country_code":%s,\n"country_id":%s,\n\n' "$c_code" "$c_id"
            printf '"country_name":"%s"\n' "$c_name"
            if [ "$country_tag" = "$c_code" ]; then
              echo "}]"
            else
              echo "},"
            fi
          done
          street_tag=${street_code:0:9}
          if [ "$street_tag" = "${street_arr[-1]}" ]; then
            echo "}]"
          else
            echo "},"
          fi
        done
        region_tag=${region_code:0:6}
        if [ "$region_tag" = "${region_arr[-1]}" ]; then
          echo "}]"
        else
          echo "},"
        fi
      done
      if [ "${city_code:0:4}" = "${city_arr[-1]}" ]; then
        echo "}]"
      else
        echo "},"
      fi
    done
    if [ "$code_pro" = "$last_pro" ]; then
      echo "}]"
    else
      echo "},"
    fi
  done < "$temp_file"
}
# Write the JSON document. If the output file cannot be written, keep the
# intermediate tables (so the scrape need not be repeated) and report failure
# instead of deleting them and exiting 0.
if ! json_analysis > "$json_path"; then
  echo "can not write $json_path" >&2
  exit 1
fi
rm -f -- "$temp_file" "$prov_path" "$city_path" "$regi_path" "$stre_path" "$coun_path"
exit 0