地址解析(仅供参考与学习)<Java>

时间: 2023-08-02 admin IT培训

地址解析(仅供参考与学习)<Java>

地址解析(仅供参考与学习)<Java>

地址解析步骤如下:

1、检查是否存在省份
2、如果存在省份,将会检查省份是否明确标注省,自治区,市(直辖市),特别行政区的关键字,如果不存在,将会自动拼接上,便于后边正则匹配
3、检查市级地区是否明确标注关键字,如果没有标注,将会自动拼接上;如果市级地址中包含州,盟,自治州等特殊情况的市级地区,将不会进行自动拼接(注:由于给出的公共的市级地区没有明确区分是市还是州或者是县,因此如果没有在地址中明确,将会统一拼接成市)
4、通过正则表达式将重构后的地址进行分割

待分割地址:四川成都高新区xxx地址
分割后结果:
1、四川省
2、成都市
3、高新区
4、xxx地址

5、将地址中存在重复的部分进行去重,保留唯一的省市区地址

第一,定义公用的对象,在各个不同的步骤中会相应的使用到
    /*** 省级后缀*/private static String[] suffixs = {"省","市","自治区","特别行政区"}; /*** 中国34个省,直辖市,自治区,特别行政区*/@SuppressWarnings("serial")private static Map<Integer,String[]> maps = new HashMap<Integer,String[]>(){{/*** 中国的23个省*/put(1, new String[]{"河北","山西","辽宁","吉林","黑龙江","江苏","浙江","安徽","福建","江西","山东","河南","湖北","湖南","广东","海南","四川","贵州","云南","陕西","甘肃","青海","台湾"});/*** 中国的4个直辖市*/put(2, new String[]{"北京","天津","上海","重庆"});/*** 中国的5个自治区*/put(3, new String[]{"广西","内蒙古","西藏","宁夏","新疆"});/*** 中国的2个特别行政区*/put(4, new String[]{"香港","澳门"});/*** 中国的市*/put(5, new String[]{"石家庄","唐山","秦皇岛","邯郸","邢台","保定","张家口","承德","沧州","廊坊","衡水","太原","大同","阳泉","长治","晋城","朔州","晋中","运城","忻州","临汾","吕梁","呼和浩特","包头","乌海","赤峰","通辽","鄂尔多斯","呼伦贝尔","巴彦淖尔","乌兰察布","兴安","锡林郭勒","阿拉善","沈阳","大连","鞍山","抚顺","本溪","丹东","锦州","营口","阜新","辽阳","盘锦","铁岭","朝阳","葫芦岛","长春","吉林","四平","辽源","通化","白山","松原","白城","延边","哈尔滨","齐齐哈尔","鸡西","鹤岗","双鸭山","大庆","伊春","佳木斯","七台河","牡丹江","黑河","绥化","大兴安岭","南京","无锡","徐州","常州","苏州","南通","连云港","淮安","盐城","扬州","镇江","泰州","宿迁","杭州","宁波","温州","嘉兴","湖州","绍兴","金华","衢州","舟山","台州","丽水","合肥","芜湖","蚌埠","淮南","马鞍山","淮北","铜陵","安庆","黄山","滁州","阜阳","宿州","巢湖","六安","亳州","池州","宣城","福州","厦门","莆田","三明","泉州","漳州","南平","龙岩","宁德","南昌","景德镇","萍乡","九江","新余","鹰潭","赣州","吉安","宜春","抚州","上饶","济南","青岛","淄博","枣庄","东营","烟台","潍坊","威海","济宁","泰安","日照","莱芜","临沂","德州","聊城","滨州","菏泽","郑州","开封","洛阳","平顶山","焦作","鹤壁","新乡","安阳","濮阳","许昌","漯河","三门峡","南阳","商丘","信阳","周口","驻马店","武汉","黄石","襄樊","十堰","荆州","宜昌","荆门","鄂州","孝感","黄冈","咸宁","随州","恩施","长沙","株洲","湘潭","衡阳","邵阳","岳阳","常德","张家界","益阳","郴州","永州","怀化","娄底","湘西","广州","深圳","珠海","汕头","韶关","佛山","江门","湛江","茂名","肇庆","惠州","梅州","汕尾","河源","阳江","清远","东莞","中山","潮州","揭阳","云浮","南宁","柳州","桂林","梧州","北海","防城港","钦州","贵港","玉林","百色","贺州","河池","来宾","崇左","海口","三亚","成都","自贡","攀枝花","泸州","德阳","绵阳","广元","遂宁","内江","乐山","南充","宜宾","广安","达州","眉山","雅安","巴中","资阳","阿坝","甘孜","凉山","贵阳","六盘水","遵义","安顺","铜仁","毕节","黔西南","黔东南","黔南","昆明","曲靖","玉溪","保山","昭通","丽江","普洱","临沧","文山","红河","西双版纳","楚雄","大理","德宏","怒江","迪庆","拉萨","昌都","山南","日喀则","那曲","阿
里","林芝","西安","铜川","宝鸡","咸阳","渭南","延安","汉中","榆林","安康","商洛","兰州","嘉峪关","金昌","白银","天水","武威","张掖","平凉","酒泉","庆阳","定西","陇南","临夏","甘南","西宁","海东","海北","黄南","海南","果洛","玉树","海西","银川","石嘴山","吴忠","固原","中卫","乌鲁木齐","克拉玛依","吐鲁番","哈密","和田","阿克苏","喀什","克孜勒苏","克孜勒苏柯尔克孜","巴音郭楞蒙古","昌吉","博尔塔","博尔塔拉蒙古","伊犁","伊犁哈萨克","塔城","阿勒泰","香港","澳门","台湾台北","台北","高雄","基隆","台中","台南","新竹","嘉义"});}};/*** 拼接“省”,“市”后缀* @param address* @param $suffix* @param s* @return*/private static String restructure(String address,String $suffix,String s){if(address.indexOf($suffix) == 0){//拼接“省”,“市”关键字return address;}else{if(address.indexOf(s) == 0){//拼接“省”,“市”关键字return address.replaceFirst(s, $suffix);}}return null;}
第二,检查地址中是否存在省及地区
    /*** 判断是否包含省级地区* @param address* @return -1(不包含省份 ), 1(包含23个省份中的一个),2(包含直辖市),3(包含自治区),4(包含特别行政区),5(包含市级)*/public static Integer isExistProvince(String address){Integer isExit = -1;for(Map.Entry<Integer, String[]> entry : maps.entrySet()){//过滤掉市级地区if(entry.getKey() == 5) break;//判断是否存在中国34个省级地址(23个普通省份,4个直辖市,5个自治区,2个特别行政区)for(String provin : entry.getValue()){if(address.indexOf(provin) == 0){return entry.getKey();}}}return isExit;}
第三,自动拼接“省,市(直辖市),自治区,特别行政区”关键字
  /*** 处理没有指定省市后缀的地址* 检查出来之后自动拼接* 例如:四川成都高新区   * 结果:四川省成都市高新区* @param suffix* @param address* @param array* @return*/private static String restructure(String address,int k){//省级String provinces = "";//省String[] arrays = maps.get(k);//后缀String suffix = suffixs[k-1];//把全部的"市辖区","市辖县"替换成""for(String s : cityKeyWords.get(1)){address = address.replace(s, "");}for(String s : arrays){//自治区String $suffix = "";if(k == 3){switch (s) {case "广西":$suffix = s + "壮族" + suffix;break;case "宁夏":$suffix = s + "回族" + suffix;break;case "新疆":$suffix = s + "维吾尔" + suffix;break;default:$suffix = s + suffix;break;}}else{$suffix = s + suffix;}provinces = $suffix;//拼接“省”,“市”后缀String $address = restructure(address, $suffix, s);if($address != null){address = $address;break;}}return restructure(address,provinces);}
第四,自动拼接“市”级关键字,特殊情况除外
	/*** 出现下列关键词的将不作处理*/@SuppressWarnings({"serial" })private static Map<Integer,String[]> cityKeyWords = new LinkedHashMap<Integer,String[]>(){{put(0,new String[]{"县"});put(1,new String[]{"市辖区","市辖县"});put(2,new String[]{"盟","州","地区","自治州","回族自治州","土家族苗族自治州","藏族自治州","藏族羌族自治州","蒙古族藏族自治州","壮族苗族自治州","傣族自治州","彝族自治州","朝鲜族自治州","布依族苗族自治州","苗族侗族自治州","傣族景颇族自治州","傈僳族自治州","白族自治州","哈尼族彝族自治州"});}};/*** 处理没有填写市级地区的地址,一旦检查出来,则添加上后缀* 例如:成都高新区* 结果:成都市高新区* @param address* @return*/private static String restructure(String address,String provinces){address = address.replace(provinces, "");//如果第一位是"县",替换为""if(cityKeyWords.get(0)[0].equals(address.substring(0, 1))){address = address.substring(1);}//如果省份是直辖市,自动插入"市辖区"用于区分for(String s : maps.get(2)){if(provinces.equals(s + suffixs[1])){address = cityKeyWords.get(1)[0] + address;}}//后缀String suffix = suffixs[1];big:for(String s : maps.get(5)){for(String key : cityKeyWords.get(2)){if(address.indexOf(s + key) == 0){continue big;}}//拼接“省”,“市”后缀String $address = restructure(address, s + suffix, s);if($address != null){address = $address;break;}}return provinces + address;}
第五,通过正则表达式,将地址进行分割
    /*** 解析地址* @param address* @return*/public static Map<String,String> addressResolution(String address){String regex="(?<province>[^特别行政区]+特别行政区|[^自治区]+自治区|[^省]+省|[^市]+市)(?<city>省直辖行政单位|省属虚拟市|市辖县|市辖区|县|自治州|[^地区]+地区|[^州]+州|[^盟]+盟|[^市]+市|[^区]+区|)?(?<county>[^旗]+旗|[^市]+市|[^区]+区|[^县]+县)?(?<town>[^县]+县|[^区]+区|[^乡]+乡|[^村]+村|[^镇]+镇|[^街道]+街道)?(?<village>.*)";Matcher matcher = Pattern.compile(regex).matcher(address);Map<String,String> rmap = null;while(matcher.find()){rmap = new LinkedHashMap<String,String>();rmap.put("province", matcher.group("province") == null ? "" : matcher.group("province").trim());rmap.put("city", matcher.group("city") == null ? "" : matcher.group("city").trim());rmap.put("county", matcher.group("county") == null ? "" : matcher.group("county").trim());rmap.put("town", matcher.group("town") == null ? "" : matcher.group("town").trim());rmap.put("village", matcher.group("village") == null ? "" : matcher.group("village").trim());}//重构一次地址return addressResolution(rmap);}
第六,将解析后的地址进行去重,去除掉重复的省市区
    /*** 重构一次地址,将直辖市所在区域进行特殊处理* 注:如果在地址中出现未明确省市区的将无法去重,由于详细地址中可能出现于省市同名的情况,所有对于这类情况将* 	保留,即使从肉眼能看出是重复的,也不会处理* 例如:四川省成都市高新区四川成都高新xxxx大道xxx号* @param rmap* @return*/private static Map<String,String> addressResolution(Map<String,String> rmap){if(rmap == null) return rmap;//针对直辖市,进行特殊处理String city = rmap.get("city");//将直辖市-市级全部替换为区级内容,并将区级内容全部替换为""for(String s : cityKeyWords.get(1)){if(s.equals(city)){rmap.put("city", rmap.get("county"));rmap.put("county", "");break;}}//市city = rmap.get("city");//区(县)String county = rmap.get("county");//省String province = rmap.get("province");//街道,乡村,镇rmap.put("town", rmap.get("town").replace(city, "").replace(county, "").replace(province, ""));//详细地址rmap.put("village", rmap.get("village").replace(city, "").replace(county, "").replace(province, ""));return rmap;}
第七,对外暴露的接口
	/*** 格式化省市县/区信息* @param address* @return*/public static Map<String, String> addressFormat(String address) {if(address == null) return null;address = address.replaceAll("\\s+","");//判断是否存在省级地区int k = isExistProvince(address);if(k == -1){return null;}else{//重构地区格式(拼接省级,市级地区后缀名)address = restructure(address, k);}//格式化地址Map<String,String> addresss = addressResolution(address);if(addresss == null){return null;}//省份String province = addresss.get("province");//市String city = addresss.get("city");//区县String county = addresss.get("county");//详细地址String town = addresss.get("town");String village = addresss.get("village");//完整地址 : 省 + 市 + 区 + 详细地址StringBuilder detailAddress = new StringBuilder();detailAddress.append(province).append(" ").append(city).append(" ").append(county).append(" ").append(town).append(village);Map<String,String> rmap = new LinkedHashMap<String,String>();rmap.put("province",province);rmap.put("city",city);rmap.put("county",county);rmap.put("town",town + village);rmap.put("detail", detailAddress.toString().replaceAll("\\s+",""));rmap.put("detail_format", detailAddress.toString());return rmap;}
第八,测试
/**
 * Demo driver: formats a fixed set of sample addresses and prints each
 * result as a JSON string on standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String[] samples = {
        "新疆维吾尔自治区哈密地区哈密市ces",
        "内蒙古自治区 锡林郭勒盟 锡林浩特市w",
        "新疆维吾尔自治区昌吉州呼图壁县",
        "四川成都双流县华阳街道富民路一段230号",
        "新疆维吾尔自治区昌吉回族自治州呼图壁县",
        "新疆维吾尔自治区伊犁州伊宁市飞机场路88号中华联合保险公司",
        "新疆维吾尔自治区省直辖行政单位石河子市",
        "重庆市渝北区东湖南路333号中渝爱都会7栋一单元1202",
        "湖北省省直辖行政单位潜江市大河小区二栋",
        "海南省省属虚拟市五指山市大河小区二栋",
        "内蒙古自治区 兴安盟 乌兰浩特市大河小区二栋",
        "四川省乐山市峨眉山市,佛欣路19号蒙太奇硅藻泥艺术涂装",
        "湖北省潜江市园林南路建筑设计院门面中天电脑",
        "湖北省省直辖行政单位潜江市校区五号",
        "北京市 市辖区 东城区 5",
        "重庆市 市辖区 万州区万州大道一号",
        "新疆维吾尔自治区 巴音郭楞蒙古自治州 库尔勒市开发路一号",
        "重庆市市辖区江北区重庆市市辖区江北区北滨一路456号北岸江山西区10栋",
        "重庆市 市辖区沙坪坝区蓝溪谷地9栋14—2",
        "河南省济源市沁园愚公路半山春景6号楼",
        "四川成都天府新区华府大道一段",
        "重庆市江北区北滨一路456号北岸江山西区10栋",
        "重庆市市辖区沙坪坝区沙坪坝区蓝溪谷地9栋14–1",
        "重庆市南岸区南坪电建小区",
        "重庆市 县 彭水苗族土家族自治县两江桥行政服务大厅不动产窗口",
        "四川省乐山市峨眉山市四川乐山峨眉山市佛光南路355号-收货人:丁振波",
        "北京丰台区莲花池东路118号",
        "北京市辖区丰台区北京东路118号",
    };
    // Print one JSON document per sample, in declaration order.
    for (int i = 0; i < samples.length; i++) {
        Map<String, String> formatted = addressFormat(samples[i]);
        System.out.println(JSONObject.toJSONString(formatted));
    }
}
第九、测试结果
{"province":"新疆维吾尔自治区","city":"哈密地区","county":"哈密市","town":"ces","detail":"新疆维吾尔自治区哈密地区哈密市ces","detail_format":"新疆维吾尔自治区 哈密地区 哈密市 ces"}
{"province":"内蒙古自治区","city":"锡林郭勒盟","county":"锡林浩特市","town":"w","detail":"内蒙古自治区锡林郭勒盟锡林浩特市w","detail_format":"内蒙古自治区 锡林郭勒盟 锡林浩特市 w"}
{"province":"新疆维吾尔自治区","city":"昌吉州","county":"呼图壁县","town":"","detail":"新疆维吾尔自治区昌吉州呼图壁县","detail_format":"新疆维吾尔自治区 昌吉州 呼图壁县 "}
{"province":"四川省","city":"成都市","county":"双流县","town":"华阳街道富民路一段230号","detail":"四川省成都市双流县华阳街道富民路一段230号","detail_format":"四川省 成都市 双流县 华阳街道富民路一段230号"}
{"province":"新疆维吾尔自治区","city":"昌吉回族自治州","county":"呼图壁县","town":"","detail":"新疆维吾尔自治区昌吉回族自治州呼图壁县","detail_format":"新疆维吾尔自治区 昌吉回族自治州 呼图壁县 "}
{"province":"新疆维吾尔自治区","city":"伊犁州","county":"伊宁市","town":"飞机场路88号中华联合保险公司","detail":"新疆维吾尔自治区伊犁州伊宁市飞机场路88号中华联合保险公司","detail_format":"新疆维吾尔自治区 伊犁州 伊宁市 飞机场路88号中华联合保险公司"}
{"province":"新疆维吾尔自治区","city":"省直辖行政单位","county":"石河子市","town":"","detail":"新疆维吾尔自治区省直辖行政单位石河子市","detail_format":"新疆维吾尔自治区 省直辖行政单位 石河子市 "}
{"province":"重庆市","city":"渝北区","county":"","town":"东湖南路333号中渝爱都会7栋一单元1202","detail":"重庆市渝北区东湖南路333号中渝爱都会7栋一单元1202","detail_format":"重庆市 渝北区  东湖南路333号中渝爱都会7栋一单元1202"}
{"province":"湖北省","city":"省直辖行政单位","county":"潜江市","town":"大河小区二栋","detail":"湖北省省直辖行政单位潜江市大河小区二栋","detail_format":"湖北省 省直辖行政单位 潜江市 大河小区二栋"}
{"province":"海南省","city":"省属虚拟市","county":"五指山市","town":"大河小区二栋","detail":"海南省省属虚拟市五指山市大河小区二栋","detail_format":"海南省 省属虚拟市 五指山市 大河小区二栋"}
{"province":"内蒙古自治区","city":"兴安盟","county":"乌兰浩特市","town":"大河小区二栋","detail":"内蒙古自治区兴安盟乌兰浩特市大河小区二栋","detail_format":"内蒙古自治区 兴安盟 乌兰浩特市 大河小区二栋"}
{"province":"四川省","city":"乐山市","county":"峨眉山市","town":",佛欣路19号蒙太奇硅藻泥艺术涂装","detail":"四川省乐山市峨眉山市,佛欣路19号蒙太奇硅藻泥艺术涂装","detail_format":"四川省 乐山市 峨眉山市 ,佛欣路19号蒙太奇硅藻泥艺术涂装"}
{"province":"湖北省","city":"潜江市","county":"","town":"园林南路建筑设计院门面中天电脑","detail":"湖北省潜江市园林南路建筑设计院门面中天电脑","detail_format":"湖北省 潜江市  园林南路建筑设计院门面中天电脑"}
{"province":"湖北省","city":"省直辖行政单位","county":"潜江市","town":"校区五号","detail":"湖北省省直辖行政单位潜江市校区五号","detail_format":"湖北省 省直辖行政单位 潜江市 校区五号"}
{"province":"北京市","city":"东城区","county":"","town":"5","detail":"北京市东城区5","detail_format":"北京市 东城区  5"}
{"province":"重庆市","city":"万州区","county":"","town":"万州大道一号","detail":"重庆市万州区万州大道一号","detail_format":"重庆市 万州区  万州大道一号"}
{"province":"新疆维吾尔自治区","city":"巴音郭楞蒙古自治州","county":"库尔勒市","town":"开发路一号","detail":"新疆维吾尔自治区巴音郭楞蒙古自治州库尔勒市开发路一号","detail_format":"新疆维吾尔自治区 巴音郭楞蒙古自治州 库尔勒市 开发路一号"}
{"province":"重庆市","city":"江北区","county":"","town":"北滨一路456号北岸江山西区10栋","detail":"重庆市江北区北滨一路456号北岸江山西区10栋","detail_format":"重庆市 江北区  北滨一路456号北岸江山西区10栋"}
{"province":"重庆市","city":"沙坪坝区","county":"","town":"蓝溪谷地9栋14—2","detail":"重庆市沙坪坝区蓝溪谷地9栋14—2","detail_format":"重庆市 沙坪坝区  蓝溪谷地9栋14—2"}
{"province":"河南省","city":"济源市","county":"","town":"沁园愚公路半山春景6号楼","detail":"河南省济源市沁园愚公路半山春景6号楼","detail_format":"河南省 济源市  沁园愚公路半山春景6号楼"}
{"province":"四川省","city":"成都市","county":"天府新区","town":"华府大道一段","detail":"四川省成都市天府新区华府大道一段","detail_format":"四川省 成都市 天府新区 华府大道一段"}
{"province":"重庆市","city":"江北区","county":"","town":"北滨一路456号北岸江山西区10栋","detail":"重庆市江北区北滨一路456号北岸江山西区10栋","detail_format":"重庆市 江北区  北滨一路456号北岸江山西区10栋"}
{"province":"重庆市","city":"沙坪坝区","county":"","town":"蓝溪谷地9栋14–1","detail":"重庆市沙坪坝区蓝溪谷地9栋14–1","detail_format":"重庆市 沙坪坝区  蓝溪谷地9栋14–1"}
{"province":"重庆市","city":"南岸区","county":"","town":"南坪电建小区","detail":"重庆市南岸区南坪电建小区","detail_format":"重庆市 南岸区  南坪电建小区"}
{"province":"重庆市","city":"彭水苗族土家族自治县","county":"","town":"两江桥行政服务大厅不动产窗口","detail":"重庆市彭水苗族土家族自治县两江桥行政服务大厅不动产窗口","detail_format":"重庆市 彭水苗族土家族自治县  两江桥行政服务大厅不动产窗口"}
{"province":"四川省","city":"乐山市","county":"峨眉山市","town":"四川乐山佛光南路355号-收货人:丁振波","detail":"四川省乐山市峨眉山市四川乐山佛光南路355号-收货人:丁振波","detail_format":"四川省 乐山市 峨眉山市 四川乐山佛光南路355号-收货人:丁振波"}
{"province":"北京市","city":"丰台区","county":"","town":"莲花池东路118号","detail":"北京市丰台区莲花池东路118号","detail_format":"北京市 丰台区  莲花池东路118号"}
{"province":"北京市","city":"丰台区","county":"","town":"北京东路118号","detail":"北京市丰台区北京东路118号","detail_format":"北京市 丰台区  北京东路118号"}

注:本文介绍仅供参考,并非任意文本都能解析成功,由于不同地址的差异性会出现解析错误,企业项目请慎重使用,特殊地址请特殊处理