diff --git a/functions.php b/functions.php index 3d94716..4e39023 100644 --- a/functions.php +++ b/functions.php @@ -343,85 +343,77 @@ function getAllCommenters() { } /** - * 获取IP归属地 (使用 ip.asbid.cn API) + * Look up the region an IP address belongs to (offline, via ip2region) */ +// Load the XdbSearcher class +require_once __DIR__ . '/ip2region/XdbSearcher.php'; + +// Load ip2region.xdb into memory once and reuse the searcher (static singleton) +function getIp2regionSearcher() { + static $searcher = null; + if ($searcher === null) { + $dbPath = __DIR__ . '/ip2region/ip2region.xdb'; + $cBuff = XdbSearcher::loadContentFromFile($dbPath); + if ($cBuff === null) { + error_log("无法加载 ip2region.xdb"); + return null; + } + try { + $searcher = XdbSearcher::newWithBuffer($cBuff); + } catch (Exception $e) { + error_log("创建 ip2region searcher 失败: " . $e->getMessage()); + return null; + } + } + return $searcher; +} + +/** + * Format an ip2region region string for display + * + * @param string $region raw region string, fields separated by '|' + * @return string formatted region + */ +function format_ip_region($region) { + // split into fields + $parts = explode('|', $region); + + // drop fields that are '0' or empty + $parts = array_filter($parts, function($item) { + return $item !== '0' && $item !== ''; + }); + + // drop a leading "中国" field + if (isset($parts[0]) && $parts[0] === '中国') { + array_shift($parts); + } + + // join the remaining fields + return implode('', $parts); +} + +// Resolve an IP address to its region function get_ip_region($ip) { - // 检查是否是内网IP + // IPv6 addresses are reported as such and not looked up + if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) { + return 'IPv6'; + } + // private/reserved (or invalid) addresses if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) === false) { return '内网IP'; } - // 缓存目录路径 - $cacheDir = __TYPECHO_ROOT_DIR__ . '/usr/cache/ip_cache/'; - if (!file_exists($cacheDir)) { - mkdir($cacheDir, 0755, true); - } - - // 缓存文件名 (使用ip作为文件名) - $cacheFile = $cacheDir . md5($ip) . 
'.json'; - - // 检查是否有缓存 - if (file_exists($cacheFile)) { - $cacheData = json_decode(file_get_contents($cacheFile), true); - if ($cacheData && isset($cacheData['expire']) && $cacheData['expire'] > time()) { - return format_api_ip_region($cacheData['data']); - } - } - - // 调用API获取IP信息 - $apiUrl = 'https://ip.asbid.cn/' . $ip; - $response = @file_get_contents($apiUrl); - - if ($response === false) { - // API调用失败,返回未知 - return '未知'; - } - - $data = json_decode($response, true); - if (!$data || !isset($data['country'])) { - return '未知'; - } - - // 缓存数据 (设置1个月有效期) - $cacheData = [ - 'data' => $data, - 'expire' => time() + 30 * 24 * 3600 - ]; - file_put_contents($cacheFile, json_encode($cacheData)); - - return format_api_ip_region($data); -} - -/** - * 格式化API返回的IP归属地信息 - * - * @param array $data API返回的数据 - * @return string 格式化后的归属地 - */ -function format_api_ip_region($data) { - $regionParts = []; - - if (isset($data['country']) && $data['country']) { - $regionParts[] = $data['country']; - } - if (isset($data['province']) && $data['province']) { - $regionParts[] = $data['province']; - } - if (isset($data['city']) && $data['city']) { - // 处理"中国-江苏-南京"这样的格式 - $cityParts = explode('–', $data['city']); - foreach ($cityParts as $part) { - if (trim($part) && !in_array(trim($part), $regionParts)) { - $regionParts[] = trim($part); - } - } - } - - if (isset($data['isp']) && $data['isp']) { - $regionParts[] = $data['isp']; - } - - return implode(' ', $regionParts); + $searcher = getIp2regionSearcher(); + if (!$searcher) return '未知'; + // search() throws when the xdb cannot be read; degrade to "unknown" instead of breaking the page + try { + $region = $searcher->search($ip); + } catch (Exception $e) { + error_log("ip2region 查询失败: " . $e->getMessage()); + return '未知'; + } + if ($region === null) return '未知'; + return format_ip_region($region); } /** diff --git a/index.php b/index.php index e366d53..9d0230f 100644 --- a/index.php +++ b/index.php @@ -4,7 +4,7 @@ * * @package Typecho Pouck Theme * @author 老孙博客 - * @version 1.1.1 + * @version 1.1.2 * @link http://www.imsun.org */ diff --git a/ip2region/.gitignore b/ip2region/.gitignore new file mode 100644 index 0000000..21fcb95 --- 
/dev/null +++ b/ip2region/.gitignore @@ -0,0 +1,4 @@ +# Created by .ignore support plugin (hsz.mobi) +/.idea +/vendor/ +/composer.lock diff --git a/ip2region/Ip2Region.php b/ip2region/Ip2Region.php new file mode 100644 index 0000000..20aea65 --- /dev/null +++ b/ip2region/Ip2Region.php @@ -0,0 +1,83 @@ + + * @datetime 2022/07/18 + */ +class Ip2Region +{ + /** + * 查询实例对象 + * @var XdbSearcher + */ + private $searcher; + + /** + * 初始化构造方法 + * @throws Exception + */ + public function __construct() + { + class_exists('XdbSearcher') or include __DIR__ . '/XdbSearcher.php'; + $this->searcher = XdbSearcher::newWithFileOnly(__DIR__ . '/ip2region.xdb'); + } + + /** + * 兼容原 memorySearch 查询 + * @param string $ip + * @return array + * @throws Exception + */ + public function memorySearch($ip) + { + return ['city_id' => 0, 'region' => $this->searcher->search($ip)]; + } + + /** + * 兼容原 binarySearch 查询 + * @param string $ip + * @return array + * @throws Exception + */ + public function binarySearch($ip) + { + return $this->memorySearch($ip); + } + + /** + * 兼容原 btreeSearch 查询 + * @param string $ip + * @return array + * @throws Exception + */ + public function btreeSearch($ip) + { + return $this->memorySearch($ip); + } + + /** + * 直接查询并返回名称 + * @param string $ip + * @return string + * @throws \Exception + */ + public function simple($ip) + { + $geo = $this->memorySearch($ip); + $arr = explode('|', str_replace(['0|'], '|', isset($geo['region']) ? $geo['region'] : '')); + if (($last = array_pop($arr)) === '内网IP') $last = ''; + return join('', $arr) . (empty($last) ? 
'' : "【{$last}】"); + } + + /** + * destruct method + * resource destroy + */ + public function __destruct() + { + $this->searcher->close(); + unset($this->searcher); + } +} \ No newline at end of file diff --git a/ip2region/LICENSE.md b/ip2region/LICENSE.md new file mode 100644 index 0000000..3ecb37d --- /dev/null +++ b/ip2region/LICENSE.md @@ -0,0 +1,225 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +========================================================================== +The following license applies to the ip2region library +-------------------------------------------------------------------------- +Copyright (c) 2015 Lionsoul + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/ip2region/README.md b/ip2region/README.md new file mode 100644 index 0000000..9baf71b --- /dev/null +++ b/ip2region/README.md @@ -0,0 +1,229 @@ +[![Latest Stable Version](https://poser.pugx.org/zoujingli/ip2region/v/stable)](https://packagist.org/packages/zoujingli/ip2region) +[![Total Downloads](https://poser.pugx.org/zoujingli/ip2region/downloads)](https://packagist.org/packages/zoujingli/ip2region) +[![Monthly Downloads](https://poser.pugx.org/zoujingli/ip2region/d/monthly)](https://packagist.org/packages/zoujingli/ip2region) +[![Daily Downloads](https://poser.pugx.org/zoujingli/ip2region/d/daily)](https://packagist.org/packages/zoujingli/ip2region) +[![PHP Version Require](http://poser.pugx.org/zoujingli/ip2region/require/php)](https://packagist.org/packages/ip2region) +[![License](https://poser.pugx.org/zoujingli/ip2region/license)](https://packagist.org/packages/zoujingli/ip2region) + +本库基于 [ip2region](https://github.com/lionsoul2014/ip2region) 简单整合,方便使用 `Composer` 管理。 + +# Ip2region 是什么 + +ip2region v2.0 - 是一个离线IP地址定位库和IP定位数据管理框架,10微秒级别的查询效率,提供了众多主流编程语言的 `xdb` 数据生成和查询客户端实现。 + +# Ip2region 特性 + +### 1、标准化的数据格式 + +每个 ip 数据段的 region 信息都固定了格式:`国家|区域|省份|城市|ISP`,只有中国的数据绝大部分精确到了城市,其他国家部分数据只能定位到国家,后面的选项全部是0。 + +### 2、数据去重和压缩 + +`xdb` 格式生成程序会自动去重和压缩部分数据,默认的全部 IP 数据,生成的 ip2region.xdb 数据库是 11MiB,随着数据的详细度增加数据库的大小也慢慢增大。 + +### 3、极速查询响应 + +即使是完全基于 `xdb` 文件的查询,单次查询响应时间在十微秒级别,可通过如下两种方式开启内存加速查询: + +1. `vIndex` 索引缓存 :使用固定的 `512KiB` 的内存空间缓存 vector index 数据,减少一次 IO 磁盘操作,保持平均查询效率稳定在10-20微秒之间。 +2. `xdb` 整个文件缓存:将整个 `xdb` 文件全部加载到内存,内存占用等同于 `xdb` 文件大小,无磁盘 IO 操作,保持微秒级别的查询效率。 + +### 4、IP 数据管理框架 + +v2.0 格式的 `xdb` 支持亿级别的 IP 数据段行数,region 信息也可以完全自定义,例如:你可以在 region 中追加特定业务需求的数据,例如:GPS信息/国际统一地域信息编码/邮编等。也就是你完全可以使用 ip2region 来管理你自己的 IP 定位数据。 + +# `xdb` 数据查询 + +API 介绍,使用文档和测试程序请参考对应 `searcher` 查询客户端下的 ReadMe 介绍,全部查询 binding 实现情况如下: + +| Ok? 
| 状态 | 编程语言 | 描述 | 贡献者 | +|:-------------------|:----|:----------------------------------------------------------------------------------|:---------------------|:------------------------------------------| +| :white_check_mark: | 已完成 | [golang](https://github.com/lionsoul2014/ip2region/blob/master/binding/golang) | golang xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [php](https://github.com/lionsoul2014/ip2region/blob/master/binding/php) | php xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [java](https://github.com/lionsoul2014/ip2region/blob/master/binding/java) | java xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [lua](https://github.com/lionsoul2014/ip2region/blob/master/binding/lua) | 纯 lua xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [c](https://github.com/lionsoul2014/ip2region/blob/master/binding/c) | ANSC c xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [lua_c](https://github.com/lionsoul2014/ip2region/blob/master/binding/lua_c) | lua c 扩展 xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +|     | 待开始 | [rust](https://github.com/lionsoul2014/ip2region/blob/master/binding/rust) | rust xdb 查询客户端实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [python](https://github.com/lionsoul2014/ip2region/blob/master/binding/python) | python xdb 查询客户端实现 | [厉害的花花](https://github.com/luckydog6132) | +| :white_check_mark: | 已完成 | [nodejs](https://github.com/lionsoul2014/ip2region/blob/master/binding/nodejs) | nodejs xdb 查询客户端实现 | [Wu Jian Ping](https://github.com/wujjpp) | +| :white_check_mark: | 已完成 | [csharp](https://github.com/lionsoul2014/ip2region/blob/master/binding/csharp) | csharp xdb 查询客户端实现 | [Alen Lee](https://github.com/malus2077) | +|     | 待开始 | [php_ext](https://github.com/lionsoul2014/ip2region/blob/master/binding/php7_ext) | php 
c 扩展 xdb 查询客户端实现 | 待确定 | +|     | 待开始 | [nginx](https://github.com/lionsoul2014/ip2region/blob/master/binding/nginx) | nginx 扩展 xdb 查询客户端实现 | 待确定 | + +# `xdb` 数据生成 + +API 介绍,使用文档和测试程序请参考对应 `maker` 生成程序下的 ReadMe 介绍,全部生成 maker 实现情况如下: + +| Ok? | 状态 | 编程语言 | 描述 | 贡献者 | +|:-------------------|:----|:-----------------------------------------------------------------------------|:------------------|:-----------------------------------------| +| :white_check_mark: | 已完成 | [golang](https://github.com/lionsoul2014/ip2region/blob/master/maker/golang) | golang xdb 生成程序实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [java](https://github.com/lionsoul2014/ip2region/blob/master/maker/java) | java xdb 生成程序实现 | [Lion](https://github.com/lionsoul2014) | +|     | 待开始 | [c](https://github.com/lionsoul2014/ip2region/blob/master/maker/c) | ANSC c xdb 生成程序实现 | [Lion](https://github.com/lionsoul2014) | +| :white_check_mark: | 已完成 | [python](https://github.com/lionsoul2014/ip2region/blob/master/maker/python) | python xdb 生成程序实现 | [leolin49](https://github.com/leolin49) | +| :white_check_mark: | 已完成 | [csharp](https://github.com/lionsoul2014/ip2region/blob/master/maker/csharp) | csharp xdb 生成程序实现 | [Alan Lee](https://github.com/malus2077) | + +# 并发查询必读 + +全部查询客户端的 search 接口都 不是 并发安全的实现,不同进程/线程/协程需要通过创建不同的查询对象来安全使用,并发量很大的情况下,基于文件查询的方式可能会导致打开文件数过多的错误,请修改内核的最大允许打开文件数(fs.file-max=一个更高的值),或者将整个xdb加载到内存进行安全并发使用。 + +# 相关备注 + +### 1、使用声明 + +ip2region 重点在于研究 IP 定位数据的存储设计和各种语言的查询实现,并没有原始 IP 数据的支撑,本项目不保证及时的数据更新,没有也不会有商用版本,你可以使用自定义的数据导入 ip2region 进行管理。 + +### 2、技术交流 + +ip2region 微信交流群,请先加微信:lionsoul2014 (请备注 ip2region) + +### 3、数据更新 + +基于检测算法的数据更新方式视频分享:[数据更新实现视频分享 - part1](https://www.bilibili.com/video/BV1934y1E7Q5/),[数据更新实现视频分享 - part2](https://www.bilibili.com/video/BV1pF411j7Aw/) + +### 4、数据结构 + +1. 
xdb 数据结构分析:[“ip2region xdb 数据结构和查询过程详解“](https://mp.weixin.qq.com/s?__biz=MzU4MDc2MzQ5OA==&mid=2247483696&idx=1&sn=6e9e138e86cf18245656c54ff4be3129&chksm=fd50ab35ca2722239ae7c0bb08efa44f499110c810227cbad3a16f36ebc1c2afc58eb464a57c#rd) +2. xdb 查询过程分析:[“ip2region xdb 数据结构和查询过程详解”](https://mp.weixin.qq.com/s?__biz=MzU4MDc2MzQ5OA==&mid=2247483696&idx=1&sn=6e9e138e86cf18245656c54ff4be3129&chksm=fd50ab35ca2722239ae7c0bb08efa44f499110c810227cbad3a16f36ebc1c2afc58eb464a57c#rd) +3. xdb 生成过程分析:[“ip2region xdb 二进制数据生成过程详解”](https://mp.weixin.qq.com/s?__biz=MzU4MDc2MzQ5OA==&mid=2247483718&idx=1&sn=92e552f3bba44a97ca661da244f35574&chksm=fd50ab43ca2722559733ed4e1082f239f381aaa881f9dbeb479174936145522696d9d200531e#rd) + +# 关于 ip2region v2.0 的 PHP 用法 + +### 完全基于文件的查询 + +```php +$dbFile = "ip2region.xdb file path"; +try { + $searcher = XdbSearcher::newWithFileOnly($dbFile); +} catch (Exception $e) { + printf("failed to create searcher with '%s': %s\n", $dbFile, $e); + return; +} + +$ip = '1.2.3.4'; +$sTime = XdbSearcher::now(); +$region = $searcher->search($ip); +if ($region === null) { + // something is wrong + printf("failed search(%s)\n", $ip); + return; +} + +printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime); + +// 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象。 +``` + +### 缓存 `VectorIndex` 索引 + +如果你的 php 母环境支持,可以预先加载 vectorIndex 缓存,然后做成全局变量,每次创建 Searcher 的时候使用全局的 vectorIndex,可以减少一次固定的 IO 操作从而加速查询,减少 io 压力。 + +```php +// 1、从 dbPath 加载 VectorIndex 缓存,把下述的 vIndex 变量缓存到内存里面。 +$vIndex = XdbSearcher::loadVectorIndexFromFile($dbPath); +if ($vIndex === null) { + printf("failed to load vector index from '%s'\n", $dbPath); + return; +} + +// 2、使用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。 +try { + $searcher = XdbSearcher::newWithVectorIndex($dbFile, $vIndex); +} catch (Exception $e) { + printf("failed to create vectorIndex cached searcher with '%s': %s\n", $dbFile, $e); + return; +} + +// 3、查询 +$sTime = XdbSearcher::now(); +$region = $searcher->search('1.2.3.4'); +if ($region 
=== null) { + printf("failed search(1.2.3.4)\n"); + return; +} + +printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime); + +// 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象,但是都共享统一的只读 vectorIndex。 +``` + +### 缓存整个 `xdb` 数据 + +如果你的 PHP 母环境支持,可以预先加载整个 `xdb` 的数据到内存,这样可以实现完全基于内存的查询,类似之前的 memory search 查询。 + +```php +// 1、从 dbPath 加载整个 xdb 到内存。 +$cBuff = XdbSearcher::loadContentFromFile($dbPath); +if ($cBuff === null) { + printf("failed to load content buffer from '%s'\n", $dbPath); + return; +} + +// 2、使用全局的 cBuff 创建完全基于内存的查询对象。 +try { + $searcher = XdbSearcher::newWithBuffer($cBuff); +} catch (Exception $e) { + printf("failed to create buffer cached searcher: %s\n", $e); + return; +} + +// 3、查询 +$sTime = XdbSearcher::now(); +$region = $searcher->search('1.2.3.4'); +if ($region === null) { + printf("failed search(1.2.3.4)\n"); + return; +} + +printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime); + +// 备注:并发使用,用整个 xdb 缓存创建的 searcher 对象可以安全用于并发。 +``` + +# 查询测试 + +通过 `search_test.php` 脚本来进行查询测试: + +```bash +➜ php git:(v2.0_xdb) ✗ php ./search_test.php +php ./search_test.php [command options] +options: + --db string ip2region binary xdb file path + --cache-policy string cache policy: file/vectorIndex/content +``` + +例如:使用默认的 data/ip2region.xdb 进行查询测试: + +```bash +➜ php git:(v2.0_xdb) ✗ php ./search_test.php --db=../../data/ip2region.xdb --cache-policy=vectorIndex +ip2region xdb searcher test program, cachePolicy: vectorIndex +type 'quit' to exit +ip2region>> 1.2.3.4 +{region: 美国|0|华盛顿|0|谷歌, ioCount: 7, took: 0.04492 ms} +ip2region>> +``` + +输入 ip 即可进行查询测试。也可以分别设置 `cache-policy` 为 file/vectorIndex/content 来测试三种不同缓存实现的效率。 + +# bench 测试 + +通过 `bench_test.php` 脚本来进行自动 bench 测试,一方面确保 `xdb` 文件没有错误,另一方面通过大量的查询测试平均查询性能: + +```bash +➜ php git:(v2.0_xdb) ✗ php ./bench_test.php +php ./bench_test.php [command options] +options: + --db string ip2region binary xdb file path + --src string source ip text file path + --cache-policy string 
cache policy: file/vectorIndex/content +``` + +例如:通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 来进行 bench 测试: + +```bash +➜ php git:(v2.0_xdb) ✗ php ./bench_test.php --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt --cache-policy=vectorIndex +Bench finished, {cachePolicy: vectorIndex, total: 3417955, took: 15s, cost: 0.005 ms/op} +``` + +可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同的缓存实现的性能。 +@Note:请注意 bench 使用的 src 文件需要是生成对应的 xdb 文件的相同的源文件。 \ No newline at end of file diff --git a/ip2region/XdbSearcher.php b/ip2region/XdbSearcher.php new file mode 100644 index 0000000..680f4cb --- /dev/null +++ b/ip2region/XdbSearcher.php @@ -0,0 +1,368 @@ + +// @Date 2022/06/21 + +class XdbSearcher +{ + const HeaderInfoLength = 256; + const VectorIndexRows = 256; + const VectorIndexCols = 256; + const VectorIndexSize = 8; + const SegmentIndexSize = 14; + + // xdb file handle + private $handle = null; + + // header info + private $header = null; + private $ioCount = 0; + + // vector index in binary string. + // string decode will be faster than the map based Array. 
+ private $vectorIndex = null; + + // xdb content buffer + private $contentBuff = null; + + // --- + // static function to create searcher + + /** + * @throws Exception + */ + public static function newWithFileOnly($dbFile) + { + return new XdbSearcher($dbFile, null, null); + } + + /** + * @throws Exception + */ + public static function newWithVectorIndex($dbFile, $vIndex) + { + return new XdbSearcher($dbFile, $vIndex); + } + + /** + * @throws Exception + */ + public static function newWithBuffer($cBuff) + { + return new XdbSearcher(null, null, $cBuff); + } + + // --- End of static creator + + /** + * initialize the xdb searcher + * @throws Exception + */ + function __construct($dbFile = null, $vectorIndex = null, $cBuff = null) + { + // check the content buffer first + if ($cBuff != null) { + $this->vectorIndex = null; + $this->contentBuff = $cBuff; + } else { + // fall back to the bundled data file when no path is given (by Anyon) + if (is_null($dbFile)) { + $dbFile = __DIR__ . DIRECTORY_SEPARATOR . 'ip2region.xdb'; + } + // open the xdb binary file + $this->handle = fopen($dbFile, "r"); + if ($this->handle === false) { + throw new Exception(sprintf("failed to open xdb file '%s'", $dbFile)); + } + + $this->vectorIndex = $vectorIndex; + } + } + + function close() + { + if ($this->handle != null) { + fclose($this->handle); + } + } + + function getIOCount() + { + return $this->ioCount; + } + + /** + * find the region info for the specified ip address + * @throws Exception + */ + function search($ip) + { + // check and convert the string ip to a 4-bytes long + if (is_string($ip)) { + $t = self::ip2long($ip); + if ($t === null) { + throw new Exception("invalid ip address `$ip`"); + } + $ip = $t; + } + + // reset the global counter + $this->ioCount = 0; + + // locate the segment index block based on the vector index + $il0 = ($ip >> 24) & 0xFF; + $il1 = ($ip >> 16) & 0xFF; + $idx = $il0 * self::VectorIndexCols * self::VectorIndexSize + $il1 * self::VectorIndexSize; + if ($this->vectorIndex != null) { + $sPtr = 
self::getLong($this->vectorIndex, $idx); + $ePtr = self::getLong($this->vectorIndex, $idx + 4); + } elseif ($this->contentBuff != null) { + $sPtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx); + $ePtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx + 4); + } else { + // read the vector index block + $buff = $this->read(self::HeaderInfoLength + $idx, 8); + if ($buff === null) { + throw new Exception("failed to read vector index at {$idx}"); + } + + $sPtr = self::getLong($buff, 0); + $ePtr = self::getLong($buff, 4); + } + + // printf("sPtr: %d, ePtr: %d\n", $sPtr, $ePtr); + + // binary search the segment index to get the region info + $dataLen = 0; + $dataPtr = null; + $l = 0; + $h = ($ePtr - $sPtr) / self::SegmentIndexSize; + while ($l <= $h) { + $m = ($l + $h) >> 1; + $p = $sPtr + $m * self::SegmentIndexSize; + + // read the segment index + $buff = $this->read($p, self::SegmentIndexSize); + if ($buff == null) { + throw new Exception("failed to read segment index at {$p}"); + } + + $sip = self::getLong($buff, 0); + if ($ip < $sip) { + $h = $m - 1; + } else { + $eip = self::getLong($buff, 4); + if ($ip > $eip) { + $l = $m + 1; + } else { + $dataLen = self::getShort($buff, 8); + $dataPtr = self::getLong($buff, 10); + break; + } + } + } + + // match nothing interception. + // @TODO: could this even be a case ? 
+ // printf("dataLen: %d, dataPtr: %d\n", $dataLen, $dataPtr); + if ($dataPtr == null) { + return null; + } + + // load and return the region data + $buff = $this->read($dataPtr, $dataLen); + if ($buff == null) { + return null; + } + + return $buff; + } + + // read specified bytes from the specified index + private function read($offset, $len) + { + // check the in-memory buffer first + if ($this->contentBuff != null) { + return substr($this->contentBuff, $offset, $len); + } + + // read from the file + $r = fseek($this->handle, $offset); + if ($r == -1) { + return null; + } + + $this->ioCount++; + $buff = fread($this->handle, $len); + if ($buff === false) { + return null; + } + + if (strlen($buff) != $len) { + return null; + } + + return $buff; + } + + // --- static util functions ---- + + // convert a string ip to long + public static function ip2long($ip) + { + $ip = ip2long($ip); + if ($ip === false) { + return null; + } + + // convert signed int to unsigned int if on 32 bit operating system + if ($ip < 0 && PHP_INT_SIZE == 4) { + $ip = sprintf("%u", $ip); + } + + return $ip; + } + + // read a 4bytes long from a byte buffer + public static function getLong($b, $idx) + { + $val = (ord($b[$idx])) | (ord($b[$idx + 1]) << 8) + | (ord($b[$idx + 2]) << 16) | (ord($b[$idx + 3]) << 24); + + // convert signed int to unsigned int if on 32 bit operating system + if ($val < 0 && PHP_INT_SIZE == 4) { + $val = sprintf("%u", $val); + } + + return $val; + } + + // read a 2bytes short from a byte buffer + public static function getShort($b, $idx) + { + return ((ord($b[$idx])) | (ord($b[$idx + 1]) << 8)); + } + + // load header info from a specified file handle + public static function loadHeader($handle) + { + if (fseek($handle, 0) == -1) { + return null; + } + + $buff = fread($handle, self::HeaderInfoLength); + if ($buff === false) { + return null; + } + + // read bytes length checking + if (strlen($buff) != self::HeaderInfoLength) { + return null; + } + + // return the decoded 
header info + return [ + 'version' => self::getShort($buff, 0), + 'indexPolicy' => self::getShort($buff, 2), + 'createdAt' => self::getLong($buff, 4), + 'startIndexPtr' => self::getLong($buff, 8), + 'endIndexPtr' => self::getLong($buff, 12) + ]; + } + + // load header info from the specified xdb file path + public static function loadHeaderFromFile($dbFile) + { + $handle = fopen($dbFile, 'r'); + if ($handle === false) { + return null; + } + + $header = self::loadHeader($handle); + fclose($handle); + return $header; + } + + // load vector index from a file handle + public static function loadVectorIndex($handle) + { + if (fseek($handle, self::HeaderInfoLength) == -1) { + return null; + } + + $rLen = self::VectorIndexRows * self::VectorIndexCols * self::SegmentIndexSize; + $buff = fread($handle, $rLen); + if ($buff === false) { + return null; + } + + if (strlen($buff) != $rLen) { + return null; + } + + return $buff; + } + + // load vector index from a specified xdb file path + public static function loadVectorIndexFromFile($dbFile) + { + $handle = fopen($dbFile, 'r'); + if ($handle === false) { + return null; + } + + $vIndex = self::loadVectorIndex($handle); + fclose($handle); + return $vIndex; + } + + // load the xdb content from a file handle + public static function loadContent($handle) + { + if (fseek($handle, 0, SEEK_END) == -1) { + return null; + } + + $size = ftell($handle); + if ($size === false) { + return null; + } + + // seek to the head for reading + if (fseek($handle, 0) == -1) { + return null; + } + + $buff = fread($handle, $size); + if ($buff === false) { + return null; + } + + // read length checking + if (strlen($buff) != $size) { + return null; + } + + return $buff; + } + + // load the xdb content from a file path + public static function loadContentFromFile($dbFile) + { + $str = file_get_contents($dbFile, false); + if ($str === false) { + return null; + } else { + return $str; + } + } + + public static function now() + { + return (microtime(true) * 
1000); + } + +} \ No newline at end of file diff --git a/ip2region/_test.php b/ip2region/_test.php new file mode 100644 index 0000000..f347969 --- /dev/null +++ b/ip2region/_test.php @@ -0,0 +1,52 @@ + 1713, +// 'region' => '中国|0|广东省|广州市|电信', +// ) + +for ($i = 0; $i < 10; $i++) { + test(); +} + +function getIp() +{ + $ip_long = array( + array('607649792', '608174079'), // 36.56.0.0-36.63.255.255 + array('1038614528', '1039007743'), // 61.232.0.0-61.237.255.255 + array('1783627776', '1784676351'), // 106.80.0.0-106.95.255.255 + array('2035023872', '2035154943'), // 121.76.0.0-121.77.255.255 + array('2078801920', '2079064063'), // 123.232.0.0-123.235.255.255 + array('-1950089216', '-1948778497'), // 139.196.0.0-139.215.255.255 + array('-1425539072', '-1425014785'), // 171.8.0.0-171.15.255.255 + array('-1236271104', '-1235419137'), // 182.80.0.0-182.92.255.255 + array('-770113536', '-768606209'), // 210.25.0.0-210.47.255.255 + array('-569376768', '-564133889'), // 222.16.0.0-222.95.255.255 + ); + $rkey = mt_rand(0, 9); + return long2ip(mt_rand($ip_long[$rkey][0], $ip_long[$rkey][1])); +} + +function test() +{ + $ip = getIp(); + global $ip2region; + + echo PHP_EOL . "==============================="; + echo PHP_EOL . "测试 IP 地址: {$ip}"; + echo PHP_EOL . "--------【完整结果】------------" . PHP_EOL; + $info = $ip2region->memorySearch($ip); + var_export($info); + + echo PHP_EOL . "---------【简易结果】----------" . PHP_EOL; + var_export($ip2region->simple($ip)); + echo PHP_EOL . "===============================" . PHP_EOL . 
PHP_EOL; + sleep(2); +} \ No newline at end of file diff --git a/ip2region/composer.json b/ip2region/composer.json new file mode 100644 index 0000000..30b696f --- /dev/null +++ b/ip2region/composer.json @@ -0,0 +1,26 @@ +{ + "type": "library", + "name": "zoujingli/ip2region", + "homepage": "https://github.com/zoujingli/Ip2Region", + "description": "Ip2Region for PHP", + "license": "Apache-2.0", + "authors": [ + { + "name": "Anyon", + "email": "zoujingli@qq.com", + "homepage": "https://thinkadmin.top" + } + ], + "require": { + "php": ">=5.4" + }, + "keywords": [ + "Ip2Region" + ], + "autoload": { + "classmap": [ + "Ip2Region.php", + "XdbSearcher.php" + ] + } +} diff --git a/ip2region/ip2region.xdb b/ip2region/ip2region.xdb new file mode 100644 index 0000000..c78b792 Binary files /dev/null and b/ip2region/ip2region.xdb differ