1: <?php
2:
3: /**
4: * A utility class for carrying out various tasks on HTTP user agent strings
5: *
6: * @package alpha::util
7: * @since 1.0
8: * @author John Collins <dev@alphaframework.org>
9: * @version $Id: AlphaAgentUtils.inc 1496 2012-02-12 20:32:21Z alphadev $
10: * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
11: * @copyright Copyright (c) 2012, John Collins (founder of Alpha Framework).
12: * All rights reserved.
13: *
14: * <pre>
15: * Redistribution and use in source and binary forms, with or
16: * without modification, are permitted provided that the
17: * following conditions are met:
18: *
19: * * Redistributions of source code must retain the above
20: * copyright notice, this list of conditions and the
21: * following disclaimer.
22: * * Redistributions in binary form must reproduce the above
23: * copyright notice, this list of conditions and the
24: * following disclaimer in the documentation and/or other
25: * materials provided with the distribution.
26: * * Neither the name of the Alpha Framework nor the names
27: * of its contributors may be used to endorse or promote
28: * products derived from this software without specific
29: * prior written permission.
30: *
31: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35: * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
36: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
42: * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44: * </pre>
45: *
46: */
class AlphaAgentUtils {

    /**
     * An array of partial user agent strings belonging to well known web spider bots.
     *
     * Each entry is matched case-insensitively as a substring of the user agent. An
     * asterisk (*) inside an entry is a wildcard that matches any run of characters
     * (including none), e.g. 'W3C_*Validator' matches both 'W3C_Validator/1.3' and
     * 'W3C_CSS_Validator_JFouffa/2.0'.
     *
     * @var array
     * @since 1.0
     */
    private static $bots = array(
        'ia_archiver',
        'Scooter/',
        'Ask Jeeves',
        'Baiduspider+(',
        'bingbot/',
        'Disqus/',
        'Exabot/',
        'FAST Enterprise Crawler',
        'FAST-WebCrawler/',
        'http://www.neomo.de/',
        'Gigabot/',
        'Mediapartners-Google',
        'Google Desktop',
        'Feedfetcher-Google',
        'Googlebot',
        'heise-IT-Markt-Crawler',
        'heritrix/1.',
        'ibm.com/cs/crawler',
        'ICCrawler - ICjobs',
        'ichiro/2',
        'MJ12bot/',
        'MetagerBot/',
        'msnbot-NewsBlogs/',
        'msnbot/',
        'msnbot-media/',
        'NG-Search/',
        'http://lucene.apache.org/nutch/',
        'NutchCVS/',
        'OmniExplorer_Bot/',
        'online link validator',
        'psbot/0',
        'Seekbot/',
        'Sensis Web Crawler',
        'SEO search Crawler/',
        'Seoma [SEO Crawler]',
        'SEOsearch/',
        'Snappy/1.1 ( http://www.urltrends.com/ )',
        'http://www.tkl.iis.u-tokyo.ac.jp/~crawler/',
        'SynooBot/',
        'crawleradmin.t-info@telekom.de',
        'TurnitinBot/',
        'voyager/1.0',
        'W3 SiteSearch Crawler',
        'W3C-checklink/',
        'W3C_*Validator',
        'http://www.WISEnutbot.com',
        'yacybot',
        'Yahoo-MMCrawler/',
        'Yahoo! DE Slurp',
        'Yahoo! Slurp',
        'YahooSeeker/'
    );

    /**
     * Static method to check if the provided user agent string matches any of the known user
     * agent strings in the $bots array on this class.
     *
     * Matching is case-insensitive. A null or empty user agent (for example when the client
     * sent no User-Agent header) is never considered a bot.
     *
     * @param string $userAgent The user agent string that we want to check.
     * @return boolean True if the user agent contains a known bot signature, false otherwise.
     * @since 1.0
     */
    public static function isBot($userAgent) {
        // A missing header typically arrives here as null; avoid handing that to string functions.
        if ($userAgent === null) {
            return false;
        }

        $userAgent = (string) $userAgent;

        if ($userAgent === '') {
            return false;
        }

        foreach (self::$bots as $botName) {
            if (self::matchesSignature($userAgent, $botName)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Checks a single bot signature against a user agent string.
     *
     * @param string $userAgent The (non-empty) user agent string to search in.
     * @param string $signature The bot signature, optionally containing * wildcards.
     * @return boolean True if the signature occurs in the user agent, ignoring case.
     */
    private static function matchesSignature($userAgent, $signature) {
        // Plain signatures only need a case-insensitive substring search. Compare against
        // false explicitly because a match at offset 0 is a valid (but falsy) result.
        if (strpos($signature, '*') === false) {
            return stripos($userAgent, $signature) !== false;
        }

        // preg_quote() escapes every regex metacharacter, turning '*' into '\*'; swap those
        // escaped asterisks for '.*' so that they behave as wildcards while everything else
        // in the signature is still matched literally.
        $pattern = str_replace('\*', '.*', preg_quote($signature, '/'));

        return preg_match('/' . $pattern . '/is', $userAgent) === 1;
    }
}
130:
131:
132: ?>
133: