--- !com.openexchange.subscribe.crawler.CrawlerDescription
# Crawler description for the "Facebook" subscription source.
#
# `workflowString` is a literal block scalar holding a second YAML document
# (the workflow). Everything indented beneath it is string content, so all
# commentary lives up here and the embedded text carries no added comments.
#
# The embedded workflow lists five steps, in order:
#   1. submit the login form found on https://m.facebook.com/
#   2. follow the link matching the friends pattern
#   3. follow the link matching the "all friends" pattern
#   4. collect the per-friend profile links, following subpage links
#   5. open each profile's info page and apply the regex page parts, each of
#      which names the contact field (`typeOfInfo`) it fills
#
# `username` and `password` are empty strings here; presumably they are
# supplied at runtime -- TODO confirm against the consumer.
#
# NOTE(review): the `display_name` pattern is written across line breaks and
#   its first and third groups contain nothing but a line break. It is kept
#   exactly as found; possibly markup was lost from it -- confirm.
# NOTE(review): the `instant_messenger1` pattern opens three groups but
#   closes only two, so it will not compile as a regular expression. It is
#   kept verbatim because the intended pattern cannot be determined here.
# NOTE(review): two page parts target `company` ("Member of|..." and
#   "Firma|Company|..."); confirm that both are intended.
# NOTE(review): in the `friends.php?` link patterns the `?` is unescaped, so
#   it makes the preceding `p` optional instead of matching a literal `?`;
#   left unchanged -- confirm whether `\\?` was meant.
# NOTE(review): `titleExceptionsRegex` is placed on the last step (sibling of
#   `pageParts`); the nesting was reconstructed from key order -- confirm.
crawlerApiVersion: 616
displayName: Facebook
id: com.openexchange.subscribe.crawler.facebook
priority: 1
workflowString: |
  --- !com.openexchange.subscribe.crawler.Workflow
  steps:
  - !com.openexchange.subscribe.crawler.LoginPageByFormActionStep
    actionOfLoginForm: "https://login.facebook.com/login.php?"
    baseUrl: "https://m.facebook.com"
    description: Login to facebook.com
    linkAvailableAfterLogin: "(\\/friends.*)"
    nameOfPasswordField: pass
    nameOfUserField: email
    numberOfForm: 1
    pageTitleAfterLogin: "(\\/friends.*)"
    password: ""
    url: "https://m.facebook.com/"
    username: ""
  - !com.openexchange.subscribe.crawler.PageByLinkRegexStep
    description: click the friends-link
    url: "\\/friends.*"
  - !com.openexchange.subscribe.crawler.PageByLinkRegexStep
    description: click the all-link
    url: "\\/friends.php?.*&a.*"
  - !com.openexchange.subscribe.crawler.AnchorsByLinkRegexStep
    description: click all the individual friends links on all subpages.
    identifyingCriteria: ".*&id=([0-9]*)&.*"
    linkRegex: "\\/profile.php.*&id.*"
    subpageLinkRegex: "\\/friends.php?.*&a&f.*"
  - !com.openexchange.subscribe.crawler.ContactObjectsByHTMLAnchorsAndPagePartSequenceStep
    description: Get the info-bits from the contact-page.
    linkToTargetPage: ".*&v=info.*"
    pageParts: !com.openexchange.subscribe.crawler.PagePartSequence
      page: ""
      pageParts:
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(
          )([^<]*)(
          )"
        type: 1
        typeOfInfo: display_name
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(([^<]*)()"
        type: 1
        typeOfInfo: instant_messenger1
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Mobile Number|Handynummer|Numéro de mobile|Número de móvil):([0-9\\s\\+\\-\\/\\(\\)]*)(<\\/a>)"
        type: 1
        typeOfInfo: cellular_telephone1
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Phone|Telefon|Téléphone|Teléfono):([0-9\\s\\+\\-\\/\\(\\)]*)(<\\/a>)"
        type: 1
        typeOfInfo: telephone_business1
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Current address|Aktuelle Adresse|Adresse actuelle|Dirección actual):<\\/td>(.+?)(<\\/td>)"
        type: 1
        typeOfInfo: address_note
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Member of|Mitglied von):<\\/td>(.+?)(<\\/td>)"
        type: 1
        typeOfInfo: company
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Birthday|Geburtstag|Date de naissance|Fecha de nacimiento):<\\/td>([0-9]{1,2})(\\.|\\sde|)"
        type: 1
        typeOfInfo: birthday_day
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(\\s)([^,0-9\\s]*)(,|)"
        type: 1
        typeOfInfo: birthday_month_string
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(\\s)([0-9]{4})(<)"
        type: 1
        typeOfInfo: birthday_year
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Hometown|Heimatstadt|Originaire de|Ciudad):<\\/td>(.+?)(<\\/td>)"
        type: 1
        typeOfInfo: city_home
      - !com.openexchange.subscribe.crawler.PagePart
        regex: "(Firma|Company|Entreprise|Empresa):<\\/td>([^<]*)(<\\/td>)"
        type: 1
        typeOfInfo: company
    titleExceptionsRegex: "Facebook.*(Your Profile|Dein Profil)"