--- !com.openexchange.subscribe.crawler.CrawlerDescription
# Crawler description for Facebook. The document is tagged as a
# com.openexchange.subscribe.crawler.CrawlerDescription and consists of a few
# scalar settings plus one embedded workflow document (workflowString).

# NOTE(review): presumably the crawler API level this description was written
# for - confirm against the code that loads it.
crawlerApiVersion: 616
# NOTE(review): presumably the label shown for this source - confirm.
displayName: Facebook
# Identifier of this crawler description.
id: com.openexchange.subscribe.crawler.facebook
# NOTE(review): how priority is compared between descriptions is not visible
# in this file - confirm with the loader before changing it.
priority: 1
# Literal block scalar holding a second, embedded YAML document tagged
# com.openexchange.subscribe.crawler.Workflow. Its steps, in order:
#   1. LoginPageByFormActionStep - log in through the mobile site's login form.
#   2. PageByLinkRegexStep - follow the friends link.
#   3. PageByLinkRegexStep - follow the "all" link.
#   4. AnchorsByLinkRegexStep - collect the individual friend links on all
#      subpages.
#   5. ContactObjectsByHTMLAnchorsAndPagePartSequenceStep - extract contact
#      fields from each contact page with a sequence of PagePart regexes.
# Everything below this key is string content of the scalar, so text added
# there (comments included) changes the value handed to the workflow parser.
# NOTE(review): several PagePart regex values look damaged and should be
# checked against a known-good copy: the instant_messenger1 pattern has
# unbalanced parentheses, the display_name pattern is split across three
# lines, and a number of patterns contain a bare " | " where markup may have
# been lost. The final line of the file is a lone "|" - confirm it is intended.
workflowString: |
--- !com.openexchange.subscribe.crawler.Workflow
steps:
- !com.openexchange.subscribe.crawler.LoginPageByFormActionStep
actionOfLoginForm: "https://login.facebook.com/login.php?"
baseUrl: "https://m.facebook.com"
description: Login to facebook.com
linkAvailableAfterLogin: "(\\/friends.*)"
nameOfPasswordField: pass
nameOfUserField: email
numberOfForm: 1
pageTitleAfterLogin: "(\\/friends.*)"
password: ""
url: "https://m.facebook.com/"
username: ""
- !com.openexchange.subscribe.crawler.PageByLinkRegexStep
description: click the friends-link
url: "\\/friends.*"
- !com.openexchange.subscribe.crawler.PageByLinkRegexStep
description: click the all-link
url: "\\/friends.php?.*&a.*"
- !com.openexchange.subscribe.crawler.AnchorsByLinkRegexStep
description: click all the individual friends links on all subpages.
identifyingCriteria: ".*&id=([0-9]*)&.*"
linkRegex: "\\/profile.php.*&id.*"
subpageLinkRegex: "\\/friends.php?.*&a&f.*"
- !com.openexchange.subscribe.crawler.ContactObjectsByHTMLAnchorsAndPagePartSequenceStep
description: Get the info-bits from the contact-page.
linkToTargetPage: ".*&v=info.*"
pageParts: !com.openexchange.subscribe.crawler.PagePartSequence
page: ""
pageParts:
- !com.openexchange.subscribe.crawler.PagePart
regex: "(
)([^<]*)(
)"
type: 1
typeOfInfo: display_name
- !com.openexchange.subscribe.crawler.PagePart
regex: "(([^<]*)( | )"
type: 1
typeOfInfo: instant_messenger1
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Mobile Number|Handynummer|Numéro de mobile|Número de móvil):([0-9\\s\\+\\-\\/\\(\\)]*)(<\\/a>)"
type: 1
typeOfInfo: cellular_telephone1
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Phone|Telefon|Téléphone|Teléfono): | ([0-9\\s\\+\\-\\/\\(\\)]*)(<\\/a>)"
type: 1
typeOfInfo: telephone_business1
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Current address|Aktuelle Adresse|Adresse actuelle|Dirección actual):<\\/td> | (.+?)(<\\/td>)"
type: 1
typeOfInfo: address_note
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Member of|Mitglied von):<\\/td> | (.+?)(<\\/td>)"
type: 1
typeOfInfo: company
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Birthday|Geburtstag|Date de naissance|Fecha de nacimiento):<\\/td> | ([0-9]{1,2})(\\.|\\sde|)"
type: 1
typeOfInfo: birthday_day
- !com.openexchange.subscribe.crawler.PagePart
regex: "(\\s)([^,0-9\\s]*)(,|)"
type: 1
typeOfInfo: birthday_month_string
- !com.openexchange.subscribe.crawler.PagePart
regex: "(\\s)([0-9]{4})(<)"
type: 1
typeOfInfo: birthday_year
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Hometown|Heimatstadt|Originaire de|Ciudad):<\\/td> | (.+?)(<\\/td>)"
type: 1
typeOfInfo: city_home
- !com.openexchange.subscribe.crawler.PagePart
regex: "(Firma|Company|Entreprise|Empresa):<\\/td> | ([^<]*)(<\\/td>)"
type: 1
typeOfInfo: company
titleExceptionsRegex: "Facebook.*(Your Profile|Dein Profil)"
|