#!/bin/bash
# SCRIPT NAME: mail_id_extractor_from_url.sh
# DESCRIPTION: Takes a list of seed URLs, extracts every sub-URL and referenced URL found on each seed page, then visits those URLs and extracts the mail IDs they contain.
# USAGE: bash mail_id_extractor_from_url.sh website_list.txt
# website_list.txt file contains all seed URLs to extract mail ids.
#AUTHOR: Reniguntla S
# Crawl is best-effort: one unreachable page must not abort the whole run, so
# `set -e` is deliberately not used. Unset variables are still hard errors.
set -u

# By-product: de-duplicated list of URLs discovered on the seed pages.
readonly SUB_URL_LIST="sub_urllist.txt"
# Final result; each run appends its own sorted, de-duplicated batch.
readonly MAIL_LIST="extracted_mail_list.txt"
# Extended regex used to pick mail ids out of the fetched page text.
readonly EMAIL_REGEX='\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b'

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# $1 - path to a text file holding one seed URL per line.
filename="${1:-}"
if [[ -z "$filename" ]]; then
  printf 'Usage: bash %s website_list.txt\n' "${0##*/}" >&2
  exit 2
fi
[[ -r "$filename" ]] || die "Cannot read seed URL list: $filename"

for tool in lynx curl; do
  command -v "$tool" >/dev/null || die "Required tool not found: $tool"
done

# Intermediate files live in a private temp dir so that leftovers from an
# interrupted earlier run can never leak into this one; removed on any exit.
work_dir=$(mktemp -d) || die "mktemp failed"
trap 'rm -rf -- "${work_dir:?}"' EXIT
raw_links="$work_dir/raw_links.txt"
mail_hits="$work_dir/mailids.txt"
# Pre-create both files so later stages work even when nothing is found.
: > "$raw_links"
: > "$mail_hits"

# Stage 1: dump every seed page and keep the lines that mention "http".
# The `|| [[ -n ... ]]` clause also processes a last line without a newline.
while IFS= read -r seed_url || [[ -n "$seed_url" ]]; do
  [[ -n "$seed_url" ]] || continue
  # stdin is detached so a child process cannot swallow the rest of the list.
  lynx -dump "$seed_url" < /dev/null | grep "http" >> "$raw_links"
done < "$filename"

# The URL is taken from the second field of each kept line. Only fields that
# really are http(s) URLs are kept, and `sort -u` keeps ONE copy of every URL
# (`uniq -u` would instead discard every URL that was seen more than once).
awk '$2 ~ /^https?:\/\// { print $2 }' "$raw_links" | sort -u > "$SUB_URL_LIST"
echo "Extraction of sub URLs of seed URLs done"

# Stage 2: fetch each discovered URL and collect anything shaped like a mail id.
echo "Be patient, It will take some time :-)"
while IFS= read -r url; do
  printf '%s\n' "$url"
  # -sS: no progress meter, but errors are still reported on stderr.
  curl -sS "$url" < /dev/null | grep -E -o "$EMAIL_REGEX" >> "$mail_hits"
done < "$SUB_URL_LIST"

sort -u "$mail_hits" >> "$MAIL_LIST"
echo "Email ids extraction done. Check extracted_mail_list.txt file"
# Explicit exit: bash must never try to parse anything that follows this
# point in the file.
exit 0
# End of script
Monday, March 14, 2016
Shell Script to Extract Email ids from Website URL
Posted by
umencs
Extracting e-mail IDs from websites for publicity purposes is always tedious work, so I created a shell script that automates the extraction of mail IDs from URLs.
Thursday, March 3, 2016
Short Term Courses on Data Science
Posted by
umencs
Call for Participation
Short-term courses on Data Science
CDAC, Kharghar, Navi Mumbai
We are living in a Data Age. Data is being continuously generated and consumed in various formats and sizes from a number of varied sources. This data can be a big asset if stored, processed and analysed efficiently in real time with the help of intelligent algorithms. There is a growing interest in utilizing such data for the improvement of business, health, education, society, etc. There are many ways to process and analyse such data, spanning techniques like data visualization, text analysis, predictions and recommendations. Applications of these techniques can give companies and organisations valuable insights leading to competitive advantage, efficient service delivery and, above all, customer satisfaction. And so the demand for skilled resources in these fields is growing day by day.
--Using R for data visualization and analytics
--Text Analytics
--Predictive Analytics and Recommender Systems
For more details and registration, please visit http://www.kbcs.in/datascience.
Subscribe to:
Posts (Atom)