Perl Notes from Udemy
- Perl 模块安装
- Section 1: Basic Perl: Getting Started
- T2. Hello World
- T3. Downloading Text and Images With Perl
- T4. Arrays and Checking Whether Files Exist
- T5. Reading Files and Beginning Regular Expressions
- T6. Writing Files and Replacing Text
- T7. Wildcards in Regular Expressions
- T8. Groups: Finding Out What You Actually Matched
- T9. Quantifiers: Greedy vs. Non-Greedy
- T10. Escape Sequences
- T11. Numeric Quantifiers
- Section 2: More on Reading Files Line By Line: Tips, Tricks and Vital Knowledge
- T13. Split and reading csv files
- T14. Join and Viewing Data Using Data::Dumper
- T15. Chomp and Removing Spaces in Splits
- T16. "Pushing" Onto Arrays
- T17. Array of arrays
- T18. Hashes: Lookup Tables in Perl
- T19. Iterating over Hashes
- T20. Array of Hashes
- T21. Storing CSV Data in a Data Structure
- T22. Validating CSV Data
- T23. Cleaning CSV Data
- Section 3: Web Scraping and More Regular Expressions
- Section 4: Building a Complete Progam: Command Line Options
- Section 5: Parsing XML and Complex Data Structures
- Section 6: Working with Databases
- Section 8: Modules and OO Perl
- Section 9: Web Application Basics
- Section 10: Basic Sysadmin Tasks
Perl 模块安装
参考:
各个平台下 Perl 模块安装总结
官网介绍
Error: Can’t create ‘/Library/Perl/5.18/App’:改用sudo
Section 1: Basic Perl: Getting Started
T2. Hello World
use strict;
use warnings;
sub main {
print "Hello World!\n";
}
main();
T3. Downloading Text and Images With Perl
use strict;
use warnings;
use LWP::Simple;
sub main {
print "Downloading ...\n";
# Download the specified HTML page and print it to the console.
# print get("http://www.caveofprogramming.com/");
# Download the Cave of Programming home page HTML and save it to "home.html"
# getstore("http://www.caveofprogramming.com/", "home.html");
# Download logo.png from the Internet and store it in a file named "logo.png"
my $code = getstore('http://www.caveofprogramming.com/wp-content/themes/squiffy/images/logo.png', "logo.png");
# Did we get code 200 (successful request) ?
if($code == 200) {
print "Success\n";
}
else {
print "Failed\n";
}
print "Finished\n";
}
main();
T4. Arrays and Checking Whether Files Exist
use strict;
use warnings;
$|=1;
sub main {
# An array of file names. Note the "@"" for array
my @files = (
'/Users//Desktop/Perl/Tutorial2.pl',
'/Users//Desktop/Perl/Tutorial3.pl',
'/Users//Desktop/Perl/Tutorial4.pl',
);
# foreach loops through the array, setting the loop
# variable ($file in this case) to each value in turn.
foreach my $file (@files) {
# -f tests if a file exists.
if ( -f $file ) {
print "Found file: $file\n";
}
else {
print "File not found: $file\n";
}
}
}
main();
T5. Reading Files and Beginning Regular Expressions
use strict;
use warnings;
# turn off output buffering
$|=1;
sub main {
my $file = '/Users//Desktop/Perl/egg.txt';
# Either successfully open the file or else die (stop the program)
open(INPUT, $file) or die("Input file $file not found.\n");
# Read each line from the file, one line at a time.
while(my $line = <INPUT>) {
if($line=~/egg/) {
print "$line\n";
}
}
close(INPUT);
}
main();
T6. Writing Files and Replacing Text
use strict;
use warnings;
$|=1;
sub main {
# open $input for reading
my $input = '/Users//Desktop/Perl/egg.txt';
open(INPUT, $input) or die("Input file $input not found.\n");
# open $output for writing
my $output = 'output.txt';
open(OUTPUT, '>'.$output) or die "Can't create $output.\n";
# Read the input file one line at a time.
while(my $line = <INPUT>) {
# If this line has the word "egg" in it, write it
# to the output file, otherwise ignore it.
# \b matches the edges (boundaries) of words.
if($line =~ /\begg\b/) {
$line =~ s/\begg\b/dinosaur/ig; # i: case insensitive, g: global
print OUTPUT $line;
}
}
close(INPUT);
close(OUTPUT);
}
main();
T7. Wildcards in Regular Expressions
use strict;
use warnings;
$|=1;
sub main {
my $file = 'C:\tutorial\perl\mymanjeeves.txt';
open(INPUT, $file) or die("Input file $file not found.\n");
while(my $line = <INPUT>) {
# The dot matches any character, even space or punctuation
# e.g. the example below matches
# I was
# I said
# If an
# I take
# etc.
# (only five characters including the space are actually matched by the epxression)
if($line =~ /I..a./) {
print $line;
}
}
close(INPUT);
}
main();
T8. Groups: Finding Out What You Actually Matched
use strict;
use warnings;
$|=1;
sub main {
my $file = 'C:\tutorial\perl\mymanjeeves.txt';
open(INPUT, $file) or die("Input file $file not found.\n");
while(my $line = <INPUT>) {
# Surround the bits you want to "capture" with round brackets
if($line =~ /(I..a.)(...)/) {
# The stuff matched by the first set of round brackets is now in $1
# The stuff matched by the second set is in $2
print "First match: '$1'; second match:'$2'\n";
}
}
close(INPUT);
}
main();
T9. Quantifiers: Greedy vs. Non-Greedy
use strict;
use warnings;
$|=1;
sub main {
my $file = 'C:\tutorial\perl\mymanjeeves.txt';
open(INPUT, $file) or die("Input file $file not found.\n");
while(my $line = <INPUT>) {
# * matches zero or more of the preceding character; e.g.
# d* matches zero or more d's 7* zero or more 7's, etc.
# .* matches zero or more of any character, as many as possible
# .*? matches zero or more of any character, as few as possible
if($line =~ /(s.*?n)/) {
print "$1\n";
}
}
close(INPUT);
}
main();
If you want to match a ., remember to use \..
T10. Escape Sequences
use strict;
use warnings;
$|=1;
sub main {
# \d digit
# \s space
# \S non-space
# \w alphanumeric
# Some examples; in the following examples,
# each example shows the text, the regular expression
# and the output, in that order.
# Digits:
# 'I am 117 years old tomorrow.'
# (\d+)
# Matched: '117'
# Space (will also match a tab)
# I am 117 years old tomorrow.
# (am\s*\d+)
# Matched: 'am 117'
# \S (non space -- note, capital 'S')
# 'I am 117 years old tomorrow.'
# (y\S+)
# Matched: 'years'
# Alphanumeric, including underscore
# \w
# 'Iam117yearsold_tomorrow.'
# (y\w*)
# Matched: 'yearsold_tomorrow'
my $text = 'I am 117 years old tomorrow.';
if($text =~ /(y\S+)/) {
print "Matched: '$1'\n";
}
}
main();
T11. Numeric Quantifiers
use strict;
use warnings;
$|=1;
sub main {
# * zero or more of the preceding character, as many as possible
# + one or more of the preceding, as many as possible
# *? zero or more of the preceding character, as few as possible
# +? one or more of the preceding, as few as possible
# {5} five of the preceding
# {3,6} at least three and at most 6
# {3,} at least three
my $text = 'DE$75883';
if($text =~ /(DE\$\d{3,})/) {
print "Matched: '$1'\n";
}
}
main();
Section 2: More on Reading Files Line By Line: Tips, Tricks and Vital Knowledge
T13. Split and reading csv files
use strict;
use warnings;
$|=1;
sub main {
my $input = 'test.csv';
# execute if the condition is FALSE
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>; # get rid of header
# if not use $line, then can use $_
while(my $line = <INPUT>) {
my @values = split ',', $line;
print $values[1] . "\n";
}
close INPUT;
}
main();
T14. Join and Viewing Data Using Data::Dumper
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
while(my $line = <INPUT>) {
my @values = split ',', $line;
# print join '|', @values;
print Dumper(@values);
}
close INPUT;
}
main();
Execution output:
$VAR1 = 'Isaac Newton';
$VAR2 = '99.10';
$VAR3 = '15051999
';
$VAR1 = 'Albert Einstein';
$VAR2 = '13.20';
$VAR3 = '11062012
';
$VAR1 = 'Carl Scheele';
$VAR2 = '66.23';
$VAR3 = '01012000
';
$VAR1 = 'Rene Descartes';
$VAR2 = '0.57';
$VAR3 = '10072033
';
The problem is that at the end of each line, there is a \n which is the reason why '; is printed out at the next line.
T15. Chomp and Removing Spaces in Splits
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
while(my $line = <INPUT>) {
chomp $line; # chomp
# get rid of space
my @values = split /\s*,\s*/, $line;
print Dumper(@values);
}
close INPUT;
}
main();
chomp()
Using the Perl chomp() function
perl中chomp的使用
T16. “Pushing” Onto Arrays
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my @array;
push @array, 'apple';
push @array, 'banana';
push @array, 'peach';
foreach my $element(@array) {
print $element . "\n";
}
}
main();
T17. Array of arrays
use strict;
use warnings;
use Data::Dumper;
my @animals = ('dog', 'cat', 'rabbit');
my @fruits = ('apple', 'banana', 'orange');
my $fruits_ref = \@fruits; # create a reference
print $fruits_ref->[0] . "\n";
my @values;
push @values, \@animals;
push @values, \@fruits;
print Dumper(@values);
Output:
apple
$VAR1 = [
'dog',
'cat',
'rabbit'
];
$VAR2 = [
'apple',
'banana',
'orange'
];
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
my @lines;
while(my $line = <INPUT>) {
chomp $line;
my @values = split /\s*,\s*/, $line;
push @lines, \@values;
}
close INPUT;
print $lines[3][1] . "\n";
foreach my $line(@lines) {
print Dumper($line);
print "Name " . $line->[0] . "\n";
}
}
main();
Output:
0.57
$VAR1 = [
'Isaac Newton',
'99.10',
'15051999'
];
Name Isaac Newton
$VAR1 = [
'Albert Einstein',
'13.20',
'11062012'
];
Name Albert Einstein
$VAR1 = [
'Carl Scheele',
'66.23',
'01012000'
];
Name Carl Scheele
$VAR1 = [
'Rene Descartes',
'0.57',
'10072033'
];
Name Rene Descartes
T18. Hashes: Lookup Tables in Perl
use strict;
use warnings;
use Data::Dumper;
$| = 1;
sub main {
my %months = (
1 => "Jan",
5 => "May",
7 => "Jul",
);
print $months{5} . "\n";
my %days;
$days{"Sunday"} = 1;
$days{"Monday"} = 2;
$days{"Friday"} = 6;
my $day = $days{"Friday"};
print "Friday is day $day\n";
}
main();
the order of hash can not be relied on.
T19. Iterating over Hashes
use strict;
use warnings;
use Data::Dumper;
$| = 1;
sub main {
my %foods = (
"mice" => "cheese",
"dogs" => "meat",
"birds" => "seeds",
);
# We can define an array of variables like this:
# round brackets are required
my ($one, $two, $three) = (13, 21, 38);
print "The value of \$two is $two\n";
while( my ($key, $value) = each %foods) {
print "$key: $value\n";
}
# sort the key according to the order of A-Z
foreach my $key(sort keys %foods) {
my $value = $foods{$key};
print "$key = $value\n";
}
}
main();
Output:
The value of $two is 21
dogs: meat
birds: seeds
mice: cheese
birds = seeds
dogs = meat
mice = cheese
T20. Array of Hashes
use strict;
use warnings;
use Data::Dumper;
$| = 1;
sub main {
my %hash1 = (
"cat" => "meat",
"birds" => "seeds",
"fish" => "worms",
);
my @test;
# We could push a reference to a hash onto an array.
push @test, \%hash1;
# We could also just refer to the element after the end of the array
# and by setting it, create it:
$test[1] = \%hash1;
print $test[0]{"birds"} . "\n";
print $test[1]{"fish"} . "\n";
}
main();
Output:
seeds
worms
T21. Storing CSV Data in a Data Structure
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
my @data;
while(my $line = <INPUT>) {
chomp $line;
my ($name, $payment, $date) = split /\s*,\s*/, $line;
my %values = (
"Name" => $name,
"Payment" => $payment,
"Date" => $date,
);
push @data, \%values;
}
close INPUT;
foreach my $data(@data){
print $data -> {"Payment"} . "\n";
}
print "Descartes: " . $data[3]-> {"Name"} . "\n";
print "Descartes: " . $data[3]{"Name"} . "\n";
}
main();
T22. Validating CSV Data
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
my @data;
LINE: while(my $line = <INPUT>) {
chomp $line;
my @values = split /\s*,\s*/, $line;
if (scalar(@values) < 3){
print "Invalid line: $line\n";
next LINE;
}
foreach my $value(@values){
if ($value eq ''){
print "Invalid line: $line\n";
next LINE;
}
}
my ($name, $payment, $date) = @values;
my %values = (
"Name" => $name,
"Payment" => $payment,
"Date" => $date,
);
push @data, \%values;
}
close INPUT;
foreach my $data(@data){
print $data -> {"Payment"} . "\n";
}
print "Descartes: " . $data[3]-> {"Name"} . "\n";
}
main();
Input:
Name,Payment,Date
Isaac Newton ,99.1,15051999
Albert Einstein,13.2,11062012
Carl Scheele,66.23,1012000
Rene Descartes,0.57,10072033
Sarah,10,
,,
Nicolas,0,
Output:
Invalid line: Sarah,10,
Invalid line: ,,
Invalid line: Nicolas,0,
99.1
13.2
66.23
0.57
Descartes: Rene Descartes
T23. Cleaning CSV Data
use strict;
use warnings;
use Data::Dumper;
$|=1;
sub main {
my $input = 'test.csv';
unless(open(INPUT, $input)) {
die "\nCannot open $input\n";
}
<INPUT>;
my @data;
LINE: while(my $line = <INPUT>) {
# replace the space at the beginning of the line
$line =~ s/^\s*//;
# replace the space at the end of the line
$line =~ s/\s*$//;
# replace the space at the beginning and end of the line
# g stands for global, replace as many as possible
$line =~ s/^\s*|\s*$//g;
$line =~ /\S+/ or next;
chomp $line;
my @values = split /\s*,\s*/, $line;
if (scalar(@values) < 3){
print "Invalid line: $line\n";
next LINE;
}
foreach my $value(@values){
if ($value eq ''){
print "Invalid line: $line\n";
next LINE;
}
}
my ($name, $payment, $date) = @values;
my %values = (
"Name" => $name,
"Payment" => $payment,
"Date" => $date,
);
push @data, \%values;
}
close INPUT;
foreach my $data(@data){
print $data -> {"Payment"} . "\n";
}
print "Descartes: " . $data[3]-> {"Name"} . "\n";
}
main();
Section 3: Web Scraping and More Regular Expressions
T25. Basic Web Scraping
use strict;
use warnings;
use LWP::Simple;
$|=1;
sub main {
my $content = get("http://www.caveofprogramming.com");
unless(defined($content)) {
die "Unreachable url\n";
}
# m stands for match
# '' means there are no escape characters
if($content =~ m'<a class="mainlink" href=".+?">(.+?)</a>') {
my $title = $1;
print "Title: $title\n";
}
else {
print "\nTitle not found\n";
}
}
main();
T26. Character classes
use strict;
use warnings;
$|=1;
sub main {
# Ranges
# [0-9] any number
# [A-Z] any uppercase letter (in the English alphabet)
# Alternatives
# [ABC] -- list of alternates
# [\=\%] - simply list alternatives. Backslash any character that might have a special meaning in regex
# [A-Za-z_0-9] -- specify alternatives just by listing them; can include ranges.
# [^0-9T\s] ^ Match anything EXCEPT the specified characters.
my $content = "The 39 Steps - a GREAT book - Colours_15 ==%== ABBCCBBCCABCA";
if($content =~ /([^0-9T\s]{5,})/) {
print "Matched '$1'\n";
}
else {
print "No match\n";
}
}
main();
T27. Matching Repeatedly
use strict;
use warnings;
use LWP::Simple;
$| = 1;
sub main {
my $content = get("http://www.caveofprogramming.com");
unless ( defined($content) ) {
die "Unreachable url\n";
}
# <a href="http://news.bbc.co.uk">BBC News</a>
# []<>
while (
$content =~
m| # Use a pipe character as the quote, since we don't need to use it inside the regex.
<\s*a\s+ # Match the opening <a, with or without space around the 'a'
[^>]* # Match any amount of gumpf, as long as it's not the closing angle bracket quote
href\s*=\s* # Match href=, with or without space around the '='
['"] # Match either a single or double quote
([^>"']+) # Match any text, as long as it doesn't include a closing '>' bracket or a quote
['"] # Close the quote
[^>]*> # Match anything other than the closing bracket, followed by the closing bracket.
\s*([^<>]*)\s*</ # Match the hyperlinked text; any characters other than angle brackets
|sigx # s: match across new lines; i: case insensitive match; g: global (repeated) match; x: allow whitespace and comments
{
print "$2: $1\n";
}
}
main();
T28. Collecting Repeated Matches All At Once
use strict;
use warnings;
use LWP::Simple;
$| = 1;
sub main {
my $content = get("http://www.caveofprogramming.com");
unless ( defined($content) ) {
die "Unreachable url\n";
}
my @classes = $content =~ m|class="([^"']*?)"|ig;
if(@classes == 0) {
print "No matches\n";
}
else {
foreach my $class(@classes) {
print "$class\n";
}
}
}
main();
Perl 5 version 30.0 documentation defined
defined函数
Section 4: Building a Complete Progam: Command Line Options
T29. Getting Command Line Options
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
sub main {
my %opts;
# : after f means receive the file name after -f
getopts('af:c', \%opts);
print Dumper(%opts);
my $file = $opts{'f'};
print "File: $file\n";
}
main();
Output:
(base) MacBook-Air:desktop $ perl tutorial29.pl -f test.xml -a -c
$VAR1 = 'f';
$VAR2 = 'test.xml';
$VAR3 = 'c';
$VAR4 = 1;
$VAR5 = 'a';
$VAR6 = 1;
File: test.xml
Perl Command Line Options
Perl Argv – Retrieving Command-Line Arguments in Perl
- Take a look at “More Intelligent Command-Line Option Processing Using Getopts” part to get the explaination of above script in detail.
T30. Subroutines and Returning Values
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
sub main {
my %opts;
getopts('af:c', \%opts);
if(!checkusage()) {
usage();
}
}
sub checkusage {
return 0;
}
sub usage {
print "incorrect options\n";
}
main();
T31. Multi-Line Strings and Comments
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('af:c', \%opts);
if(!checkusage()) {
usage();
}
}
sub checkusage {
return 0;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-f <file name> specify XML file name to parse
-a turn off error checking
example usage:
perl main.pl -f test.xml -a
USAGE
exit();
}
main();
Output:
(base) MacBook-Air:desktop $ perl tutorial31.pl
usage: perl main.pl <options>
-f <file name> specify XML file name to parse
-a turn off error checking
example usage:
perl main.pl -f test.xml -a
T32. Passing Arguments to Subroutines
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('af:c', \%opts);
if(!checkusage("Hello", 7)) {
usage();
}
}
sub checkusage {
# my $greeting = shift; or my $greeting = shift @_;
# my $number = shift; or my $number = shift @_;
# shift has @_ argument by default
# shift means getting a value from the beginning of an array
# push means pushing a value to the end of an array
# pop means popping out a value from the end of an array
my ($greeting, $number) = @_;
print "$greeting number $number\n";
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-f <file name> specify XML file name to parse
-a turn off error checking
example usage:
perl main.pl -f test.xml -a
USAGE
exit();
}
main();
T33. References to Hashes
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('af:c', \%opts);
if(!checkusage(\%opts)) {
usage();
}
=pod
perl parse.pl -a -f test.xml -c
a => 1
c => 1
f => test.xml
=cut
my $opts_ref = \%opts;
# Use hash directly.
print $opts{"f"} . "\n";
# Use reference to hash
print $opts_ref->{"f"} . "\n";
}
sub checkusage {
# only one argument \%opts is passed to the subroutine
my $opts_ref = shift;
print $opts_ref->{"f"} . "\n";
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-f <file name> specify XML file name to parse
-a turn off error checking
example usage:
perl main.pl -f test.xml -a
USAGE
exit();
}
main();
Output:
(base) MacBook-Air:desktop $ perl tutorial33.pl -f test.xml -a -c
test.xml
test.xml
test.xml
T34. Checking Values in Hashes
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('af:r', \%opts);
if(!checkusage(\%opts)) {
usage();
}
=pod
perl parse.pl -a -f test.xml -r
a => 1
r => 1
f => test.xml
=cut
}
sub checkusage {
my $opts = shift;
my $a = $opts->{"a"};
my $r = $opts->{"r"};
my $f = $opts->{"f"};
# Image a is optional; don't really need to refer to it here at all.
# r is mandatory: it means "run the program"
# f is mandatory.
unless(defined($r) and defined($f)) {
return 0;
}
unless($f =~ /\.xml$/i) {
print "Input file must have the extension .xml\n";
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-f <file name> specify XML file name to parse
-a turn off error checking
-r run the program
example usage:
perl main.pl -r -f test.xml -a
USAGE
exit();
}
main();
Section 5: Parsing XML and Complex Data Structures
T35. Finding All Files in a Directory and Filtering Arrays
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
print Dumper(\@files);
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
# find all files in a directory
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
# filter out files end with ".xml"
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
If there is no return argument at the end of the subroutine, the subroutine will return its last line. If it’s a print function, then the returned value would be 1.
T36. Processing Files One By One
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
process_files(\@files, $input_dir);
}
sub process_files {
my ($files, $input_dir) = @_;
print "$input_dir\n";
print Dumper($files);
# $files is a reference, so need to put @ before to make it an array
foreach my $file(@$files) {
process_file($file, $input_dir);
}
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ...\n";
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
@$files means cast $files to the original type array, @{$files} can also be used.
T37. Parsing XML with Regular Expressions
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
process_files(\@files, $input_dir);
}
sub process_files {
my ($files, $input_dir) = @_;
foreach my $file(@$files) {
process_file($file, $input_dir);
}
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
$/ = "</entry>";
while(my $chunk = <INPUTFILE>) {
# there must be round brackets
# otherwise $band will get the value: True or False
my ($band) = $chunk =~ m'<band>(.*?)</band>';
unless(defined($band)) {
next;
}
my @albums = $chunk =~ m'<album>(.*?)</album>'sg;
print " found " . scalar(@albums) . " for $band ...\n";
print Dumper(@albums);
}
close(INPUTFILE);
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
Input: test.xml
<xml>
<entry>
<band>The Basspluckers</band>
<album>
<name>Revenge of the Squirrels</name>
<chartposition>434</chartposition>
</album>
<album>
<name>Pluck My Bass</name>
<chartposition>123</chartposition>
</album>
</entry>
<entry>
<band>The Dead Drunks</band>
<album>
<name>Spy in the House of Love</name>
<chartposition>10</chartposition>
</album>
<album>
<name>Get Out of My House of Love, You Spy</name>
<chartposition>74</chartposition>
</album>
</entry>
</xml>
** Output:**
(base) MacBook-Air:test $ perl tutorial37.pl -d /Users/jiangfan/desktop/test
Processing test.xml in /Users/jiangfan/desktop/test ...
found 2 for The Basspluckers ...
$VAR1 = '
<name>Revenge of the Squirrels</name>
<chartposition>434</chartposition>
';
$VAR2 = '
<name>Pluck My Bass</name>
<chartposition>123</chartposition>
';
found 2 for The Dead Drunks ...
$VAR1 = '
<name>Spy in the House of Love</name>
<chartposition>10</chartposition>
';
$VAR2 = '
<name>Get Out of My House of Love, You Spy</name>
<chartposition>74</chartposition>
';
Highlight:
# there must be round brackets
# otherwise $band will get the value: True or False
my ($band) = $chunk =~ m'<band>(.*?)</band>';
Perl One-Liners | Perl命令行学习5 $/和$\变量
Perl’s Special Variables
Perl: Downloading and Parsing XML
T38. Using XML::Simple, and Extracting Data from Complex Structures
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
process_files(\@files, $input_dir);
}
sub process_files {
my ($files, $input_dir) = @_;
foreach my $file(@$files) {
process_file($file, $input_dir);
}
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
print $content;
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content);
print Dumper($dom);
foreach my $band(@{$dom->{"entry"}}) {
print Dumper($band->{"band"});
}
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
Output:
(base) MacBook-Air:test jiangfan$ perl tutorial38.pl -d /Users/jiangfan/desktop/test
Processing test.xml in /Users/jiangfan/desktop/test ...
<xml>
<entry>
<band>The Basspluckers</band>
<album>
<name>Revenge of the Squirrels</name>
<chartposition>434</chartposition>
</album>
<album>
<name>Pluck My Bass</name>
<chartposition>123</chartposition>
</album>
</entry>
<entry>
<band>The Dead Drunks</band>
<album>
<name>Spy in the House of Love</name>
<chartposition>10</chartposition>
</album>
<album>
<name>Get Out of My House of Love, You Spy</name>
<chartposition>74</chartposition>
</album>
</entry>
</xml>$VAR1 = {
'entry' => [
{
'band' => 'The Basspluckers',
'album' => {
'Revenge of the Squirrels' => {
'chartposition' => '434'
},
'Pluck My Bass' => {
'chartposition' => '123'
}
}
},
{
'band' => 'The Dead Drunks',
'album' => {
'Get Out of My House of Love, You Spy' => {
'chartposition' => '74'
},
'Spy in the House of Love' => {
'chartposition' => '10'
}
}
}
]
};
$VAR1 = 'The Basspluckers';
$VAR1 = 'The Dead Drunks';
T39. Extracting Data from Complex Structures: A Complete Example
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
process_files(\@files, $input_dir);
}
sub process_files {
my ($files, $input_dir) = @_;
foreach my $file(@$files) {
process_file($file, $input_dir);
}
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
print $content;
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
print Dumper($dom);
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
print "\n\n$band_name\n";
print "============\n";
my $albums = $band->{"album"};
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
print "$album_name: $chartposition\n";
}
}
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
T40. Building Complex Data Structures
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
process_files(\@files, $input_dir);
}
sub process_files {
my ($files, $input_dir) = @_;
foreach my $file(@$files) {
my @bands = process_file($file, $input_dir);
print Dumper(@bands);
}
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
my @output;
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums, {
"name" => $album_name,
"position" => $chartposition,
};
}
push @output, {
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
Section 6: Working with Databases
T43. Connecting to a Database
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
my @data = process_files(\@files, $input_dir);
print Dumper(@data);
my $dbh = DBI->connect("dbi:mysql:bands", "john", "letmein");
unless(defined($dbh)) {
die "Cannot connect to database.\n";
}
print "Connected\n";
$dbh->disconnect();
}
sub process_files {
my ($files, $input_dir) = @_;
my @data;
foreach my $file(@$files) {
push @data, process_file($file, $input_dir);
}
return @data;
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
my @output;
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums, {
"name" => $album_name,
"position" => $chartposition,
};
}
push @output, {
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
T44. Inserting Data into a Database
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
my @data = process_files(\@files, $input_dir);
add_to_database(\@data);
print Dumper(@data);
}
sub add_to_database {
my $data = shift;
my $dbh = DBI->connect("dbi:mysql:bands", "john", "letmein");
unless(defined($dbh)) {
die "Cannot connect to database.\n";
}
print "Connected to database.\n";
my $sth = $dbh->prepare('insert into bands (name) values (?)');
unless($sth) {
die "Error preparing SQL\n";
}
foreach my $data(@$data) {
my $band_name = $data->{"name"};
print "Inserting $band_name into database ...\n";
unless($sth->execute($band_name)) {
die "Error executing SQL\n";
}
}
$sth->finish();
$dbh->disconnect();
print "Completed.\n";
}
sub process_files {
my ($files, $input_dir) = @_;
my @data;
foreach my $file(@$files) {
push @data, process_file($file, $input_dir);
}
return @data;
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
my @output;
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums, {
"name" => $album_name,
"position" => $chartposition,
};
}
push @output, {
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
T45. Deleting Data and Executing Dataless SQL Commands
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
my @data = process_files(\@files, $input_dir);
add_to_database(\@data);
print Dumper(@data);
}
sub add_to_database {
my $data = shift;
my $dbh = DBI->connect("dbi:mysql:bands", "john", "letmein");
unless(defined($dbh)) {
die "Cannot connect to database.\n";
}
print "Connected to database.\n";
my $sth = $dbh->prepare('insert into bands (name) values (?)');
unless($sth) {
die "Error preparing SQL\n";
}
$dbh->do('delete from bands') or die "Can't clean bands table.\n";
$dbh->do('delete from albums') or die "Can't clean bands table.\n";
foreach my $data(@$data) {
my $band_name = $data->{"name"};
print "Inserting $band_name into database ...\n";
unless($sth->execute($band_name)) {
die "Error executing SQL\n";
}
}
$sth->finish();
$dbh->disconnect();
print "Completed.\n";
}
sub process_files {
my ($files, $input_dir) = @_;
my @data;
foreach my $file(@$files) {
push @data, process_file($file, $input_dir);
}
return @data;
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
my @output;
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums, {
"name" => $album_name,
"position" => $chartposition,
};
}
push @output, {
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
T46. Getting the IDs of Records You’ve Just Inserted
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts('d:r', \%opts);
if(!checkusage(\%opts)) {
usage();
exit();
}
my $input_dir = $opts{"d"};
my @files = get_files($input_dir);
my @data = process_files(\@files, $input_dir);
add_to_database(\@data);
# print Dumper(@data);
}
sub add_to_database {
my $data = shift;
my $dbh = DBI->connect("dbi:mysql:bands", "john", "letmein");
unless(defined($dbh)) {
die "Cannot connect to database.\n";
}
print "Connected to database.\n";
my $sth_bands = $dbh->prepare('insert into bands (name) values (?)');
my $sth_albums = $dbh->prepare('insert into albums (name, position, band_id) values (?, ?, ?)');
unless($sth_bands) {
die "Error preparing band insert SQL\n";
}
unless($sth_albums) {
die "Error preparing album insert SQL\n";
}
$dbh->do('delete from albums') or die "Can't clean bands table.\n";
$dbh->do('delete from bands') or die "Can't clean bands table.\n";
foreach my $data(@$data) {
my $band_name = $data->{"name"};
my $albums = $data->{"albums"};
print "Inserting $band_name into database ...\n";
unless($sth_bands->execute($band_name)) {
die "Error executing SQL\n";
}
my $band_id = $sth_bands->{'mysql_insertid'};
foreach my $album(@$albums) {
my $album_name = $album->{"name"};
my $album_position = $album->{"position"};
# print "$album_name, $album_position\n";
unless($sth_albums->execute($album_name, $album_position, $band_id)) {
die "Unlable to execute albums insert.\n";
}
}
}
$sth_bands->finish();
$sth_albums->finish();
$dbh->disconnect();
print "Completed.\n";
}
sub process_files {
my ($files, $input_dir) = @_;
my @data;
foreach my $file(@$files) {
push @data, process_file($file, $input_dir);
}
return @data;
}
sub process_file {
my ($file, $input_dir) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open(INPUTFILE, $filepath) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin($content, ForceArray => 1);
my @output;
foreach my $band(@{$dom->{"entry"}}) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album(@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums, {
"name" => $album_name,
"position" => $chartposition,
};
}
push @output, {
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless(opendir(INPUTDIR, $input_dir)) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep(/\.xml$/i, @files);
return @files;
}
sub checkusage {
my $opts = shift;
my $r = $opts->{"r"};
my $d = $opts->{"d"};
# Image a is optional; don't really need to refer to it here at all.
# r is optional
# d is mandatory.
unless(defined($d)) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-d <directory> specify directory in which to find XML files.
-r run the program; process the files
example usage:
# Process files in currect directory.
perl main.pl -d . -r
USAGE
}
main();
T47. Querying Databases
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts( 'i:e', \%opts );
if ( !checkusage( \%opts ) ) {
usage();
exit();
}
my $dbh = DBI->connect( "dbi:mysql:bands", "john", "letmein" );
unless ( defined($dbh) ) {
die "Cannot connect to database.\n";
}
print "Connected to database.\n";
if ( $opts{"i"} ) {
my $input_dir = $opts{"i"};
my @files = get_files($input_dir);
my @data = process_files( \@files, $input_dir );
print "Found " . scalar(@files) . " files\n";
add_to_database( $dbh, \@data );
}
if ( $opts{"e"} ) {
export_from_database($dbh);
}
# print Dumper(@data);
$dbh->disconnect();
print "Completed.\n";
}
sub export_from_database {
my $dbh = shift;
print "Exporting ...\n";
my $sql = 'select b.id as band_id, b.name as band_name, a.id as album_id, ' .
'a.name as album_name, a.position as position ' .
'from bands b join albums a on a.band_id=b.id';
my $sth = $dbh->prepare($sql);
unless(defined($sth)) {
die "Unable to prepare export query.\n";
}
unless($sth->execute()) {
die "Unable to execute export query.\n";
}
while(my $row = $sth->fetchrow_hashref()) {
my $band_id = $row->{"band_id"};
my $band_name = $row->{"band_name"};
my $album_id = $row->{"album_id"};
my $album_name = $row->{"album_name"};
my $position = $row->{"position"};
print "$band_id, $band_name, $album_id, $album_name, $position\n";
}
$sth->finish();
}
sub add_to_database {
my ( $dbh, $data ) = @_;
my $sth_bands = $dbh->prepare('insert into bands (name) values (?)');
my $sth_albums = $dbh->prepare(
'insert into albums (name, position, band_id) values (?, ?, ?)');
unless ($sth_bands) {
die "Error preparing band insert SQL\n";
}
unless ($sth_albums) {
die "Error preparing album insert SQL\n";
}
$dbh->do('delete from albums') or die "Can't clean bands table.\n";
$dbh->do('delete from bands') or die "Can't clean bands table.\n";
foreach my $data (@$data) {
my $band_name = $data->{"name"};
my $albums = $data->{"albums"};
print "Inserting $band_name into database ...\n";
unless ( $sth_bands->execute($band_name) ) {
die "Error executing SQL\n";
}
my $band_id = $sth_bands->{'mysql_insertid'};
foreach my $album (@$albums) {
my $album_name = $album->{"name"};
my $album_position = $album->{"position"};
# print "$album_name, $album_position\n";
unless (
$sth_albums->execute( $album_name, $album_position, $band_id ) )
{
die "Unlable to execute albums insert.\n";
}
}
}
$sth_bands->finish();
$sth_albums->finish();
}
sub process_files {
my ( $files, $input_dir ) = @_;
my @data;
foreach my $file (@$files) {
push @data, process_file( $file, $input_dir );
}
return @data;
}
sub process_file {
my ( $file, $input_dir ) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open( INPUTFILE, $filepath ) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin( $content, ForceArray => 1 );
my @output;
foreach my $band ( @{ $dom->{"entry"} } ) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album (@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums,
{
"name" => $album_name,
"position" => $chartposition,
};
}
push @output,
{
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless ( opendir( INPUTDIR, $input_dir ) ) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep( /\.xml$/i, @files );
return @files;
}
sub checkusage {
my $opts = shift;
my $i = $opts->{"i"};
my $e = $opts->{"e"};
unless ( defined($i) or defined($e) ) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-i <directory> import data; specify directory in which to find XML files.
-e export data from database
example usage:
# Process files in currect directory.
perl main.pl -i .
perl main.pl -e
USAGE
}
main();
T48. Exporting Data
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
use XML::Simple;
use DBI;
$| = 1;
=pod
This is ACME XML parser version 1.0
Use with care.
=cut
sub main {
my %opts;
# Get command line options
getopts( 'i:e', \%opts );
if ( !checkusage( \%opts ) ) {
usage();
exit();
}
my $dbh = DBI->connect( "dbi:mysql:bands", "john", "letmein" );
unless ( defined($dbh) ) {
die "Cannot connect to database.\n";
}
print "Connected to database.\n";
if ( $opts{"i"} ) {
my $input_dir = $opts{"i"};
my @files = get_files($input_dir);
my @data = process_files( \@files, $input_dir );
print "Found " . scalar(@files) . " files\n";
add_to_database( $dbh, \@data );
}
if ( $opts{"e"} ) {
export_from_database($dbh);
}
# print Dumper(@data);
$dbh->disconnect();
print "Completed.\n";
}
sub export_from_database {
my $dbh = shift;
print "Exporting ...\n";
my $output_file = "output.txt";
open OUTPUT, '>'.$output_file or die "Cannot create output file $output_file.\n";
my $sql = 'select b.id as band_id, b.name as band_name, a.id as album_id, ' .
'a.name as album_name, a.position as position ' .
'from bands b join albums a on a.band_id=b.id';
my $sth = $dbh->prepare($sql);
unless(defined($sth)) {
die "Unable to prepare export query.\n";
}
unless($sth->execute()) {
die "Unable to execute export query.\n";
}
while(my $row = $sth->fetchrow_hashref()) {
my $band_id = $row->{"band_id"};
my $band_name = $row->{"band_name"};
my $album_id = $row->{"album_id"};
my $album_name = $row->{"album_name"};
my $position = $row->{"position"};
print OUTPUT "$band_id, $band_name, $album_id, $album_name, $position\n";
}
$sth->finish();
print "Export completed to $output_file\n";
close OUTPUT;
}
sub add_to_database {
my ( $dbh, $data ) = @_;
my $sth_bands = $dbh->prepare('insert into bands (name) values (?)');
my $sth_albums = $dbh->prepare(
'insert into albums (name, position, band_id) values (?, ?, ?)');
unless ($sth_bands) {
die "Error preparing band insert SQL\n";
}
unless ($sth_albums) {
die "Error preparing album insert SQL\n";
}
$dbh->do('delete from albums') or die "Can't clean bands table.\n";
$dbh->do('delete from bands') or die "Can't clean bands table.\n";
foreach my $data (@$data) {
my $band_name = $data->{"name"};
my $albums = $data->{"albums"};
print "Inserting $band_name into database ...\n";
unless ( $sth_bands->execute($band_name) ) {
die "Error executing SQL\n";
}
my $band_id = $sth_bands->{'mysql_insertid'};
foreach my $album (@$albums) {
my $album_name = $album->{"name"};
my $album_position = $album->{"position"};
# print "$album_name, $album_position\n";
unless (
$sth_albums->execute( $album_name, $album_position, $band_id ) )
{
die "Unlable to execute albums insert.\n";
}
}
}
$sth_bands->finish();
$sth_albums->finish();
}
sub process_files {
my ( $files, $input_dir ) = @_;
my @data;
foreach my $file (@$files) {
push @data, process_file( $file, $input_dir );
}
return @data;
}
sub process_file {
my ( $file, $input_dir ) = @_;
print "Processing $file in $input_dir ... \n";
my $filepath = "$input_dir/$file";
open( INPUTFILE, $filepath ) or die "Unable to open $filepath\n";
undef $/;
my $content = <INPUTFILE>;
close(INPUTFILE);
my $parser = new XML::Simple;
my $dom = $parser->XMLin( $content, ForceArray => 1 );
my @output;
foreach my $band ( @{ $dom->{"entry"} } ) {
my $band_name = $band->{"band"}->[0];
my $albums = $band->{"album"};
my @albums;
foreach my $album (@$albums) {
my $album_name = $album->{"name"}->[0];
my $chartposition = $album->{"chartposition"}->[0];
push @albums,
{
"name" => $album_name,
"position" => $chartposition,
};
}
push @output,
{
"name" => $band_name,
"albums" => \@albums,
};
} # foreach band
return @output;
}
sub get_files {
my $input_dir = shift;
unless ( opendir( INPUTDIR, $input_dir ) ) {
die "\nUnable to open directory '$input_dir'\n";
}
my @files = readdir(INPUTDIR);
closedir(INPUTDIR);
@files = grep( /\.xml$/i, @files );
return @files;
}
sub checkusage {
my $opts = shift;
my $i = $opts->{"i"};
my $e = $opts->{"e"};
unless ( defined($i) or defined($e) ) {
return 0;
}
return 1;
}
sub usage {
print <<USAGE;
usage: perl main.pl <options>
-i <directory> import data; specify directory in which to find XML files.
-e export data from database
example usage:
# Process files in currect directory.
perl main.pl -i .
perl main.pl -e
USAGE
}
main();
Section 8: Modules and OO Perl
T52. Modules
# main.pl
use strict;
use warnings;
use Data::Dumper;
use Speak qw(test greet);
$|=1;
sub main {
#Speak::test();
test();
greet();
#my @dogs = qw(retriever labrador alsatian);
#print Dumper(@dogs);
}
main();
# Speak.pm
package Speak;
use Exporter qw(import);
@EXPORT_OK = qw(test greet);
# @EXPORT = qw(test);
sub test {
print "Hello there.\n";
}
sub greet {
print "Hey, how's it goin?\n";
}
1;
T53. Packages and Directories
# main.pl
use strict;
use warnings;
use Data::Dumper;
use lib '/Users/johnwpurcell/Documents/work/perl/projects/modules';
use Communication::Speak qw(test greet);
$|=1;
sub main {
test();
greet();
}
main();
# Speak.pm
package Communication::Speak;
use Exporter qw(import);
@EXPORT_OK = qw(test greet);
sub test {
print "Hello there.\n";
}
sub greet {
print "Hey, how's it goin?\n";
}
1;
T55. Implementing OO in Perl
# main.pl
use strict;
use warnings;
use Data::Person;
$|=1;
sub main {
my $person1 = new Data::Person("Bob", 45);
$person1->greet("Sue");
my $person2 = new Data::Person("Mike", 55);
$person2->greet("Rogriguez");
}
main();
# Person.pm
package Data::Person;
sub new {
my $class = shift;
my $self = {
"name" => shift,
"age" => shift,
};
bless($self, $class);
return $self;
}
sub greet {
my ($self, $other) = @_;
print "Hello $other; my name is " . $self->{"name"} . "; I am " . $self->{"age"} . " years old.\n";
}
1;
Section 9: Web Application Basics
57. A Hello World Web App
#!/opt/local/bin/perl
use strict;
use warnings;
sub main {
print "Content-type: text/html\n\n";
print "Hello world";
}
main();
T59. Using URL Parameters
#!/opt/local/bin/perl
use strict;
use warnings;
use CGI;
my $CGI = new CGI();
sub main {
print $CGI->header();
my $user = $CGI->param("user");
my $password = $CGI->param("pass");
print<<HTML;
<html>
<b>Hello world</b>
User: $user, Pass: $password
</html>
HTML
}
main();
T60. Website Forms
#!/opt/local/bin/perl
use strict;
use warnings;
use CGI;
my $CGI = new CGI();
sub main {
print $CGI->header();
my @query = $CGI->param();
@query = map($_ . ": " . $CGI->param($_), @query);
my $query = join(', ', @query);
print<<HTML;
<html>
<form action="test4.cgi" method="post">
<input type="text" name="query" />
<input type="hidden" name="go" value="true" />
<input type="submit" name="submit" value="Go" />
</form>
<p>Last submitted: $query</p>
</html>
HTML
}
main();
Section 10: Basic Sysadmin Tasks
T61. Moving, Copying and Deleting Tasks
use strict;
use warnings;
use File::Copy;
$|=1;
sub main {
if(move(
'/Users/johnwpurcell/Documents/work/perl/projects/Tutorial61 - Moving and Copying Files/logo.png', 'logo2.png')){
print "One file moved.\n";
}
else {
print "Unable to move file\n";
}
unlink('logo2.png');
}
main();
T62. Executing System Commands
use strict;
use warnings;
$|=1;
sub main {
my $command = 'cd ..; ls -l';
my @output = `$command`;
print join('', @output);
}
main();
T67. References to Hashes and Arrays Review
use strict;
use warnings;
$|=1;
sub main {
my @fruits = ("apple", "banana", "orange");
my %months = (
"Jan" => 1,
"Feb" => 2,
);
print $fruits[0] . "\n";
$fruits[3] = "kiwi";
print $months{"Jan"}. "\n";
$months{"Mar"} = 3;
my $fruits_ref = \@fruits;
print $fruits_ref->[0] . "\n";
my $months_ref = \%months;
print $months_ref->{"Jan"}. "\n";
foreach my $fruit(@$fruits_ref) {
print "$fruit\n";
}
# while( my ($key, $value) = each %{$months_ref})
# curly brackets are optional
while( my ($key, $value) = each %$months_ref) {
print "$key - $value\n";
}
}
main();