R Tutorials

Exclude missing values from analysis

1
2
3
# A numeric vector that contains one missing value.
x <- c(5, 7, 8, NA, 5)

# NA propagates through aggregates, so the plain mean is NA.
mean(x)

# Drop the missing value before averaging; this returns 6.25.
mean(x, na.rm = TRUE)

Recode a variable

1
2
# Select the rows where age is 35 and change the value to 55.
# `%in%` never yields NA, so rows with a missing age are simply left alone;
# with `==` an NA in the row index makes the data-frame assignment fail.
mydata$age[mydata$age %in% 35] <- 55

Opening a File

1
2
3
4
5
6
7
8
9
10
library(ggplot2)

# Read the NEC data set. file.path() builds the location without changing the
# session's working directory, and `header` is spelled out in full rather than
# relying on partial argument matching.
nec <- read.csv(
  file = file.path("/Users/clsnyder/R", "NEC.csv"),
  header = TRUE,
  sep = ","
)

# The later snippets refer to the columns by bare name (DaysatDx, EGA, Term),
# so the data frame is attached here. When done, use detach(nec).
attach(nec)

# Note, this is a better way: pick the file interactively instead of
# hard-coding a path. file.choose() works on every platform, whereas
# choose.files() exists only on Windows.
# nec <- read.csv(file.choose(), header = TRUE, sep = ",")

Common Useful functions

1
2
3
4
5
6
7
8
9
10
11
12
13
14
print()    # Prints a single R object
cat()      # Prints multiple objects, one after the other
length()   # Number of elements in a vector or of a list
mean()     # Arithmetic mean of a vector
median()   # Median of a vector
range()    # Minimum and maximum, returned as a vector of two values
unique()   # Gives the vector of distinct values
diff()     # Replace a vector by the vector of first differences
             # N. B. diff(x) has one less element than x
sort()     # Sort elements into order, but omitting NAs
order() # x[order(x)] orders elements of x, with NAs last
cumsum()   # Cumulative sums of a vector
cumprod()  # Cumulative products of a vector
rev()      # reverse the order of vector element

Plotting a cumulative frequency distribution

1
2
3
4
5
6
7
8
9
10
# 'duration' holds the baby's age (in days) at the time of diagnosis of NEC.
duration <- DaysatDx

# 'breaks' are the day boundaries from 0 to 100, one day per bin.
breaks <- seq(0, 100, by = 1.0)

# Bin each duration into a left-closed interval [a, b), then count per bin.
duration.cut <- cut(duration, breaks, right = FALSE)
duration.freq <- table(duration.cut)

# Running total of the counts, with a leading 0 so that the vector has one
# entry per break point.
cumfreq0 <- c(0, cumsum(duration.freq))

plot(breaks, cumfreq0)

# To get the cumulative frequency versus the log of the day:
plot(log(breaks), cumfreq0)

Scatterplot with ggplot2

1
2
3
# Scatterplot of age at diagnosis against EGA, coloured by Term.
# ggplot() replaces the deprecated qplot() and takes the columns from the
# data frame itself instead of depending on attach().
ggplot(nec, aes(x = EGA, y = DaysatDx, colour = as.factor(Term))) +
  geom_point()

# Note - to manipulate the data frame as if it were a spreadsheet, use
nec <- edit(nec)

Split data by factors and do calculations on each subset

“By Function” is “an object-oriented wrapper for ‘tapply’ applied to data frames.”

1
2
3
4
5
6
7
8
head(iris)

# Calculate the mean of the first 4 variables by species.
# Each group reaches the function as a data frame, and mean() on a data frame
# returns NA with a warning, so colMeans() is used instead.
by(iris[, 1:4], iris$Species, colMeans)

# In the bnames database, calculate mean percent by gender.
# Note that percent values are in the third column; drop = FALSE keeps that
# column as a one-column data frame so that by() and colMeans() apply.
by(bnames[, 3, drop = FALSE], bnames$sex, colMeans)

Overlapping density plots

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
library(ggplot2)

# Read the pectus data set without changing the session's working directory.
pectus <- read.csv(
  file = file.path("/Users/clsnyder/R", "pectus.csv"),
  header = TRUE,
  sep = ","
)
names(pectus)

controls <- subset(pectus, affected == 0)
disease <- subset(pectus, affected == 1)

# Histogram of the Haller index in affected patients, with a reference line
# at 3.25. A single geom_histogram() layer is used: qplot() already draws a
# histogram of its own, so adding geom_histogram() to it plotted the data twice.
ggplot(disease, aes(x = haller_idx)) +
  geom_histogram(colour = "darkgreen", fill = "white", binwidth = 0.5) +
  geom_vline(xintercept = 3.25, colour = "red")

# Overlapping densities, one per value of 'affected'. Columns are named bare
# inside aes() so that ggplot looks them up in the data argument.
haller_density <- ggplot(
  pectus,
  aes(x = haller_idx, fill = as.factor(affected))
) +
  geom_density(alpha = 0.2)
haller_density

# With axis labels
haller_labelled <- haller_density +
  geom_vline(xintercept = 3.25, colour = "red") +
  labs(x = "Haller Index", y = "Num Patients")
haller_labelled

# No legend. theme() replaces opts(), which has been removed from ggplot2.
haller_labelled + theme(legend.position = "none")

# Now for the correction index
ggplot(pectus, aes(x = correct_idx, fill = as.factor(affected))) +
  geom_density(alpha = 0.2) +
  geom_vline(xintercept = 10, colour = "red") +
  labs(x = "Correction Index", y = "Num Patients") +
  theme(legend.position = "none")

Create a custom function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Load in your data and check it. Columns are read through the data frame
# (ghost$...) rather than attach()ed, so nothing is masked on the search path.
ghost <- read.csv(
  file = "C:/Documents and Settings/csnyder/Desktop/ghostcsv.csv",
  header = TRUE,
  sep = ","
)

# The 'misrepresent' column is 0 for no instance of misrepresentation but may
# hold 1 or more, so collapse it into a binary (1/0) category.
liar <- ifelse(ghost$misrepresent == "0", 0, 1)

# Now t test it:
t.test(liar ~ ghost$pub.verified)

# Look at the boxplot
boxplot(ghost$Num_PR_Pubs ~ liar)

#' Run a two-sample t test of a variable against a grouping vector.
#'
#' Since we will want to t test a zillion things, wrap the call in a
#' user-defined function.
#'
#' @param x Numeric vector to compare between the groups.
#' @param group Grouping vector, the same length as `x`. Defaults to the
#'   `liar` vector built above.
#' @return The object returned by `t.test()`.
testfx <- function(x, group = liar) {
  t.test(x ~ group)
}

testfx(ghost$Num_Book_Chap)
# works!

Excel code to analyze ACSPBLS Data

Import from the ACSPBLS Site

* Introduction
Many pediatric surgeons are now storing or will soon be storing a record of their operative cases on the American College of Surgeons Database, located here

It is useful to be able to analyze your own data, for volume, RVU’s over time, etc. This brief tutorial will show you how to download your stored data, run a single macro on MS Excel 2007 (It should work on Excel 2003), and create a graphic of the result.

* Description
1. Go to the site and sign in
2. On the main page, select Cases > Export
3. A new page will open. Select “All Dates”, and “Exclude ICD/CPT Descriptions”
4. Hit “Submit”
5. Save the resultant CSV (=comma separated values) file as “cases-dump.csv”, where you can find it
6. Download this file of RVU’s for each CPT code
7. Open MS Excel and open this file; rename the worksheet tab the data is on as “rvu”
8. Save the file as a “Macro-enabled” Workbook
9. Click the circular windows icon in the left upper corner of the worksheet – there is a vertical menu “New”, “Open”, “Save”, etc; at the bottom are two rectangular buttons – click on “Excel Options”
10. Select “Popular” if not already open, and pick the “Show developer tab in the ribbon”; click OK
11. Now click on the new Developer tabbed menu at the top right
12. Click on the Macro button
13. Type in “import” in the name box, and click “create”
14. A new Visual basic window will open, with this filled in:

1
2
3
Sub import()
 
End Sub

15. Delete this and paste in the code from here:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
Sub import()
    ' Imports a CSV file chosen by the user into a new "cases-dump" sheet,
    ' removes and rearranges columns, looks up a value for every row from the
    ' "rvu" sheet, and builds a pivot table on a new "pivot" sheet that sums
    ' "rvu" by "Procedure Date", grouped into months and years.
    Dim LastRow As Long
    Dim LastColumn As Long
    Dim FName As Variant

    'Query for the file to open
    FName = Application.GetOpenFilename()
    If FName = False Then
        MsgBox "You didn't choose a file"
        'Nothing to import: stop here instead of querying the text "False"
        Exit Sub
    End If

    'Add a new sheet and import the csv data
    Sheets.Add
    ActiveSheet.Name = "cases-dump"
    Range("A1").Select
    With ActiveSheet.QueryTables.Add(Connection:= _
        "TEXT;" & FName, _
        Destination:=Range("$A$1"))
        .Name = "cases-dump"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .TextFilePromptOnRefresh = False
        .TextFilePlatform = 437
        .TextFileStartRow = 1
        .TextFileParseType = xlDelimited
        .TextFileTextQualifier = xlTextQualifierDoubleQuote
        .TextFileConsecutiveDelimiter = False
        .TextFileTabDelimiter = True
        .TextFileSemicolonDelimiter = False
        .TextFileCommaDelimiter = True
        .TextFileSpaceDelimiter = False
        .TextFileColumnDataTypes = Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        .TextFileTrailingMinusNumbers = True
        .Refresh BackgroundQuery:=False
    End With

    'Find the last Filled row of the range
    If WorksheetFunction.CountA(Cells) > 0 Then
        'Search for any entry, by searching backwards by Rows.
        LastRow = Cells.Find(What:="*", After:=[A1], SearchOrder:=xlByRows, SearchDirection:=xlPrevious).Row
        MsgBox "Importing:  " & LastRow & "  Operations"
    Else
        'An empty import would leave LastRow at 0 and break the fill and
        'pivot ranges built from it below.
        MsgBox "The selected file contained no data"
        Exit Sub
    End If

    'Drop the columns that are not analysed and rearrange the remainder.
    'NOTE(review): these column letters come from a recorded macro and depend
    'on the exact layout of the export file - confirm against a current export.
    Range("A:D,F:F,G:G,H:I,K:M,O:P,V:AI,AO:AZ").Select
    Selection.delete Shift:=xlToLeft
    Columns("A:A").Select
    Selection.Insert Shift:=xlToRight, CopyOrigin:=xlFormatFromLeftOrAbove
    Columns("C:C").Select
    Selection.Cut
    Columns("A:A").Select
    ActiveSheet.Paste
    Columns("B:B").EntireColumn.AutoFit
    Columns("C:C").Select
    Selection.delete Shift:=xlToLeft
    Selection.Cut
    Columns("N:N").Select
    ActiveSheet.Paste
    Columns("C:C").Select
    Selection.delete Shift:=xlToLeft

    'Column N: header "rvu", then a VLOOKUP into the "rvu" sheet for each row
    Range("N1").Select
    ActiveCell.FormulaR1C1 = "rvu"
    Range("N2").Select
    ActiveCell.FormulaR1C1 = "=VLOOKUP(RC[-6],rvu!R1C[-13]:R7240C[-12],2)"
    Range("N2").Select
    'AutoFill needs a destination larger than its source cell, so skip it
    'when row 2 is the only data row.
    If LastRow > 2 Then
        Selection.AutoFill Destination:=Range("N2:N" & LastRow), Type:=xlFillDefault
    End If

    'Freeze the looked-up results as plain values, then remove the formula column
    Columns("N:N").Select
    Selection.Copy
    Columns("O:O").Select
    Selection.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
        :=False, Transpose:=False
    Columns("N:N").Select
    Application.CutCopyMode = False
    Selection.delete Shift:=xlToLeft
    'Rows with no match in the lookup become 0
    Selection.Replace What:="#N/A", Replacement:="0", LookAt:=xlPart, _
        SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
        ReplaceFormat:=False
    Range("A6").Select

    'Re-measure the data block now that columns have been removed and added
    LastRow = Cells.Find(What:="*", After:=[A1], SearchOrder:=xlByRows, SearchDirection:=xlPrevious).Row
    LastColumn = Cells.Find(What:="*", After:=[A1], SearchOrder:=xlByColumns, SearchDirection:=xlPrevious).Column

    'Build the pivot table on its own sheet
    Sheets.Add.Name = "pivot"
    ActiveWorkbook.PivotCaches.Create(SourceType:=xlDatabase, SourceData:= _
        "cases-dump!R1C1:R" & LastRow & "C" & LastColumn, Version:=xlPivotTableVersion12).CreatePivotTable _
        TableDestination:="pivot!R3C1", TableName:="PivotTable1", DefaultVersion _
        :=xlPivotTableVersion12
    Sheets("pivot").Select
    Cells(3, 1).Select
    With ActiveSheet.PivotTables("PivotTable1").PivotFields("Procedure Date")
        .Orientation = xlRowField
        .Position = 1
    End With
    ActiveSheet.PivotTables("PivotTable1").AddDataField ActiveSheet.PivotTables( _
        "PivotTable1").PivotFields("rvu"), "Sum of rvu", xlSum
    'Group the dates: the Periods array switches on months (5th) and years (7th)
    Range("A6").Select
    Selection.Group Start:=True, End:=True, Periods:=Array(False, False, False, _
        False, True, False, True)
    With ActiveSheet.PivotTables("PivotTable1").PivotFields("Years")
        .Orientation = xlColumnField
        .Position = 1
    End With
End Sub

16. Save and Close the Visual basic window
17. Click anywhere (single-click) in the rvu sheet
18. Run the “import” macro from the Developer tab
19. You should see this:

table_report

20. You can click in the table, then select the “Options” tab and a column graph to get this (RVU’s generated by month and year):

Op Data July 2009

21. Good Luck!

Ruby Create Dictionary.html from double colon files

This takes a text file of the form below and creates an html dictionary version with a TOC :


Ambiguous Genitalia

Overview:: Four major defects can cause gender confusion in the neonate: 1) female pseudohermaphroditism or adrenogentital syndrome, 2) male pseudohermaphroditism, 3) true hermaphroditism, and 4) mixed gonadal dysgenesis. A physical examination and a chromosome analysis can make the correct diagnosis with approximately 90% accuracy. Gonadal symmetry is sought for - if there is symmetry in the gonadal findings, then the diagnosis is most likely a causative agent applied equally to both sides, such as an androgenized genetic female (=female pseudohermaphroditism) or an incompletely virilized genetic male (= male pseudohermaphroditism). Asymmetry refers to position of one gonad to the other, relative to the external inguinal ring. A buccal smear is felt by some to be useful in that Barr or Chromatin bodies (indicative of the presence of the second female X chromosome) may be found. Therefore: 1) Chromatin Positive, Symmetry = Fem pseudoherm; 2) Chromatin Positive, Asymmetry = True herm; 3) Chromatin Negative,Symmetry = Male pseudoherm; 5) Chromatin Negative, Asymmetry = Mixed gonad dysgen.

Caveats:: Genetic females recognized in the neonatal period historically were raised as female no matter how virilized. This is not currently the case. In genetic males the gender assignment was traditionally based on the phallus size - anyone with an 'inadequate' phallus is raised as a female. What constitutes an inadequate phallus ? Absolute values are impossible, but the measurements are made from the dorsum to the tip of the stretched glans. The following are 'inadequate': < 2.5 cm in the term infant; < 2.0 cm in the 34 week premie; < 1.5 cm in the 30 week preemie. Additionally, a thin (< 1 to 1.5 cm diameter) phallus is of concern. An elevated or normal MIS levels signifies a functioning testis.

Clinical:: The following work-up of the child with AG (ambiguous genitalia) may be indicated:H & P, HCG stimulation and enzyme ratios, Family pedigree, Genitogram, Maternal drug history, Cystoscopy, Buccal smear with Y fluorescence, Laparoscopy, Urinary steroids, Gonadal biopsy (longitudinal), Electrolytes,Androgen receptors (genital skin).

Female pseudohermaphroditism - Description:: (approximately 1/3 of all patients). This refers to 46, XX karyotype patients with severe masculinization secondary to exposure in utero to either endogenous or exogenous androgens.

Female pseudohermaphroditism - Etiology::The most common cause is the adrenogenital syndrome (CAH). CAH is the only cause of ambiguous genitalia that is life-threatening in the newborn period. 95% of these patients will have a 21-hydroxlyase deficiency. The gene for 21 hydroxylase has 2 alleles coded for in the Class II MHComplex, and there is therefore an A and B form of 21 OHase deficiency - the common form is the B variant. Mutations in the A genes result in milder, later onset forms of the disease. 21 hydroxylase deficiency results in aldosterone (mineralocorticoid) and cortisol (glucocorticoid) deficiencies, resulting in salt-wasting, hypotension, and adrenal crises. Other etiologies include 3 beta and 11 hydroxylase deficiencies. The 21, 3, and 11 are all forms of congenital adrenal hyperplasia (CAH). 11Beta OHase deficiency results in mild virilism (in affected females) and hypertension. In this variant, both mineralocorticoid and steroid replacement should be provided, since steroids alone may suppress the mineralocorticoid.

Female pseudohermaphroditism - Evaluation::The prenatal diagnosis of CAH can be made from DNA analysis of CVS (chorionic villous sampling) or amniotic fluid hormone levels during or after the 2nd trimester. The best way to test for CAH is to look for elevated precursors: {Deficiency,Elevated Precursor,Frequency} = [21 Hydroxylase; 17 hydroxyprogesterone; 95% cases] and [3BetaSteroid dehydrogenase; 17 hydroxypregnenolone: < 5% cases] and [11 Beta hydroxylase; 11 deoxycortisol; rare]. Clinically, prompt identification of patients with CAH is critical. Virilization is present in the female, and in the male only barely identifiable hyperpigmentation of the scrotum may be present. The history of an early or unexplained death in a sibling should alert one to the possibility. A filter-paper test for the most common defect (test is for 17 hydroxyprogesterone) is available. ECG findings of peaked T waves or arrhythmia's may be the first sign. Internal structures are normal, and only the external structures are affected (this is true because the adrenal develops after the 11th week of gestation when the internal structures are already formed). All genetically female patients have the potential to be fertile and should be raised as females. The phenotype is variable from very male appearing to milder forms. The life-threatening electrolyte abnormalities (hyperkalemia, hyponatremia) may not occur until 3 - 5 days after birth, and the adrenal crisis may not occur until 1-2 weeks of age.

Female pseudohermaphroditism - Treatment:: Can even be preventative, if CAH is identified prenatally - dexamethasone is given (1 - 1.5 mg, daily in 2 divided doses) to the mother, starting at the 6 - 7th week of gestation (before sexual differentiation). CVS is used to assess therapy - if the fetus is genetically male, Tx is stopped, if the fetus is female, Tx is continued until amniocentesis during 2nd trimester to confirm the Dx. Fetoscopy has been used to evaluate the external genitalia. Medical management of CAH consists of provision of glucocorticoid (hydrocortisone, 12.5 -15 mg/M2 q 8 hrs, provision of mineralocorticoid (Fludrocortisone 0.1mg q am), hydration, correction of Na, K+ abnormalities. All patients with female pseudohermaphroditism should be raised as females. Surgically, clitoral recession is done in the early neonatal period, and a cutback or flap vaginoplasty is usually done at 3 - 6 months of age. A pull-through vaginoplasty is usually delayed until about 2 years of age. Which of these is required depends on the level of entry of the vagina into the urogenital sinus.

Male Pseudohermaphroditism - General:: Accounts for approximately 1/3 of all patients. These patients have a 46 X,Y karyotype but deficient masculinization of the external genitalia. The effects of androgens on sexual differentiation are via receptors in the nucleus.

etc.......

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Builds <title>.htm, an HTML dictionary with a table of contents, from
# <title>.txt, whose entries have the form "Term:: definition".
p "Enter a title"
title = gets.chomp

# Map each term (text before the first "::") to its definition (text after).
dict = {}
IO.foreach(title + ".txt") do |line|
  term, definition = line.split("::", 2)
  next if definition.nil? # lines without "::" are not dictionary entries
  dict[term] = definition
end

File.open(title + '.htm', 'w') do |f|
  f.puts "<HTML>\n<link href='Level3_3.css' rel='stylesheet' type='text/css'><HEAD><TITLE>" + title + "</TITLE></HEAD>"
  f.puts "<CENTER><A NAME=\"page_top\"><H1>" + title + "</H1></A>"
  f.puts "by Charles L. Snyder, MD<BR>\n"
  f.puts "</CENTER>\n<HR>\n"

  terms = dict.keys.sort

  # Table of contents: one link per term, pointing at that entry's own anchor.
  terms.each_with_index do |term, idx|
    f.puts "<A HREF=\"#entry#{idx}\">#{term}</A><BR>"
  end
  f.puts "<HR>\n"

  # Entries in sorted order. The definition is looked up by its term:
  # indexing dict.values by the position in the sorted key list paired
  # terms with the wrong definitions.
  terms.each_with_index do |term, idx|
    f.puts "<A NAME=\"entry#{idx}\"><H2>" + term + "</H2></A>" + dict[term] + "<BR>"
    f.puts "<CENTER><A HREF=\'javascript:window.history.back()\'>Back</A>&nbsp"
    f.puts "</CENTER>\n<HR>\n"
  end
end

Python Google Stock Quotes

import urllib
import re

def get_quote(symbol):
    """Fetch and print the current quote for ``symbol`` from Google Finance.

    Args:
        symbol: Ticker symbol, e.g. ``"AAPL"``; appended to the query URL.

    Returns:
        The text captured after the first ``class="pr"`` tag in the page, or
        the message ``'no quote available for: <symbol>'`` when the page has
        no such tag. The quote is printed only when one is found.
    """
    # Imported locally so the function runs on both Python 2 and Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    base_url = 'http://finance.google.com/finance?q='
    response = urlopen(base_url + symbol)
    try:
        content = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    # Python 3 returns bytes; the regular expression below needs text.
    if not isinstance(content, str):
        content = content.decode('utf-8', 'replace')

    m = re.search('class="pr".*?>(.*?)<', content)
    if m:
        quote = m.group(1)
        print(symbol + "   " + quote)
    else:
        quote = 'no quote available for: ' + symbol
    return quote

# Ticker symbols to look up, in the order they are printed.
items = ["AAPL", "GOOG", "ININ", "EWH", "IAU"]
for ticker in items:
    get_quote(ticker)
# Wait for the user to press Enter before the script exits.
raw_input()

Ruby Code Samples

Table of Contents:

Tag cloud in ruby

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#Tag cloud in Ruby
#
# Returns an inline CSS fragment ("font-size:..px;color:rgb(..);") for a tag
# used +total+ times, where +lowest+ and +highest+ are the smallest and
# largest use counts in the cloud. Returns nil if any of the three is nil.
#
# Options (all optional; the hash is only read, never modified):
#   :max_font_size  largest font size in px (default 14)
#   :min_font_size  smallest font size in px (default 11)
#   :max_color      RGB triple for the most used tag (default [0, 0, 0])
#   :min_color      RGB triple for the least used tag (default [156, 156, 156])
#   :hide_sizes     when truthy, omit the font-size part
#   :hide_colours   when truthy, omit the color part
def font_size_for_tag_cloud(total, lowest, highest, options={})
  return nil if total.nil? or highest.nil? or lowest.nil?

  # Read with [] rather than delete so the caller's hash is left untouched.
  maxf = options[:max_font_size] || 14
  minf = options[:min_font_size] || 11
  maxc = options[:max_color] || [ 0, 0, 0 ]
  minc = options[:min_color] || [ 156, 156, 156 ]
  hide_sizes = options[:hide_sizes]
  hide_colours = options[:hide_colours]

  # One colour channel, interpolated between a and b on a logarithmic scale
  # of i relative to x; nil when either count is 1 or less. A lambda is used
  # so that no extra method is defined every time this one is called.
  rgb_color = lambda do |a, b, i, x|
    return nil if i <= 1 or x <= 1
    if a > b
      a - (Math.log(i) * (a - b) / Math.log(x)).floor
    else
      (Math.log(i) * (b - a) / Math.log(x) + a).floor
    end
  end

  # work out colours; fall back to the minimum colour when none can be computed
  c = (0..2).map { |i| rgb_color.call(minc[i], maxc[i], total, highest) }
  colors = c.compact.empty? ? minc.join(',') : c.join(',')

  # work out the font size, capped at the maximum
  spread = highest.to_f - lowest.to_f
  spread = 1.to_f if spread <= 0
  fontspread = maxf.to_f - minf.to_f
  fontstep = spread / fontspread
  size = ( minf + ( total.to_f / fontstep ) ).to_i
  size = maxf if size > maxf

  # display the results
  size_txt = "font-size:#{ size.to_s }px;" unless hide_sizes
  color_txt = "color:rgb(#{ colors });" unless hide_colours
  return [ size_txt, color_txt ].join
end

Excel in ruby

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
require 'win32ole'
# Put in your own path and filename. Single quotes keep the backslashes literal.
myfile = 'C:\Documents and Settings\Charles L.Snyder\My Documents\rvu.xls'

excel = WIN32OLE.new('excel.Application')
begin
  workbook = excel.Workbooks.Open(myfile)
  worksheet = workbook.Worksheets(1) # get the first worksheet - change the index if needed
  worksheet.Select

  # Address of the last filled cell to the right of A1 (-4161 is xlToRight)
  # and below A1 (-4121 is xlDown), with the "$" signs removed, e.g. "B1" / "A7".
  eor = (worksheet.range("a1").end(-4161).address).delete("$")
  eoc = (worksheet.range("a1").end(-4121).address).delete("$")
  # puts 'last row is ' + eoc
  eoc = eoc[/\d+\z/]    # just want the number of the last row
  eor = eor[/\A[A-Z]+/] # just want the letter of the last column
  col_titles = []

  # Only rows inside the used range can hold a name, so the scan stops at the
  # last used row instead of walking every row of column A.
  used = worksheet.UsedRange
  last_used_row = used.Row + used.Rows.Count - 1
  name_cells = worksheet.Range("a1:a#{last_used_row}")

  surgeons = ["ANDREWS,WALTER S MD","GATTI,JOHN MD", "HOLCOMB,GEORGE WHIT MD", "MURPHY,JOHN PATRICK MD", "OSTLIE,DANIEL J MD", "SHARP,RONALD MD", "SNYDER,CHARLES MD"]
  surgeons.each do |surg|
    counter = 0
    name_cells.each do |f|
      # Count rows for this surgeon whose cell four columns to the right
      # holds 54161.
      # NOTE(review): 54161 looks like a procedure code - confirm its meaning.
      if f.value == surg and f.offset(0, 4).value == 54161
        counter += 1
      end
    end
    puts "#{surg}: did  #{counter} operations or encounters during the interval"
  end
ensure
  # Close the workbook without saving and quit Excel so that no hidden Excel
  # process is left running, even if an error occurred above.
  workbook.Close(false) if workbook
  excel.Quit
end

How to find files /filetypes in Ruby

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
 
require 'find'
module Find
  # Walks every path under +paths+ with Find.find and returns an array of the
  # paths for which the given block returns a true value.
  def match(*paths)
    hits = []
    find(*paths) do |candidate|
      hits.push(candidate) if yield(candidate)
    end
    hits
  end
  module_function :match
end
#---
# Build a small sample directory tree to search through.
# NOTE(review): create_tree is an external helper not shown here; presumably
# it creates the directories and files described by the nested structure.
require 'create_tree'
sample_tree = [
  { 'Music' => ['cancelled_download.MP3',
                ['The Snails - Red Rocket.mp3', 'Song contents #1'],
                ['The Snails - Moonfall.mp3', 'Song contents #2']] },
  { 'tmp' => ['empty1', 'empty2', ['README', 'Hi there!']] },
  { 'rubyprog-0.1' => [['rubyprog.rb', '#!/usr/bin/env ruby'],
                       ['README', 'This Ruby program is great!']] }
]
create_tree('./' => sample_tree)

#---
# Find the empty files.
Find.match('./') { |path| File.lstat(path).size.zero? }

# => ["./tmp/empty2", "./tmp/empty1", "./Music/cancelled_download.MP3"]

# Find the MP3s (the extension is compared case-insensitively).
Find.match('./') { |path| path.downcase.end_with?('.mp3') }
# => ["./Music/The Snails - Red Rocket.mp3",
#     "./Music/The Snails - Moonfall.mp3",
#     "./Music/cancelled_download.MP3"]


# Find the README files.
Find.match('./') { |path| File.basename(path) == 'README' }
# => ["./tmp/README", "./rubyprog-0.1/README"]
#---
# Same search, but Find.prune skips everything underneath ./tmp.
Find.match('./') do |path|
  Find.prune if path == "./tmp"
  File.basename(path) == "README"
end
# => ["./rubyprog-0.1/README"]
#---
# Find regular files whose contents begin with a given string.
must_start_with = "This Ruby program"
Find.match('./') do |path|
  File.file?(path) &&
    File.open(path) { |io| io.read(must_start_with.size) == must_start_with }
end
# => ["./rubyprog-0.1/README"]
#---
# Finds files that were probably left behind by emacs sessions: backup files
# whose name ends in "~" (but does not start with it) and autosave files
# whose name both starts and ends with "#".
#
# The test is made on the file name, not on the whole path: Find yields paths
# such as "./dir/#notes#", whose first character is never "#".
def emacs_droppings(*paths)
  Find.match(*paths) do |p|
    name = File.basename(p)
    (name.end_with?('~') and !name.start_with?('~')) or
      (name.start_with?('#') and name.end_with?('#'))
  end
end
 
# Returns the paths under +paths+ whose size is strictly greater than +bytes+.
# Handy for spotting the files hogging space on your filesystem. Sizes come
# from File.lstat, so a symlink is measured itself, not its target.
def bigger_than(bytes, *paths)
  Find.match(*paths) do |path|
    File.lstat(path).size > bytes
  end
end
 
# Returns the paths under +paths+ that were modified within the last
# +seconds+ seconds. The cutoff is computed once, before the search starts.
def modified_recently(seconds, *paths)
  cutoff = Time.now - seconds
  Find.match(*paths) do |path|
    File.lstat(path).mtime > cutoff
  end
end
 
# Returns the paths under +paths+ whose access time equals their modification
# time, i.e. files that haven't been accessed since they were last modified.
def possibly_abandoned(*paths)
  Find.match(*paths) do |path|
    info = File.lstat(path)
    info.mtime == info.atime
  end
end
#---

Ruby Word Unscrambler

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Looks up words whose interior letters have been shuffled while the first
# and last letters stayed in place.
class Unscrambler
    # Reads one word per line from +wordsFilename+ and indexes each word
    # under its key (see word_to_key).
    def initialize(wordsFilename = "/usr/share/dict/words")
        # The default block hands back a fresh array without storing it, so
        # only keys that are assigned below end up in the hash.
        @wordsHash = Hash.new { Array.new }
        File.foreach(wordsFilename) do |line|
            entry = line.chomp
            key = Unscrambler.word_to_key(entry)
            @wordsHash[key] = @wordsHash[key] << entry
        end
    end

    # Key shared by all scrambles of a word: first letter, the interior
    # letters in sorted order, last letter. Words of three letters or fewer
    # are their own key.
    def self.word_to_key(word)
        return word if word.size <= 3
        word[0, 1] + word[1..-2].split(//).sort.join + word[-1, 1]
    end

    # Returns the array of known words sharing +word+'s key; empty when
    # there are none.
    def unscramble(word)
        @wordsHash[Unscrambler.word_to_key(word)]
    end
end
 
# Status messages are shown only when run interactively, so piped output
# contains nothing but the unscrambled text.
puts "Loading wordlist..." if $stdin.isatty

u = Unscrambler.new

puts "Ready." if $stdin.isatty

# Unscramble standard input line by line. A word with exactly one candidate
# is replaced by it, a word with several becomes "[a, b, ...]", and a word
# with none is left as typed.
$stdin.each_line do |line|
    line.gsub!(/\w+/) do |match|
        choices = u.unscramble(match.downcase)
        case choices.size
            when 0 then match
            when 1 then choices[0]
            else "[" + choices.join(", ") + "]"
        end
    end
    # Emit the processed line; without this the script produced no output.
    puts line
end

Ruby converter for xml quiz files to Quizmaker Program

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Converts <name>.xml (QUIZ/QUESTION elements with TEXT, EXPLAIN and ANSWER
# attributes and CHOICE children) into a flat <name>.quiz text file.
require 'ostruct'
require "rexml/document"
include REXML

print "What is the name of the xml file?"
p myFile = gets.chomp
doc = File.open("#{myFile}.xml") {|io| Document.new io}

# Output lines, in order: the question text, then for every choice the choice
# itself followed by its feedback line.
quiz = []
doc.elements.each("QUIZ/QUESTION") do |el_q|
  question = OpenStruct.new
  quiz.push question.text = el_q.attributes["TEXT"]
  question.explain = el_q.attributes["EXPLAIN"]
  question.answer = el_q.attributes["ANSWER"]
  i = 0
  el_q.elements.each("CHOICE") do |el_ch|
    (question.choices ||= []) << el_ch.text
    quiz.push(question.choices[i].to_s)
    # ANSWER is compared with the zero-based position of the choice
    if (i.to_s == question.answer)
      quiz.push "Correct! #{question.explain}"
    else
      quiz.push "False! #{question.explain}"
    end
    i += 1
  end
end

File::open("#{myFile}" + '.quiz', 'w') do |f|
  f.puts quiz
end

Create a Histogram in Ruby

1
2
3
4
5
6
7
8
# Tally how often each element of my_array occurs. The hash's default block
# makes an unseen key read as 0, so += 1 works on first sight of a key.
counts = my_array.each_with_object(Hash.new { 0 }) { |key, tally| tally[key] += 1 }
# Sorting a hash yields an array of [key, count] pairs in <=> order.
counts = counts.sort
# Here is an example of the technique:
a = %w(a b c d e f f f g h j j j)
bob = a.each_with_object(Hash.new { 0 }) { |key, tally| tally[key] += 1 }
p bob

Clean out old files with Ruby

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env ruby
 
action = 'ARCHIVE' # 'LIST' or 'DELETE' or 'TRASH' or 'ARCHIVE'
age = 45 # minimum number of days since the last access

# DO NOT CHANGE ANYTHING BELOW THIS LINE
# Save the file somewhere as clean.rb, and in the Terminal (Applications > Utilities), cd to the directory (e.g., cd Desktop if it's on your Desktop), and type chmod +x clean.rb.
# If you want, you can edit the top of the file.
# There are two settings: what you want to do with the file (list, move to trash, archive, or delete), and how long it must be since you accessed it.
# You can then run the file by typing ./clean.rb Downloads, for example. It will clean out or list the files according to your preference.
# A directory is handled as a whole, judged by the most recently accessed file inside it.

require 'fileutils'
 
# Describes the distance between two times in words, e.g. "2 minutes",
# "about 5 hours", "3 days".
#
# from_time, to_time - Time objects or numbers of seconds; anything that
#                      responds to to_time is converted first.
# include_seconds    - when true, distances of up to about a minute are
#                      reported more finely ("less than 5 seconds", ...).
#
# Returns a String.
def distance_of_time_in_words(from_time, to_time = 0, include_seconds = false)
  from_time = from_time.to_time if from_time.respond_to?(:to_time)
  to_time = to_time.to_time if to_time.respond_to?(:to_time)
  distance_in_minutes = (((to_time - from_time).abs)/60).round
  distance_in_seconds = ((to_time - from_time).abs).round

  case distance_in_minutes
  when 0..1
    unless include_seconds
      return (distance_in_minutes == 0) ? 'less than a minute' : '1 minute'
    end
    case distance_in_seconds
    when 0..5   then 'less than 5 seconds'
    when 6..10  then 'less than 10 seconds'
    when 11..20 then 'less than 20 seconds'
    when 21..40 then 'half a minute'
    when 41..59 then 'less than a minute'
    else             '1 minute'
    end
  when 2..45      then "#{distance_in_minutes} minutes"
  when 46..90     then 'about 1 hour'
  # starts at 91: a distance of 90 minutes is already covered by the branch above
  when 91..1440   then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
  when 1441..2880 then '1 day'
  else                 "#{(distance_in_minutes / 1440).round} days"
  end
end
 
# Returns the most recent access time of any file below +dir_name+, searching
# subdirectories recursively. Returns Time.at(0) when no files are found.
# The working directory is changed only for the duration of the scan.
def last_accessed(dir_name)
  best = Time.at(0)
  Dir.chdir(dir_name) do
    Dir.foreach('.') do |entry|
      next if entry == '.' or entry == '..'
      # A directory counts as accessed when its newest file was accessed.
      x = File.directory?(entry) ? last_accessed(entry) : File.atime(entry)
      best = x if x > best
    end
  end
  return best
end
# Process every entry of the directory given as the first command-line
# argument (default: the current directory). An entry is acted upon when its
# last access - for a directory, the last access of anything inside it - is
# more than `age` days ago.
Dir.chdir($*[0] || Dir.pwd) do
  Dir.foreach('.') do |entry|
    # puts entry
    next if entry == '.' or entry == '..'
    x = (File.directory?(entry) ? last_accessed(entry) : File.atime(entry))
    if Time.now - x > 60 * 60 * 24 * age
      case action
      when 'DELETE'
        # File.delete cannot remove a directory, so directories are removed
        # recursively.
        if File.directory?(entry)
          FileUtils.rm_r(entry)
        else
          File.delete(entry)
        end
      when 'TRASH'
        # NOTE(review): the macOS user trash is normally ~/.Trash - confirm
        # that ~/Trash/ is the intended destination.
        FileUtils.mv(entry, File.expand_path('~/Trash/'))
      when 'ARCHIVE'
        FileUtils.mv(entry, File.expand_path('/Users/charleslsnyder/Archives'))
      else
        # 'LIST' (or any other value): only report how old the entry is
        puts entry + ': ' + distance_of_time_in_words(x, Time.now, true)
      end
    end
  end
end

Mathematica: Create a TOC

(* TOCForNotebook[] writes, at the start of the evaluation notebook, one
   "Hyperlink"-styled button per cell of style "Section"; pressing a button
   runs NotebookFind on that section's CellID. *)
TOCForNotebook[] :=
  Module[{nbr, data, cells, nb}, nb = EvaluationNotebook[];
   (* Turn on CellID creation, then select the whole notebook and write its
      own contents back - presumably so that existing cells receive a
      CellID; TODO confirm. *)
   SetOptions[nb, System`CreateCellID -> True];
   SelectionMove[nb, All, Notebook, AutoScroll -> False];
   NotebookWrite[nb, NotebookRead[nb]];
   (* Select every cell of style "Section" and read the selection. *)
   NotebookFind[nb, "Section", All, CellStyle];
   nbr = NotebookRead[nb];
   (* Reduce each section cell to {content, CellID}. *)
   data =
    nbr /. {Cell[x_, "Section", ___, z : (CellID -> w_), ___] :> {x,
        w}};
   (* One frameless button per section: label #[[1]], target CellID #[[2]]. *)
   cells =
    TextCell[
       Button[TextCell[#[[1]], "Hyperlink"],
        NotebookFind[EvaluationNotebook[], #[[2]], All, CellID],
        Appearance -> "Frameless"], "Text"] & /@ data;
   cells = First[ToBoxes[#]] & /@ cells;
   (* Move to the position before the notebook contents and write the
      generated cells there. *)
   SelectionMove[EvaluationNotebook[], Before, Notebook,
    AutoScroll -> True];
   NotebookWrite[nb, cells];];
(* A yellow button that runs TOCForNotebook[] when pressed. *)
Button["Create a Table of Contents", TOCForNotebook[],
 Background -> Yellow]